mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2024-10-01 01:06:10 -04:00
server: improve correctness of request parsing and responses (#2929)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
1aae4ffe0a
commit
39005288c5
@ -317,9 +317,9 @@ jobs:
|
|||||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||||
packages=(
|
packages=(
|
||||||
bison build-essential ccache cuda-compiler-11-8 flex gperf libcublas-dev-11-8 libfontconfig1 libfreetype6
|
bison build-essential ccache cuda-compiler-11-8 flex g++-12 gperf libcublas-dev-11-8 libfontconfig1
|
||||||
libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev libx11-6
|
libfreetype6 libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev
|
||||||
libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
|
libx11-6 libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
|
||||||
libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
|
libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
|
||||||
libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 patchelf
|
libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 patchelf
|
||||||
python3 vulkan-sdk
|
python3 vulkan-sdk
|
||||||
@ -352,6 +352,8 @@ jobs:
|
|||||||
~/Qt/Tools/CMake/bin/cmake \
|
~/Qt/Tools/CMake/bin/cmake \
|
||||||
-S ../gpt4all-chat -B . \
|
-S ../gpt4all-chat -B . \
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_C_COMPILER=gcc-12 \
|
||||||
|
-DCMAKE_CXX_COMPILER=g++-12 \
|
||||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
|
||||||
@ -391,9 +393,9 @@ jobs:
|
|||||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||||
packages=(
|
packages=(
|
||||||
bison build-essential ccache cuda-compiler-11-8 flex gperf libcublas-dev-11-8 libfontconfig1 libfreetype6
|
bison build-essential ccache cuda-compiler-11-8 flex g++-12 gperf libcublas-dev-11-8 libfontconfig1
|
||||||
libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev libx11-6
|
libfreetype6 libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev
|
||||||
libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
|
libx11-6 libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
|
||||||
libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
|
libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
|
||||||
libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 patchelf
|
libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 patchelf
|
||||||
python3 vulkan-sdk
|
python3 vulkan-sdk
|
||||||
@ -426,6 +428,8 @@ jobs:
|
|||||||
~/Qt/Tools/CMake/bin/cmake \
|
~/Qt/Tools/CMake/bin/cmake \
|
||||||
-S ../gpt4all-chat -B . \
|
-S ../gpt4all-chat -B . \
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_C_COMPILER=gcc-12 \
|
||||||
|
-DCMAKE_CXX_COMPILER=g++-12 \
|
||||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
|
||||||
@ -447,7 +451,7 @@ jobs:
|
|||||||
|
|
||||||
build-offline-chat-installer-windows:
|
build-offline-chat-installer-windows:
|
||||||
machine:
|
machine:
|
||||||
image: 'windows-server-2019-vs2019:2022.08.1'
|
image: windows-server-2022-gui:current
|
||||||
resource_class: windows.large
|
resource_class: windows.large
|
||||||
shell: powershell.exe -ExecutionPolicy Bypass
|
shell: powershell.exe -ExecutionPolicy Bypass
|
||||||
steps:
|
steps:
|
||||||
@ -538,7 +542,7 @@ jobs:
|
|||||||
|
|
||||||
sign-offline-chat-installer-windows:
|
sign-offline-chat-installer-windows:
|
||||||
machine:
|
machine:
|
||||||
image: 'windows-server-2019-vs2019:2022.08.1'
|
image: windows-server-2022-gui:current
|
||||||
resource_class: windows.large
|
resource_class: windows.large
|
||||||
shell: powershell.exe -ExecutionPolicy Bypass
|
shell: powershell.exe -ExecutionPolicy Bypass
|
||||||
steps:
|
steps:
|
||||||
@ -568,7 +572,7 @@ jobs:
|
|||||||
|
|
||||||
build-online-chat-installer-windows:
|
build-online-chat-installer-windows:
|
||||||
machine:
|
machine:
|
||||||
image: 'windows-server-2019-vs2019:2022.08.1'
|
image: windows-server-2022-gui:current
|
||||||
resource_class: windows.large
|
resource_class: windows.large
|
||||||
shell: powershell.exe -ExecutionPolicy Bypass
|
shell: powershell.exe -ExecutionPolicy Bypass
|
||||||
steps:
|
steps:
|
||||||
@ -666,7 +670,7 @@ jobs:
|
|||||||
|
|
||||||
sign-online-chat-installer-windows:
|
sign-online-chat-installer-windows:
|
||||||
machine:
|
machine:
|
||||||
image: 'windows-server-2019-vs2019:2022.08.1'
|
image: windows-server-2022-gui:current
|
||||||
resource_class: windows.large
|
resource_class: windows.large
|
||||||
shell: powershell.exe -ExecutionPolicy Bypass
|
shell: powershell.exe -ExecutionPolicy Bypass
|
||||||
steps:
|
steps:
|
||||||
@ -720,9 +724,9 @@ jobs:
|
|||||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||||
packages=(
|
packages=(
|
||||||
bison build-essential ccache cuda-compiler-11-8 flex gperf libcublas-dev-11-8 libfontconfig1 libfreetype6
|
bison build-essential ccache cuda-compiler-11-8 flex g++-12 gperf libcublas-dev-11-8 libfontconfig1
|
||||||
libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev libx11-6
|
libfreetype6 libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev
|
||||||
libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
|
libx11-6 libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
|
||||||
libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
|
libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
|
||||||
libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 python3
|
libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 python3
|
||||||
vulkan-sdk
|
vulkan-sdk
|
||||||
@ -744,6 +748,8 @@ jobs:
|
|||||||
~/Qt/Tools/CMake/bin/cmake \
|
~/Qt/Tools/CMake/bin/cmake \
|
||||||
-S gpt4all-chat -B build \
|
-S gpt4all-chat -B build \
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_C_COMPILER=gcc-12 \
|
||||||
|
-DCMAKE_CXX_COMPILER=g++-12 \
|
||||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
|
||||||
@ -758,7 +764,7 @@ jobs:
|
|||||||
|
|
||||||
build-gpt4all-chat-windows:
|
build-gpt4all-chat-windows:
|
||||||
machine:
|
machine:
|
||||||
image: 'windows-server-2019-vs2019:2022.08.1'
|
image: windows-server-2022-gui:current
|
||||||
resource_class: windows.large
|
resource_class: windows.large
|
||||||
shell: powershell.exe -ExecutionPolicy Bypass
|
shell: powershell.exe -ExecutionPolicy Bypass
|
||||||
steps:
|
steps:
|
||||||
@ -864,8 +870,8 @@ jobs:
|
|||||||
paths:
|
paths:
|
||||||
- ../.ccache
|
- ../.ccache
|
||||||
|
|
||||||
build-ts-docs:
|
build-ts-docs:
|
||||||
docker:
|
docker:
|
||||||
- image: cimg/base:stable
|
- image: cimg/base:stable
|
||||||
steps:
|
steps:
|
||||||
- checkout
|
- checkout
|
||||||
@ -887,7 +893,7 @@ jobs:
|
|||||||
docker:
|
docker:
|
||||||
- image: circleci/python:3.8
|
- image: circleci/python:3.8
|
||||||
steps:
|
steps:
|
||||||
- checkout
|
- checkout
|
||||||
- run:
|
- run:
|
||||||
name: Install dependencies
|
name: Install dependencies
|
||||||
command: |
|
command: |
|
||||||
@ -928,7 +934,8 @@ jobs:
|
|||||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||||
packages=(
|
packages=(
|
||||||
build-essential ccache cmake cuda-compiler-11-8 libcublas-dev-11-8 libnvidia-compute-550-server vulkan-sdk
|
build-essential ccache cmake cuda-compiler-11-8 g++-12 libcublas-dev-11-8 libnvidia-compute-550-server
|
||||||
|
vulkan-sdk
|
||||||
)
|
)
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y "${packages[@]}"
|
sudo apt-get install -y "${packages[@]}"
|
||||||
@ -942,6 +949,8 @@ jobs:
|
|||||||
cd gpt4all-backend
|
cd gpt4all-backend
|
||||||
cmake -B build \
|
cmake -B build \
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_C_COMPILER=gcc-12 \
|
||||||
|
-DCMAKE_CXX_COMPILER=g++-12 \
|
||||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
|
||||||
@ -1014,7 +1023,7 @@ jobs:
|
|||||||
|
|
||||||
build-py-windows:
|
build-py-windows:
|
||||||
machine:
|
machine:
|
||||||
image: 'windows-server-2019-vs2019:2022.08.1'
|
image: windows-server-2022-gui:current
|
||||||
resource_class: windows.large
|
resource_class: windows.large
|
||||||
shell: powershell.exe -ExecutionPolicy Bypass
|
shell: powershell.exe -ExecutionPolicy Bypass
|
||||||
steps:
|
steps:
|
||||||
@ -1118,11 +1127,12 @@ jobs:
|
|||||||
name: Install dependencies
|
name: Install dependencies
|
||||||
command: |
|
command: |
|
||||||
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
|
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
|
||||||
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
|
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
|
||||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||||
packages=(
|
packages=(
|
||||||
build-essential ccache cmake cuda-compiler-11-8 libcublas-dev-11-8 libnvidia-compute-550-server vulkan-sdk
|
build-essential ccache cmake cuda-compiler-11-8 g++-12 libcublas-dev-11-8 libnvidia-compute-550-server
|
||||||
|
vulkan-sdk
|
||||||
)
|
)
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y "${packages[@]}"
|
sudo apt-get install -y "${packages[@]}"
|
||||||
@ -1135,6 +1145,9 @@ jobs:
|
|||||||
mkdir -p runtimes/build
|
mkdir -p runtimes/build
|
||||||
cd runtimes/build
|
cd runtimes/build
|
||||||
cmake ../.. \
|
cmake ../.. \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DCMAKE_C_COMPILER=gcc-12 \
|
||||||
|
-DCMAKE_CXX_COMPILER=g++-12 \
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||||
@ -1204,7 +1217,7 @@ jobs:
|
|||||||
|
|
||||||
build-bindings-backend-windows:
|
build-bindings-backend-windows:
|
||||||
machine:
|
machine:
|
||||||
image: 'windows-server-2022-gui:2023.03.1'
|
image: windows-server-2022-gui:current
|
||||||
resource_class: windows.large
|
resource_class: windows.large
|
||||||
shell: powershell.exe -ExecutionPolicy Bypass
|
shell: powershell.exe -ExecutionPolicy Bypass
|
||||||
steps:
|
steps:
|
||||||
@ -1230,7 +1243,7 @@ jobs:
|
|||||||
- run:
|
- run:
|
||||||
name: Install dependencies
|
name: Install dependencies
|
||||||
command: |
|
command: |
|
||||||
choco install -y ccache cmake ninja --installargs 'ADD_CMAKE_TO_PATH=System'
|
choco install -y ccache cmake ninja --installargs 'ADD_CMAKE_TO_PATH=System'
|
||||||
- run:
|
- run:
|
||||||
name: Build Libraries
|
name: Build Libraries
|
||||||
command: |
|
command: |
|
||||||
@ -1263,8 +1276,8 @@ jobs:
|
|||||||
paths:
|
paths:
|
||||||
- runtimes/win-x64_msvc/*.dll
|
- runtimes/win-x64_msvc/*.dll
|
||||||
|
|
||||||
build-nodejs-linux:
|
build-nodejs-linux:
|
||||||
docker:
|
docker:
|
||||||
- image: cimg/base:stable
|
- image: cimg/base:stable
|
||||||
steps:
|
steps:
|
||||||
- checkout
|
- checkout
|
||||||
@ -1280,10 +1293,10 @@ jobs:
|
|||||||
pkg-manager: yarn
|
pkg-manager: yarn
|
||||||
override-ci-command: yarn install
|
override-ci-command: yarn install
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
cd gpt4all-bindings/typescript
|
cd gpt4all-bindings/typescript
|
||||||
yarn prebuildify -t 18.16.0 --napi
|
yarn prebuildify -t 18.16.0 --napi
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
mkdir -p gpt4all-backend/prebuilds/linux-x64
|
mkdir -p gpt4all-backend/prebuilds/linux-x64
|
||||||
mkdir -p gpt4all-backend/runtimes/linux-x64
|
mkdir -p gpt4all-backend/runtimes/linux-x64
|
||||||
@ -1292,10 +1305,10 @@ jobs:
|
|||||||
- persist_to_workspace:
|
- persist_to_workspace:
|
||||||
root: gpt4all-backend
|
root: gpt4all-backend
|
||||||
paths:
|
paths:
|
||||||
- prebuilds/linux-x64/*.node
|
- prebuilds/linux-x64/*.node
|
||||||
- runtimes/linux-x64/*-*.so
|
- runtimes/linux-x64/*-*.so
|
||||||
|
|
||||||
build-nodejs-macos:
|
build-nodejs-macos:
|
||||||
macos:
|
macos:
|
||||||
xcode: 15.4.0
|
xcode: 15.4.0
|
||||||
steps:
|
steps:
|
||||||
@ -1312,12 +1325,12 @@ jobs:
|
|||||||
pkg-manager: yarn
|
pkg-manager: yarn
|
||||||
override-ci-command: yarn install
|
override-ci-command: yarn install
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
cd gpt4all-bindings/typescript
|
cd gpt4all-bindings/typescript
|
||||||
yarn prebuildify -t 18.16.0 --napi
|
yarn prebuildify -t 18.16.0 --napi
|
||||||
- run:
|
- run:
|
||||||
name: "Persisting all necessary things to workspace"
|
name: "Persisting all necessary things to workspace"
|
||||||
command: |
|
command: |
|
||||||
mkdir -p gpt4all-backend/prebuilds/darwin-x64
|
mkdir -p gpt4all-backend/prebuilds/darwin-x64
|
||||||
mkdir -p gpt4all-backend/runtimes/darwin
|
mkdir -p gpt4all-backend/runtimes/darwin
|
||||||
cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin
|
cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin
|
||||||
@ -1328,7 +1341,7 @@ jobs:
|
|||||||
- prebuilds/darwin-x64/*.node
|
- prebuilds/darwin-x64/*.node
|
||||||
- runtimes/darwin/*-*.*
|
- runtimes/darwin/*-*.*
|
||||||
|
|
||||||
build-nodejs-windows:
|
build-nodejs-windows:
|
||||||
executor:
|
executor:
|
||||||
name: win/default
|
name: win/default
|
||||||
size: large
|
size: large
|
||||||
@ -1342,29 +1355,29 @@ jobs:
|
|||||||
command: wget https://nodejs.org/dist/v18.16.0/node-v18.16.0-x86.msi -P C:\Users\circleci\Downloads\
|
command: wget https://nodejs.org/dist/v18.16.0/node-v18.16.0-x86.msi -P C:\Users\circleci\Downloads\
|
||||||
shell: cmd.exe
|
shell: cmd.exe
|
||||||
- run: MsiExec.exe /i C:\Users\circleci\Downloads\node-v18.16.0-x86.msi /qn
|
- run: MsiExec.exe /i C:\Users\circleci\Downloads\node-v18.16.0-x86.msi /qn
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
Start-Process powershell -verb runAs -Args "-start GeneralProfile"
|
Start-Process powershell -verb runAs -Args "-start GeneralProfile"
|
||||||
nvm install 18.16.0
|
nvm install 18.16.0
|
||||||
nvm use 18.16.0
|
nvm use 18.16.0
|
||||||
- run: node --version
|
- run: node --version
|
||||||
- run: corepack enable
|
- run: corepack enable
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
npm install -g yarn
|
npm install -g yarn
|
||||||
cd gpt4all-bindings/typescript
|
cd gpt4all-bindings/typescript
|
||||||
yarn install
|
yarn install
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
cd gpt4all-bindings/typescript
|
cd gpt4all-bindings/typescript
|
||||||
yarn prebuildify -t 18.16.0 --napi
|
yarn prebuildify -t 18.16.0 --napi
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
mkdir -p gpt4all-backend/prebuilds/win32-x64
|
mkdir -p gpt4all-backend/prebuilds/win32-x64
|
||||||
mkdir -p gpt4all-backend/runtimes/win32-x64
|
mkdir -p gpt4all-backend/runtimes/win32-x64
|
||||||
cp /tmp/gpt4all-backend/runtimes/win-x64_msvc/*-*.dll gpt4all-backend/runtimes/win32-x64
|
cp /tmp/gpt4all-backend/runtimes/win-x64_msvc/*-*.dll gpt4all-backend/runtimes/win32-x64
|
||||||
cp gpt4all-bindings/typescript/prebuilds/win32-x64/*.node gpt4all-backend/prebuilds/win32-x64
|
cp gpt4all-bindings/typescript/prebuilds/win32-x64/*.node gpt4all-backend/prebuilds/win32-x64
|
||||||
|
|
||||||
- persist_to_workspace:
|
- persist_to_workspace:
|
||||||
root: gpt4all-backend
|
root: gpt4all-backend
|
||||||
paths:
|
paths:
|
||||||
@ -1372,7 +1385,7 @@ jobs:
|
|||||||
- runtimes/win32-x64/*-*.dll
|
- runtimes/win32-x64/*-*.dll
|
||||||
|
|
||||||
prepare-npm-pkg:
|
prepare-npm-pkg:
|
||||||
docker:
|
docker:
|
||||||
- image: cimg/base:stable
|
- image: cimg/base:stable
|
||||||
steps:
|
steps:
|
||||||
- attach_workspace:
|
- attach_workspace:
|
||||||
@ -1383,19 +1396,19 @@ jobs:
|
|||||||
node-version: "18.16"
|
node-version: "18.16"
|
||||||
- run: node --version
|
- run: node --version
|
||||||
- run: corepack enable
|
- run: corepack enable
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
cd gpt4all-bindings/typescript
|
cd gpt4all-bindings/typescript
|
||||||
# excluding llmodel. nodejs bindings don't need llmodel.dll
|
# excluding llmodel. nodejs bindings don't need llmodel.dll
|
||||||
mkdir -p runtimes/win32-x64/native
|
mkdir -p runtimes/win32-x64/native
|
||||||
mkdir -p prebuilds/win32-x64/
|
mkdir -p prebuilds/win32-x64/
|
||||||
cp /tmp/gpt4all-backend/runtimes/win-x64_msvc/*-*.dll runtimes/win32-x64/native/
|
cp /tmp/gpt4all-backend/runtimes/win-x64_msvc/*-*.dll runtimes/win32-x64/native/
|
||||||
cp /tmp/gpt4all-backend/prebuilds/win32-x64/*.node prebuilds/win32-x64/
|
cp /tmp/gpt4all-backend/prebuilds/win32-x64/*.node prebuilds/win32-x64/
|
||||||
|
|
||||||
mkdir -p runtimes/linux-x64/native
|
mkdir -p runtimes/linux-x64/native
|
||||||
mkdir -p prebuilds/linux-x64/
|
mkdir -p prebuilds/linux-x64/
|
||||||
cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
|
cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
|
||||||
cp /tmp/gpt4all-backend/prebuilds/linux-x64/*.node prebuilds/linux-x64/
|
cp /tmp/gpt4all-backend/prebuilds/linux-x64/*.node prebuilds/linux-x64/
|
||||||
|
|
||||||
# darwin has universal runtime libraries
|
# darwin has universal runtime libraries
|
||||||
mkdir -p runtimes/darwin/native
|
mkdir -p runtimes/darwin/native
|
||||||
@ -1403,22 +1416,22 @@ jobs:
|
|||||||
|
|
||||||
cp /tmp/gpt4all-backend/runtimes/darwin/*-*.* runtimes/darwin/native/
|
cp /tmp/gpt4all-backend/runtimes/darwin/*-*.* runtimes/darwin/native/
|
||||||
|
|
||||||
cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/
|
cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/
|
||||||
|
|
||||||
# Fallback build if user is not on above prebuilds
|
# Fallback build if user is not on above prebuilds
|
||||||
mv -f binding.ci.gyp binding.gyp
|
mv -f binding.ci.gyp binding.gyp
|
||||||
|
|
||||||
mkdir gpt4all-backend
|
mkdir gpt4all-backend
|
||||||
cd ../../gpt4all-backend
|
cd ../../gpt4all-backend
|
||||||
mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/gpt4all-backend/
|
mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/gpt4all-backend/
|
||||||
|
|
||||||
# Test install
|
# Test install
|
||||||
- node/install-packages:
|
- node/install-packages:
|
||||||
app-dir: gpt4all-bindings/typescript
|
app-dir: gpt4all-bindings/typescript
|
||||||
pkg-manager: yarn
|
pkg-manager: yarn
|
||||||
override-ci-command: yarn install
|
override-ci-command: yarn install
|
||||||
- run:
|
- run:
|
||||||
command: |
|
command: |
|
||||||
cd gpt4all-bindings/typescript
|
cd gpt4all-bindings/typescript
|
||||||
yarn run test
|
yarn run test
|
||||||
- run:
|
- run:
|
||||||
@ -1552,7 +1565,7 @@ workflows:
|
|||||||
- build-py-linux
|
- build-py-linux
|
||||||
- build-py-macos
|
- build-py-macos
|
||||||
build-bindings:
|
build-bindings:
|
||||||
when:
|
when:
|
||||||
or:
|
or:
|
||||||
- << pipeline.parameters.run-all-workflows >>
|
- << pipeline.parameters.run-all-workflows >>
|
||||||
- << pipeline.parameters.run-python-workflow >>
|
- << pipeline.parameters.run-python-workflow >>
|
||||||
@ -1585,8 +1598,8 @@ workflows:
|
|||||||
requires:
|
requires:
|
||||||
- hold
|
- hold
|
||||||
|
|
||||||
# NodeJs Jobs
|
# NodeJs Jobs
|
||||||
- prepare-npm-pkg:
|
- prepare-npm-pkg:
|
||||||
filters:
|
filters:
|
||||||
branches:
|
branches:
|
||||||
only:
|
only:
|
||||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -8,3 +8,6 @@
|
|||||||
[submodule "gpt4all-chat/deps/SingleApplication"]
|
[submodule "gpt4all-chat/deps/SingleApplication"]
|
||||||
path = gpt4all-chat/deps/SingleApplication
|
path = gpt4all-chat/deps/SingleApplication
|
||||||
url = https://github.com/nomic-ai/SingleApplication.git
|
url = https://github.com/nomic-ai/SingleApplication.git
|
||||||
|
[submodule "gpt4all-chat/deps/fmt"]
|
||||||
|
path = gpt4all-chat/deps/fmt
|
||||||
|
url = https://github.com/fmtlib/fmt.git
|
||||||
|
@ -33,7 +33,7 @@ set(LLMODEL_VERSION_PATCH 0)
|
|||||||
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
|
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
|
||||||
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
|
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 20)
|
set(CMAKE_CXX_STANDARD 23)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
||||||
set(BUILD_SHARED_LIBS ON)
|
set(BUILD_SHARED_LIBS ON)
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit 443665aec4721ecf57df8162e7e093a0cd674a76
|
Subproject commit ced74fbad4b258507f3ec06e77eec9445583511a
|
@ -162,7 +162,7 @@ public:
|
|||||||
bool allowContextShift,
|
bool allowContextShift,
|
||||||
PromptContext &ctx,
|
PromptContext &ctx,
|
||||||
bool special = false,
|
bool special = false,
|
||||||
std::string *fakeReply = nullptr);
|
std::optional<std::string_view> fakeReply = {});
|
||||||
|
|
||||||
using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
|
using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
|
||||||
|
|
||||||
@ -212,7 +212,7 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
// These are pure virtual because subclasses need to implement them, as the default implementation of
|
// These are pure virtual because subclasses need to implement them, as the default implementation of
|
||||||
// 'prompt' above calls these functions.
|
// 'prompt' above calls these functions.
|
||||||
virtual std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special = false) = 0;
|
virtual std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special = false) = 0;
|
||||||
virtual bool isSpecialToken(Token id) const = 0;
|
virtual bool isSpecialToken(Token id) const = 0;
|
||||||
virtual std::string tokenToString(Token id) const = 0;
|
virtual std::string tokenToString(Token id) const = 0;
|
||||||
virtual Token sampleToken(PromptContext &ctx) const = 0;
|
virtual Token sampleToken(PromptContext &ctx) const = 0;
|
||||||
@ -249,7 +249,8 @@ protected:
|
|||||||
std::function<bool(int32_t, const std::string&)> responseCallback,
|
std::function<bool(int32_t, const std::string&)> responseCallback,
|
||||||
bool allowContextShift,
|
bool allowContextShift,
|
||||||
PromptContext &promptCtx,
|
PromptContext &promptCtx,
|
||||||
std::vector<Token> embd_inp);
|
std::vector<Token> embd_inp,
|
||||||
|
bool isResponse = false);
|
||||||
void generateResponse(std::function<bool(int32_t, const std::string&)> responseCallback,
|
void generateResponse(std::function<bool(int32_t, const std::string&)> responseCallback,
|
||||||
bool allowContextShift,
|
bool allowContextShift,
|
||||||
PromptContext &promptCtx);
|
PromptContext &promptCtx);
|
||||||
|
@ -536,13 +536,13 @@ size_t LLamaModel::restoreState(const uint8_t *src)
|
|||||||
return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t*>(src));
|
return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t*>(src));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, const std::string &str, bool special)
|
std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, std::string_view str, bool special)
|
||||||
{
|
{
|
||||||
bool atStart = m_tokenize_last_token == -1;
|
bool atStart = m_tokenize_last_token == -1;
|
||||||
bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
|
bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
|
||||||
std::vector<LLModel::Token> fres(str.length() + 4);
|
std::vector<LLModel::Token> fres(str.length() + 4);
|
||||||
int32_t fres_len = llama_tokenize_gpt4all(
|
int32_t fres_len = llama_tokenize_gpt4all(
|
||||||
d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
|
d_ptr->model, str.data(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
|
||||||
/*parse_special*/ special, /*insert_space*/ insertSpace
|
/*parse_special*/ special, /*insert_space*/ insertSpace
|
||||||
);
|
);
|
||||||
fres.resize(fres_len);
|
fres.resize(fres_len);
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
struct LLamaPrivate;
|
struct LLamaPrivate;
|
||||||
@ -52,7 +53,7 @@ private:
|
|||||||
bool m_supportsCompletion = false;
|
bool m_supportsCompletion = false;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override;
|
std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override;
|
||||||
bool isSpecialToken(Token id) const override;
|
bool isSpecialToken(Token id) const override;
|
||||||
std::string tokenToString(Token id) const override;
|
std::string tokenToString(Token id) const override;
|
||||||
Token sampleToken(PromptContext &ctx) const override;
|
Token sampleToken(PromptContext &ctx) const override;
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
struct LLModelWrapper {
|
struct LLModelWrapper {
|
||||||
@ -130,13 +131,10 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
|
|||||||
wrapper->promptContext.repeat_last_n = ctx->repeat_last_n;
|
wrapper->promptContext.repeat_last_n = ctx->repeat_last_n;
|
||||||
wrapper->promptContext.contextErase = ctx->context_erase;
|
wrapper->promptContext.contextErase = ctx->context_erase;
|
||||||
|
|
||||||
std::string fake_reply_str;
|
|
||||||
if (fake_reply) { fake_reply_str = fake_reply; }
|
|
||||||
auto *fake_reply_p = fake_reply ? &fake_reply_str : nullptr;
|
|
||||||
|
|
||||||
// Call the C++ prompt method
|
// Call the C++ prompt method
|
||||||
wrapper->llModel->prompt(prompt, prompt_template, prompt_callback, response_func, allow_context_shift,
|
wrapper->llModel->prompt(prompt, prompt_template, prompt_callback, response_func, allow_context_shift,
|
||||||
wrapper->promptContext, special, fake_reply_p);
|
wrapper->promptContext, special,
|
||||||
|
fake_reply ? std::make_optional<std::string_view>(fake_reply) : std::nullopt);
|
||||||
|
|
||||||
// Update the C context by giving access to the wrappers raw pointers to std::vector data
|
// Update the C context by giving access to the wrappers raw pointers to std::vector data
|
||||||
// which involves no copies
|
// which involves no copies
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace ranges = std::ranges;
|
namespace ranges = std::ranges;
|
||||||
@ -45,7 +46,7 @@ void LLModel::prompt(const std::string &prompt,
|
|||||||
bool allowContextShift,
|
bool allowContextShift,
|
||||||
PromptContext &promptCtx,
|
PromptContext &promptCtx,
|
||||||
bool special,
|
bool special,
|
||||||
std::string *fakeReply)
|
std::optional<std::string_view> fakeReply)
|
||||||
{
|
{
|
||||||
if (!isModelLoaded()) {
|
if (!isModelLoaded()) {
|
||||||
std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
|
std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
|
||||||
@ -129,11 +130,11 @@ void LLModel::prompt(const std::string &prompt,
|
|||||||
return; // error
|
return; // error
|
||||||
|
|
||||||
// decode the assistant's reply, either generated or spoofed
|
// decode the assistant's reply, either generated or spoofed
|
||||||
if (fakeReply == nullptr) {
|
if (!fakeReply) {
|
||||||
generateResponse(responseCallback, allowContextShift, promptCtx);
|
generateResponse(responseCallback, allowContextShift, promptCtx);
|
||||||
} else {
|
} else {
|
||||||
embd_inp = tokenize(promptCtx, *fakeReply, false);
|
embd_inp = tokenize(promptCtx, *fakeReply, false);
|
||||||
if (!decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp))
|
if (!decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp, true))
|
||||||
return; // error
|
return; // error
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -157,7 +158,8 @@ bool LLModel::decodePrompt(std::function<bool(int32_t)> promptCallback,
|
|||||||
std::function<bool(int32_t, const std::string&)> responseCallback,
|
std::function<bool(int32_t, const std::string&)> responseCallback,
|
||||||
bool allowContextShift,
|
bool allowContextShift,
|
||||||
PromptContext &promptCtx,
|
PromptContext &promptCtx,
|
||||||
std::vector<Token> embd_inp) {
|
std::vector<Token> embd_inp,
|
||||||
|
bool isResponse) {
|
||||||
if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
|
if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
|
||||||
responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
|
responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
|
||||||
std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
|
std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
|
||||||
@ -196,7 +198,9 @@ bool LLModel::decodePrompt(std::function<bool(int32_t)> promptCallback,
|
|||||||
for (size_t t = 0; t < tokens; ++t) {
|
for (size_t t = 0; t < tokens; ++t) {
|
||||||
promptCtx.tokens.push_back(batch.at(t));
|
promptCtx.tokens.push_back(batch.at(t));
|
||||||
promptCtx.n_past += 1;
|
promptCtx.n_past += 1;
|
||||||
if (!promptCallback(batch.at(t)))
|
Token tok = batch.at(t);
|
||||||
|
bool res = isResponse ? responseCallback(tok, tokenToString(tok)) : promptCallback(tok);
|
||||||
|
if (!res)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
i = batch_end;
|
i = batch_end;
|
||||||
|
@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||||||
- Fix a typo in Model Settings (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
|
- Fix a typo in Model Settings (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
|
||||||
- Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
|
- Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
|
||||||
- Fix a few issues with locating files and handling errors when loading remote models on startup ([#2875](https://github.com/nomic-ai/gpt4all/pull/2875))
|
- Fix a few issues with locating files and handling errors when loading remote models on startup ([#2875](https://github.com/nomic-ai/gpt4all/pull/2875))
|
||||||
|
- Significantly improve API server request parsing and response correctness ([#2929](https://github.com/nomic-ai/gpt4all/pull/2929))
|
||||||
|
|
||||||
## [3.2.1] - 2024-08-13
|
## [3.2.1] - 2024-08-13
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
cmake_minimum_required(VERSION 3.16)
|
cmake_minimum_required(VERSION 3.16)
|
||||||
|
|
||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
set(CMAKE_CXX_STANDARD 20)
|
set(CMAKE_CXX_STANDARD 23)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
if(APPLE)
|
if(APPLE)
|
||||||
@ -64,6 +64,12 @@ message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
|
|||||||
|
|
||||||
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
|
|
||||||
|
set(FMT_INSTALL OFF)
|
||||||
|
set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}")
|
||||||
|
set(BUILD_SHARED_LIBS OFF)
|
||||||
|
add_subdirectory(deps/fmt)
|
||||||
|
set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}")
|
||||||
|
|
||||||
add_subdirectory(../gpt4all-backend llmodel)
|
add_subdirectory(../gpt4all-backend llmodel)
|
||||||
|
|
||||||
set(CHAT_EXE_RESOURCES)
|
set(CHAT_EXE_RESOURCES)
|
||||||
@ -240,7 +246,7 @@ else()
|
|||||||
PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
|
PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(chat
|
target_link_libraries(chat
|
||||||
PRIVATE llmodel SingleApplication)
|
PRIVATE llmodel SingleApplication fmt::fmt)
|
||||||
|
|
||||||
|
|
||||||
# -- install --
|
# -- install --
|
||||||
|
1
gpt4all-chat/deps/fmt
Submodule
1
gpt4all-chat/deps/fmt
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 0c9fce2ffefecfdce794e1859584e25877b7b592
|
@ -239,16 +239,17 @@ void Chat::newPromptResponsePair(const QString &prompt)
|
|||||||
resetResponseState();
|
resetResponseState();
|
||||||
m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
|
m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
|
||||||
m_chatModel->appendPrompt("Prompt: ", prompt);
|
m_chatModel->appendPrompt("Prompt: ", prompt);
|
||||||
m_chatModel->appendResponse("Response: ", prompt);
|
m_chatModel->appendResponse("Response: ", QString());
|
||||||
emit resetResponseRequested();
|
emit resetResponseRequested();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// the server needs to block until response is reset, so it calls resetResponse on its own m_llmThread
|
||||||
void Chat::serverNewPromptResponsePair(const QString &prompt)
|
void Chat::serverNewPromptResponsePair(const QString &prompt)
|
||||||
{
|
{
|
||||||
resetResponseState();
|
resetResponseState();
|
||||||
m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
|
m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
|
||||||
m_chatModel->appendPrompt("Prompt: ", prompt);
|
m_chatModel->appendPrompt("Prompt: ", prompt);
|
||||||
m_chatModel->appendResponse("Response: ", prompt);
|
m_chatModel->appendResponse("Response: ", QString());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Chat::restoringFromText() const
|
bool Chat::restoringFromText() const
|
||||||
|
@ -93,7 +93,7 @@ void ChatAPI::prompt(const std::string &prompt,
|
|||||||
bool allowContextShift,
|
bool allowContextShift,
|
||||||
PromptContext &promptCtx,
|
PromptContext &promptCtx,
|
||||||
bool special,
|
bool special,
|
||||||
std::string *fakeReply) {
|
std::optional<std::string_view> fakeReply) {
|
||||||
|
|
||||||
Q_UNUSED(promptCallback);
|
Q_UNUSED(promptCallback);
|
||||||
Q_UNUSED(allowContextShift);
|
Q_UNUSED(allowContextShift);
|
||||||
@ -121,7 +121,7 @@ void ChatAPI::prompt(const std::string &prompt,
|
|||||||
if (fakeReply) {
|
if (fakeReply) {
|
||||||
promptCtx.n_past += 1;
|
promptCtx.n_past += 1;
|
||||||
m_context.append(formattedPrompt);
|
m_context.append(formattedPrompt);
|
||||||
m_context.append(QString::fromStdString(*fakeReply));
|
m_context.append(QString::fromUtf8(fakeReply->data(), fakeReply->size()));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,9 +12,10 @@
|
|||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <stdexcept>
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
class QNetworkAccessManager;
|
class QNetworkAccessManager;
|
||||||
@ -72,7 +73,7 @@ public:
|
|||||||
bool allowContextShift,
|
bool allowContextShift,
|
||||||
PromptContext &ctx,
|
PromptContext &ctx,
|
||||||
bool special,
|
bool special,
|
||||||
std::string *fakeReply) override;
|
std::optional<std::string_view> fakeReply) override;
|
||||||
|
|
||||||
void setThreadCount(int32_t n_threads) override;
|
void setThreadCount(int32_t n_threads) override;
|
||||||
int32_t threadCount() const override;
|
int32_t threadCount() const override;
|
||||||
@ -97,7 +98,7 @@ protected:
|
|||||||
// them as they are only called from the default implementation of 'prompt' which we override and
|
// them as they are only called from the default implementation of 'prompt' which we override and
|
||||||
// completely replace
|
// completely replace
|
||||||
|
|
||||||
std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override
|
std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override
|
||||||
{
|
{
|
||||||
(void)ctx;
|
(void)ctx;
|
||||||
(void)str;
|
(void)str;
|
||||||
|
@ -626,16 +626,16 @@ void ChatLLM::regenerateResponse()
|
|||||||
m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end());
|
m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end());
|
||||||
m_promptResponseTokens = 0;
|
m_promptResponseTokens = 0;
|
||||||
m_promptTokens = 0;
|
m_promptTokens = 0;
|
||||||
m_response = std::string();
|
m_response = m_trimmedResponse = std::string();
|
||||||
emit responseChanged(QString::fromStdString(m_response));
|
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::resetResponse()
|
void ChatLLM::resetResponse()
|
||||||
{
|
{
|
||||||
m_promptTokens = 0;
|
m_promptTokens = 0;
|
||||||
m_promptResponseTokens = 0;
|
m_promptResponseTokens = 0;
|
||||||
m_response = std::string();
|
m_response = m_trimmedResponse = std::string();
|
||||||
emit responseChanged(QString::fromStdString(m_response));
|
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::resetContext()
|
void ChatLLM::resetContext()
|
||||||
@ -645,9 +645,12 @@ void ChatLLM::resetContext()
|
|||||||
m_ctx = LLModel::PromptContext();
|
m_ctx = LLModel::PromptContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
QString ChatLLM::response() const
|
QString ChatLLM::response(bool trim) const
|
||||||
{
|
{
|
||||||
return QString::fromStdString(remove_leading_whitespace(m_response));
|
std::string resp = m_response;
|
||||||
|
if (trim)
|
||||||
|
resp = remove_leading_whitespace(resp);
|
||||||
|
return QString::fromStdString(resp);
|
||||||
}
|
}
|
||||||
|
|
||||||
ModelInfo ChatLLM::modelInfo() const
|
ModelInfo ChatLLM::modelInfo() const
|
||||||
@ -705,7 +708,8 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
|
|||||||
// check for error
|
// check for error
|
||||||
if (token < 0) {
|
if (token < 0) {
|
||||||
m_response.append(response);
|
m_response.append(response);
|
||||||
emit responseChanged(QString::fromStdString(remove_leading_whitespace(m_response)));
|
m_trimmedResponse = remove_leading_whitespace(m_response);
|
||||||
|
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -715,7 +719,8 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
|
|||||||
m_timer->inc();
|
m_timer->inc();
|
||||||
Q_ASSERT(!response.empty());
|
Q_ASSERT(!response.empty());
|
||||||
m_response.append(response);
|
m_response.append(response);
|
||||||
emit responseChanged(QString::fromStdString(remove_leading_whitespace(m_response)));
|
m_trimmedResponse = remove_leading_whitespace(m_response);
|
||||||
|
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||||
return !m_stopGenerating;
|
return !m_stopGenerating;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -741,7 +746,7 @@ bool ChatLLM::prompt(const QList<QString> &collectionList, const QString &prompt
|
|||||||
|
|
||||||
bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
|
bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
|
||||||
int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
|
int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
|
||||||
int32_t repeat_penalty_tokens)
|
int32_t repeat_penalty_tokens, std::optional<QString> fakeReply)
|
||||||
{
|
{
|
||||||
if (!isModelLoaded())
|
if (!isModelLoaded())
|
||||||
return false;
|
return false;
|
||||||
@ -751,7 +756,7 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
|
|||||||
|
|
||||||
QList<ResultInfo> databaseResults;
|
QList<ResultInfo> databaseResults;
|
||||||
const int retrievalSize = MySettings::globalInstance()->localDocsRetrievalSize();
|
const int retrievalSize = MySettings::globalInstance()->localDocsRetrievalSize();
|
||||||
if (!collectionList.isEmpty()) {
|
if (!fakeReply && !collectionList.isEmpty()) {
|
||||||
emit requestRetrieveFromDB(collectionList, prompt, retrievalSize, &databaseResults); // blocks
|
emit requestRetrieveFromDB(collectionList, prompt, retrievalSize, &databaseResults); // blocks
|
||||||
emit databaseResultsChanged(databaseResults);
|
emit databaseResultsChanged(databaseResults);
|
||||||
}
|
}
|
||||||
@ -797,7 +802,8 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
|
|||||||
m_ctx.n_predict = old_n_predict; // now we are ready for a response
|
m_ctx.n_predict = old_n_predict; // now we are ready for a response
|
||||||
}
|
}
|
||||||
m_llModelInfo.model->prompt(prompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc,
|
m_llModelInfo.model->prompt(prompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc,
|
||||||
/*allowContextShift*/ true, m_ctx);
|
/*allowContextShift*/ true, m_ctx, false,
|
||||||
|
fakeReply.transform(std::mem_fn(&QString::toStdString)));
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
printf("\n");
|
printf("\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
@ -805,9 +811,9 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
|
|||||||
m_timer->stop();
|
m_timer->stop();
|
||||||
qint64 elapsed = totalTime.elapsed();
|
qint64 elapsed = totalTime.elapsed();
|
||||||
std::string trimmed = trim_whitespace(m_response);
|
std::string trimmed = trim_whitespace(m_response);
|
||||||
if (trimmed != m_response) {
|
if (trimmed != m_trimmedResponse) {
|
||||||
m_response = trimmed;
|
m_trimmedResponse = trimmed;
|
||||||
emit responseChanged(QString::fromStdString(m_response));
|
emit responseChanged(QString::fromStdString(m_trimmedResponse));
|
||||||
}
|
}
|
||||||
|
|
||||||
SuggestionMode mode = MySettings::globalInstance()->suggestionMode();
|
SuggestionMode mode = MySettings::globalInstance()->suggestionMode();
|
||||||
@ -1078,6 +1084,7 @@ bool ChatLLM::deserialize(QDataStream &stream, int version, bool deserializeKV,
|
|||||||
QString response;
|
QString response;
|
||||||
stream >> response;
|
stream >> response;
|
||||||
m_response = response.toStdString();
|
m_response = response.toStdString();
|
||||||
|
m_trimmedResponse = trim_whitespace(m_response);
|
||||||
QString nameResponse;
|
QString nameResponse;
|
||||||
stream >> nameResponse;
|
stream >> nameResponse;
|
||||||
m_nameResponse = nameResponse.toStdString();
|
m_nameResponse = nameResponse.toStdString();
|
||||||
@ -1306,10 +1313,9 @@ void ChatLLM::processRestoreStateFromText()
|
|||||||
|
|
||||||
auto &response = *it++;
|
auto &response = *it++;
|
||||||
Q_ASSERT(response.first != "Prompt: ");
|
Q_ASSERT(response.first != "Prompt: ");
|
||||||
auto responseText = response.second.toStdString();
|
|
||||||
|
|
||||||
m_llModelInfo.model->prompt(prompt.second.toStdString(), promptTemplate.toStdString(), promptFunc, nullptr,
|
m_llModelInfo.model->prompt(prompt.second.toStdString(), promptTemplate.toStdString(), promptFunc, nullptr,
|
||||||
/*allowContextShift*/ true, m_ctx, false, &responseText);
|
/*allowContextShift*/ true, m_ctx, false, response.second.toUtf8().constData());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m_stopGenerating) {
|
if (!m_stopGenerating) {
|
||||||
|
@ -116,7 +116,7 @@ public:
|
|||||||
void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
|
void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
|
||||||
void setMarkedForDeletion(bool b) { m_markedForDeletion = b; }
|
void setMarkedForDeletion(bool b) { m_markedForDeletion = b; }
|
||||||
|
|
||||||
QString response() const;
|
QString response(bool trim = true) const;
|
||||||
|
|
||||||
ModelInfo modelInfo() const;
|
ModelInfo modelInfo() const;
|
||||||
void setModelInfo(const ModelInfo &info);
|
void setModelInfo(const ModelInfo &info);
|
||||||
@ -198,7 +198,7 @@ Q_SIGNALS:
|
|||||||
protected:
|
protected:
|
||||||
bool promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
|
bool promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
|
||||||
int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
|
int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
|
||||||
int32_t repeat_penalty_tokens);
|
int32_t repeat_penalty_tokens, std::optional<QString> fakeReply = {});
|
||||||
bool handlePrompt(int32_t token);
|
bool handlePrompt(int32_t token);
|
||||||
bool handleResponse(int32_t token, const std::string &response);
|
bool handleResponse(int32_t token, const std::string &response);
|
||||||
bool handleNamePrompt(int32_t token);
|
bool handleNamePrompt(int32_t token);
|
||||||
@ -221,6 +221,7 @@ private:
|
|||||||
bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
|
bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
|
||||||
|
|
||||||
std::string m_response;
|
std::string m_response;
|
||||||
|
std::string m_trimmedResponse;
|
||||||
std::string m_nameResponse;
|
std::string m_nameResponse;
|
||||||
QString m_questionResponse;
|
QString m_questionResponse;
|
||||||
LLModelInfo m_llModelInfo;
|
LLModelInfo m_llModelInfo;
|
||||||
|
@ -20,24 +20,25 @@ class LocalDocsCollectionsModel : public QSortFilterProxyModel
|
|||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
Q_PROPERTY(int count READ count NOTIFY countChanged)
|
Q_PROPERTY(int count READ count NOTIFY countChanged)
|
||||||
Q_PROPERTY(int updatingCount READ updatingCount NOTIFY updatingCountChanged)
|
Q_PROPERTY(int updatingCount READ updatingCount NOTIFY updatingCountChanged)
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit LocalDocsCollectionsModel(QObject *parent);
|
explicit LocalDocsCollectionsModel(QObject *parent);
|
||||||
|
int count() const { return rowCount(); }
|
||||||
|
int updatingCount() const;
|
||||||
|
|
||||||
public Q_SLOTS:
|
public Q_SLOTS:
|
||||||
int count() const { return rowCount(); }
|
|
||||||
void setCollections(const QList<QString> &collections);
|
void setCollections(const QList<QString> &collections);
|
||||||
int updatingCount() const;
|
|
||||||
|
|
||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
void countChanged();
|
void countChanged();
|
||||||
void updatingCountChanged();
|
void updatingCountChanged();
|
||||||
|
|
||||||
private Q_SLOT:
|
|
||||||
void maybeTriggerUpdatingCountChanged();
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool filterAcceptsRow(int sourceRow, const QModelIndex &sourceParent) const override;
|
bool filterAcceptsRow(int sourceRow, const QModelIndex &sourceParent) const override;
|
||||||
|
|
||||||
|
private Q_SLOTS:
|
||||||
|
void maybeTriggerUpdatingCountChanged();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
QList<QString> m_collections;
|
QList<QString> m_collections;
|
||||||
int m_updatingCount = 0;
|
int m_updatingCount = 0;
|
||||||
|
@ -18,10 +18,12 @@
|
|||||||
#include <QVector>
|
#include <QVector>
|
||||||
#include <Qt>
|
#include <Qt>
|
||||||
#include <QtGlobal>
|
#include <QtGlobal>
|
||||||
#include <QtQml>
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
using namespace Qt::Literals::StringLiterals;
|
using namespace Qt::Literals::StringLiterals;
|
||||||
|
|
||||||
|
|
||||||
struct ModelInfo {
|
struct ModelInfo {
|
||||||
Q_GADGET
|
Q_GADGET
|
||||||
Q_PROPERTY(QString id READ id WRITE setId)
|
Q_PROPERTY(QString id READ id WRITE setId)
|
||||||
@ -523,7 +525,7 @@ private:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit ModelList();
|
explicit ModelList();
|
||||||
~ModelList() { for (auto *model: m_models) { delete model; } }
|
~ModelList() override { for (auto *model: std::as_const(m_models)) { delete model; } }
|
||||||
friend class MyModelList;
|
friend class MyModelList;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <QSettings>
|
#include <QSettings>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
|
#include <QTranslator>
|
||||||
#include <QVector>
|
#include <QVector>
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -4,22 +4,29 @@
|
|||||||
#include "chatllm.h"
|
#include "chatllm.h"
|
||||||
#include "database.h"
|
#include "database.h"
|
||||||
|
|
||||||
#include <QHttpServerRequest>
|
#include <QHttpServer>
|
||||||
#include <QHttpServerResponse>
|
#include <QHttpServerResponse>
|
||||||
#include <QObject>
|
#include <QJsonObject>
|
||||||
#include <QList>
|
#include <QList>
|
||||||
|
#include <QObject>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
class Chat;
|
class Chat;
|
||||||
class QHttpServer;
|
class ChatRequest;
|
||||||
|
class CompletionRequest;
|
||||||
|
|
||||||
|
|
||||||
class Server : public ChatLLM
|
class Server : public ChatLLM
|
||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Server(Chat *parent);
|
explicit Server(Chat *chat);
|
||||||
virtual ~Server();
|
~Server() override = default;
|
||||||
|
|
||||||
public Q_SLOTS:
|
public Q_SLOTS:
|
||||||
void start();
|
void start();
|
||||||
@ -27,14 +34,17 @@ public Q_SLOTS:
|
|||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
void requestServerNewPromptResponsePair(const QString &prompt);
|
void requestServerNewPromptResponsePair(const QString &prompt);
|
||||||
|
|
||||||
|
private:
|
||||||
|
auto handleCompletionRequest(const CompletionRequest &request) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>;
|
||||||
|
auto handleChatRequest(const ChatRequest &request) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>;
|
||||||
|
|
||||||
private Q_SLOTS:
|
private Q_SLOTS:
|
||||||
QHttpServerResponse handleCompletionRequest(const QHttpServerRequest &request, bool isChat);
|
|
||||||
void handleDatabaseResultsChanged(const QList<ResultInfo> &results) { m_databaseResults = results; }
|
void handleDatabaseResultsChanged(const QList<ResultInfo> &results) { m_databaseResults = results; }
|
||||||
void handleCollectionListChanged(const QList<QString> &collectionList) { m_collections = collectionList; }
|
void handleCollectionListChanged(const QList<QString> &collectionList) { m_collections = collectionList; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Chat *m_chat;
|
Chat *m_chat;
|
||||||
QHttpServer *m_server;
|
std::unique_ptr<QHttpServer> m_server;
|
||||||
QList<ResultInfo> m_databaseResults;
|
QList<ResultInfo> m_databaseResults;
|
||||||
QList<QString> m_collections;
|
QList<QString> m_collections;
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user