server: improve correctness of request parsing and responses (#2929)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Jared Van Bortel, 2024-09-09 10:48:57 -04:00, committed by GitHub
parent 1aae4ffe0a
commit 39005288c5
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
22 changed files with 790 additions and 328 deletions

View File

@@ -317,9 +317,9 @@ jobs:
 wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 packages=(
-bison build-essential ccache cuda-compiler-11-8 flex gperf libcublas-dev-11-8 libfontconfig1 libfreetype6
-libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev libx11-6
-libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
+bison build-essential ccache cuda-compiler-11-8 flex g++-12 gperf libcublas-dev-11-8 libfontconfig1
+libfreetype6 libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev
+libx11-6 libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
 libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
 libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 patchelf
 python3 vulkan-sdk
@@ -352,6 +352,8 @@ jobs:
 ~/Qt/Tools/CMake/bin/cmake \
 -S ../gpt4all-chat -B . \
 -DCMAKE_BUILD_TYPE=Release \
+-DCMAKE_C_COMPILER=gcc-12 \
+-DCMAKE_CXX_COMPILER=g++-12 \
 -DCMAKE_C_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
@@ -391,9 +393,9 @@ jobs:
 wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 packages=(
-bison build-essential ccache cuda-compiler-11-8 flex gperf libcublas-dev-11-8 libfontconfig1 libfreetype6
-libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev libx11-6
-libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
+bison build-essential ccache cuda-compiler-11-8 flex g++-12 gperf libcublas-dev-11-8 libfontconfig1
+libfreetype6 libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev
+libx11-6 libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
 libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
 libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 patchelf
 python3 vulkan-sdk
@@ -426,6 +428,8 @@ jobs:
 ~/Qt/Tools/CMake/bin/cmake \
 -S ../gpt4all-chat -B . \
 -DCMAKE_BUILD_TYPE=Release \
+-DCMAKE_C_COMPILER=gcc-12 \
+-DCMAKE_CXX_COMPILER=g++-12 \
 -DCMAKE_C_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
@@ -447,7 +451,7 @@ jobs:
 build-offline-chat-installer-windows:
 machine:
-image: 'windows-server-2019-vs2019:2022.08.1'
+image: windows-server-2022-gui:current
 resource_class: windows.large
 shell: powershell.exe -ExecutionPolicy Bypass
 steps:
@@ -538,7 +542,7 @@ jobs:
 sign-offline-chat-installer-windows:
 machine:
-image: 'windows-server-2019-vs2019:2022.08.1'
+image: windows-server-2022-gui:current
 resource_class: windows.large
 shell: powershell.exe -ExecutionPolicy Bypass
 steps:
@@ -568,7 +572,7 @@ jobs:
 build-online-chat-installer-windows:
 machine:
-image: 'windows-server-2019-vs2019:2022.08.1'
+image: windows-server-2022-gui:current
 resource_class: windows.large
 shell: powershell.exe -ExecutionPolicy Bypass
 steps:
@@ -666,7 +670,7 @@ jobs:
 sign-online-chat-installer-windows:
 machine:
-image: 'windows-server-2019-vs2019:2022.08.1'
+image: windows-server-2022-gui:current
 resource_class: windows.large
 shell: powershell.exe -ExecutionPolicy Bypass
 steps:
@@ -720,9 +724,9 @@ jobs:
 wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 packages=(
-bison build-essential ccache cuda-compiler-11-8 flex gperf libcublas-dev-11-8 libfontconfig1 libfreetype6
-libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev libx11-6
-libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
+bison build-essential ccache cuda-compiler-11-8 flex g++-12 gperf libcublas-dev-11-8 libfontconfig1
+libfreetype6 libgl1-mesa-dev libmysqlclient21 libnvidia-compute-550-server libodbc2 libpq5 libwayland-dev
+libx11-6 libx11-xcb1 libxcb-cursor0 libxcb-glx0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0
 libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-sync1 libxcb-util1 libxcb-xfixes0 libxcb-xinerama0
 libxcb-xkb1 libxcb1 libxext6 libxfixes3 libxi6 libxkbcommon-x11-0 libxkbcommon0 libxrender1 python3
 vulkan-sdk
@@ -744,6 +748,8 @@ jobs:
 ~/Qt/Tools/CMake/bin/cmake \
 -S gpt4all-chat -B build \
 -DCMAKE_BUILD_TYPE=Release \
+-DCMAKE_C_COMPILER=gcc-12 \
+-DCMAKE_CXX_COMPILER=g++-12 \
 -DCMAKE_C_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
@@ -758,7 +764,7 @@ jobs:
 build-gpt4all-chat-windows:
 machine:
-image: 'windows-server-2019-vs2019:2022.08.1'
+image: windows-server-2022-gui:current
 resource_class: windows.large
 shell: powershell.exe -ExecutionPolicy Bypass
 steps:
@@ -864,8 +870,8 @@
 paths:
 - ../.ccache
 build-ts-docs:
 docker:
 - image: cimg/base:stable
 steps:
 - checkout
@@ -887,7 +893,7 @@
 docker:
 - image: circleci/python:3.8
 steps:
 - checkout
 - run:
 name: Install dependencies
 command: |
@@ -928,7 +934,8 @@ jobs:
 wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 packages=(
-build-essential ccache cmake cuda-compiler-11-8 libcublas-dev-11-8 libnvidia-compute-550-server vulkan-sdk
+build-essential ccache cmake cuda-compiler-11-8 g++-12 libcublas-dev-11-8 libnvidia-compute-550-server
+vulkan-sdk
 )
 sudo apt-get update
 sudo apt-get install -y "${packages[@]}"
@@ -942,6 +949,8 @@ jobs:
 cd gpt4all-backend
 cmake -B build \
 -DCMAKE_BUILD_TYPE=Release \
+-DCMAKE_C_COMPILER=gcc-12 \
+-DCMAKE_CXX_COMPILER=g++-12 \
 -DCMAKE_C_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
@@ -1014,7 +1023,7 @@ jobs:
 build-py-windows:
 machine:
-image: 'windows-server-2019-vs2019:2022.08.1'
+image: windows-server-2022-gui:current
 resource_class: windows.large
 shell: powershell.exe -ExecutionPolicy Bypass
 steps:
@@ -1118,11 +1127,12 @@
 name: Install dependencies
 command: |
 wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
 sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
 wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 packages=(
-build-essential ccache cmake cuda-compiler-11-8 libcublas-dev-11-8 libnvidia-compute-550-server vulkan-sdk
+build-essential ccache cmake cuda-compiler-11-8 g++-12 libcublas-dev-11-8 libnvidia-compute-550-server
+vulkan-sdk
 )
 sudo apt-get update
 sudo apt-get install -y "${packages[@]}"
@@ -1135,6 +1145,9 @@ jobs:
 mkdir -p runtimes/build
 cd runtimes/build
 cmake ../.. \
+-DCMAKE_BUILD_TYPE=Release \
+-DCMAKE_C_COMPILER=gcc-12 \
+-DCMAKE_C_COMPILER=g++-12 \
 -DCMAKE_BUILD_TYPE=Release \
 -DCMAKE_C_COMPILER_LAUNCHER=ccache \
 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
@@ -1204,7 +1217,7 @@ jobs:
 build-bindings-backend-windows:
 machine:
-image: 'windows-server-2022-gui:2023.03.1'
+image: windows-server-2022-gui:current
 resource_class: windows.large
 shell: powershell.exe -ExecutionPolicy Bypass
 steps:
@@ -1230,7 +1243,7 @@ jobs:
 - run:
 name: Install dependencies
 command: |
 choco install -y ccache cmake ninja --installargs 'ADD_CMAKE_TO_PATH=System'
 - run:
 name: Build Libraries
 command: |
@@ -1263,8 +1276,8 @@ jobs:
 paths:
 - runtimes/win-x64_msvc/*.dll
 build-nodejs-linux:
 docker:
 - image: cimg/base:stable
 steps:
 - checkout
@@ -1280,10 +1293,10 @@ jobs:
 pkg-manager: yarn
 override-ci-command: yarn install
 - run:
 command: |
 cd gpt4all-bindings/typescript
 yarn prebuildify -t 18.16.0 --napi
 - run:
 command: |
 mkdir -p gpt4all-backend/prebuilds/linux-x64
 mkdir -p gpt4all-backend/runtimes/linux-x64
@@ -1292,10 +1305,10 @@ jobs:
 - persist_to_workspace:
 root: gpt4all-backend
 paths:
 - prebuilds/linux-x64/*.node
 - runtimes/linux-x64/*-*.so
 build-nodejs-macos:
 macos:
 xcode: 15.4.0
 steps:
@@ -1312,12 +1325,12 @@ jobs:
 pkg-manager: yarn
 override-ci-command: yarn install
 - run:
 command: |
 cd gpt4all-bindings/typescript
 yarn prebuildify -t 18.16.0 --napi
 - run:
 name: "Persisting all necessary things to workspace"
 command: |
 mkdir -p gpt4all-backend/prebuilds/darwin-x64
 mkdir -p gpt4all-backend/runtimes/darwin
 cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin
@@ -1328,7 +1341,7 @@ jobs:
 - prebuilds/darwin-x64/*.node
 - runtimes/darwin/*-*.*
 build-nodejs-windows:
 executor:
 name: win/default
 size: large
@@ -1342,29 +1355,29 @@ jobs:
 command: wget https://nodejs.org/dist/v18.16.0/node-v18.16.0-x86.msi -P C:\Users\circleci\Downloads\
 shell: cmd.exe
 - run: MsiExec.exe /i C:\Users\circleci\Downloads\node-v18.16.0-x86.msi /qn
 - run:
 command: |
 Start-Process powershell -verb runAs -Args "-start GeneralProfile"
 nvm install 18.16.0
 nvm use 18.16.0
 - run: node --version
 - run: corepack enable
 - run:
 command: |
 npm install -g yarn
 cd gpt4all-bindings/typescript
 yarn install
 - run:
 command: |
 cd gpt4all-bindings/typescript
 yarn prebuildify -t 18.16.0 --napi
 - run:
 command: |
 mkdir -p gpt4all-backend/prebuilds/win32-x64
 mkdir -p gpt4all-backend/runtimes/win32-x64
 cp /tmp/gpt4all-backend/runtimes/win-x64_msvc/*-*.dll gpt4all-backend/runtimes/win32-x64
 cp gpt4all-bindings/typescript/prebuilds/win32-x64/*.node gpt4all-backend/prebuilds/win32-x64
 - persist_to_workspace:
 root: gpt4all-backend
 paths:
@@ -1372,7 +1385,7 @@ jobs:
 - runtimes/win32-x64/*-*.dll
 prepare-npm-pkg:
 docker:
 - image: cimg/base:stable
 steps:
 - attach_workspace:
@@ -1383,19 +1396,19 @@ jobs:
 node-version: "18.16"
 - run: node --version
 - run: corepack enable
 - run:
 command: |
 cd gpt4all-bindings/typescript
 # excluding llmodel. nodejs bindings dont need llmodel.dll
 mkdir -p runtimes/win32-x64/native
 mkdir -p prebuilds/win32-x64/
 cp /tmp/gpt4all-backend/runtimes/win-x64_msvc/*-*.dll runtimes/win32-x64/native/
 cp /tmp/gpt4all-backend/prebuilds/win32-x64/*.node prebuilds/win32-x64/
 mkdir -p runtimes/linux-x64/native
 mkdir -p prebuilds/linux-x64/
 cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
 cp /tmp/gpt4all-backend/prebuilds/linux-x64/*.node prebuilds/linux-x64/
 # darwin has univeral runtime libraries
 mkdir -p runtimes/darwin/native
@@ -1403,22 +1416,22 @@ jobs:
 cp /tmp/gpt4all-backend/runtimes/darwin/*-*.* runtimes/darwin/native/
 cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/
 # Fallback build if user is not on above prebuilds
 mv -f binding.ci.gyp binding.gyp
 mkdir gpt4all-backend
 cd ../../gpt4all-backend
 mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/gpt4all-backend/
 # Test install
 - node/install-packages:
 app-dir: gpt4all-bindings/typescript
 pkg-manager: yarn
 override-ci-command: yarn install
 - run:
 command: |
 cd gpt4all-bindings/typescript
 yarn run test
 - run:
@@ -1552,7 +1565,7 @@ workflows:
 - build-py-linux
 - build-py-macos
 build-bindings:
 when:
 or:
 - << pipeline.parameters.run-all-workflows >>
 - << pipeline.parameters.run-python-workflow >>
@@ -1585,8 +1598,8 @@
 requires:
 - hold
 # NodeJs Jobs
 - prepare-npm-pkg:
 filters:
 branches:
 only:

.gitmodules vendored
View File

@@ -8,3 +8,6 @@
 [submodule "gpt4all-chat/deps/SingleApplication"]
 path = gpt4all-chat/deps/SingleApplication
 url = https://github.com/nomic-ai/SingleApplication.git
+[submodule "gpt4all-chat/deps/fmt"]
+path = gpt4all-chat/deps/fmt
+url = https://github.com/fmtlib/fmt.git

View File

@@ -33,7 +33,7 @@ set(LLMODEL_VERSION_PATCH 0)
 set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
 project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
-set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD 23)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
 set(BUILD_SHARED_LIBS ON)
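The C++ standard bump from 20 to 23 is load-bearing for the rest of this change set: later hunks rely on C++23's monadic std::optional operations, for example the fakeReply.transform(std::mem_fn(&QString::toStdString)) call in chatllm.cpp further down. A standalone sketch of that pattern (illustrative only, not taken from the gpt4all sources):

    // std::optional::transform (a C++23 library feature) maps a function over the
    // contained value when present and propagates std::nullopt otherwise, which
    // replaces manual "if (ptr) ... else nullptr" plumbing at call sites.
    #include <iostream>
    #include <optional>
    #include <string>

    static std::string addPrefix(const std::string &s) { return "fake: " + s; }

    int main() {
        std::optional<std::string> present = "spoofed reply";
        std::optional<std::string> absent;

        std::cout << present.transform(addPrefix).value_or("(generate)") << '\n';
        std::cout << absent.transform(addPrefix).value_or("(generate)") << '\n';
    }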

@@ -1 +1 @@
-Subproject commit 443665aec4721ecf57df8162e7e093a0cd674a76
+Subproject commit ced74fbad4b258507f3ec06e77eec9445583511a

View File

@@ -162,7 +162,7 @@ public:
 bool allowContextShift,
 PromptContext &ctx,
 bool special = false,
-std::string *fakeReply = nullptr);
+std::optional<std::string_view> fakeReply = {});
 using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
@@ -212,7 +212,7 @@ public:
 protected:
 // These are pure virtual because subclasses need to implement as the default implementation of
 // 'prompt' above calls these functions
-virtual std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special = false) = 0;
+virtual std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special = false) = 0;
 virtual bool isSpecialToken(Token id) const = 0;
 virtual std::string tokenToString(Token id) const = 0;
 virtual Token sampleToken(PromptContext &ctx) const = 0;
@@ -249,7 +249,8 @@ protected:
 std::function<bool(int32_t, const std::string&)> responseCallback,
 bool allowContextShift,
 PromptContext &promptCtx,
-std::vector<Token> embd_inp);
+std::vector<Token> embd_inp,
+bool isResponse = false);
 void generateResponse(std::function<bool(int32_t, const std::string&)> responseCallback,
 bool allowContextShift,
 PromptContext &promptCtx);
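For callers of LLModel::prompt(), the visible effect of this signature change is that "no fake reply" is now spelled {} or std::nullopt rather than nullptr, and a spoofed reply no longer has to be a std::string owned by the caller. A self-contained sketch of the old and new parameter shapes, using toy functions rather than the actual LLModel API:

    #include <iostream>
    #include <optional>
    #include <string>
    #include <string_view>

    // Old shape: nullptr is the "absent" sentinel, and a caller holding a QString,
    // QByteArray, or literal must first materialize (and keep alive) a std::string.
    void promptOld(const std::string *fakeReply = nullptr) {
        std::cout << (fakeReply ? *fakeReply : std::string("(generate)")) << '\n';
    }

    // New shape: absence is explicit in the type, any contiguous character buffer
    // binds as a view without copying, and the default keeps common calls short.
    void promptNew(std::optional<std::string_view> fakeReply = {}) {
        std::cout << (fakeReply ? *fakeReply : std::string_view("(generate)")) << '\n';
    }

    int main() {
        promptOld();                 // generate a real response
        promptNew();                 // same, without a sentinel pointer
        promptNew("spoofed reply");  // spoof: a literal binds directly as a view
    }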

View File

@@ -536,13 +536,13 @@ size_t LLamaModel::restoreState(const uint8_t *src)
 return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t*>(src));
 }
-std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, const std::string &str, bool special)
+std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, std::string_view str, bool special)
 {
 bool atStart = m_tokenize_last_token == -1;
 bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
 std::vector<LLModel::Token> fres(str.length() + 4);
 int32_t fres_len = llama_tokenize_gpt4all(
-d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
+d_ptr->model, str.data(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
 /*parse_special*/ special, /*insert_space*/ insertSpace
 );
 fres.resize(fres_len);

View File

@@ -8,6 +8,7 @@
 #include <memory>
 #include <string>
+#include <string_view>
 #include <vector>
 struct LLamaPrivate;
@@ -52,7 +53,7 @@ private:
 bool m_supportsCompletion = false;
 protected:
-std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override;
+std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override;
 bool isSpecialToken(Token id) const override;
 std::string tokenToString(Token id) const override;
 Token sampleToken(PromptContext &ctx) const override;

View File

@@ -12,6 +12,7 @@
 #include <memory>
 #include <optional>
 #include <string>
+#include <string_view>
 #include <vector>
 struct LLModelWrapper {
@@ -130,13 +131,10 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
 wrapper->promptContext.repeat_last_n = ctx->repeat_last_n;
 wrapper->promptContext.contextErase = ctx->context_erase;
-std::string fake_reply_str;
-if (fake_reply) { fake_reply_str = fake_reply; }
-auto *fake_reply_p = fake_reply ? &fake_reply_str : nullptr;
 // Call the C++ prompt method
 wrapper->llModel->prompt(prompt, prompt_template, prompt_callback, response_func, allow_context_shift,
-wrapper->promptContext, special, fake_reply_p);
+wrapper->promptContext, special,
+fake_reply ? std::make_optional<std::string_view>(fake_reply) : std::nullopt);
 // Update the C context by giving access to the wrappers raw pointers to std::vector data
 // which involves no copies

View File

@@ -11,6 +11,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <string>
+#include <string_view>
 #include <vector>
 namespace ranges = std::ranges;
@@ -45,7 +46,7 @@ void LLModel::prompt(const std::string &prompt,
 bool allowContextShift,
 PromptContext &promptCtx,
 bool special,
-std::string *fakeReply)
+std::optional<std::string_view> fakeReply)
 {
 if (!isModelLoaded()) {
 std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
@@ -129,11 +130,11 @@ void LLModel::prompt(const std::string &prompt,
 return; // error
 // decode the assistant's reply, either generated or spoofed
-if (fakeReply == nullptr) {
+if (!fakeReply) {
 generateResponse(responseCallback, allowContextShift, promptCtx);
 } else {
 embd_inp = tokenize(promptCtx, *fakeReply, false);
-if (!decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp))
+if (!decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp, true))
 return; // error
 }
@@ -157,7 +158,8 @@ bool LLModel::decodePrompt(std::function<bool(int32_t)> promptCallback,
 std::function<bool(int32_t, const std::string&)> responseCallback,
 bool allowContextShift,
 PromptContext &promptCtx,
-std::vector<Token> embd_inp) {
+std::vector<Token> embd_inp,
+bool isResponse) {
 if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
 responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
 std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
@@ -196,7 +198,9 @@ bool LLModel::decodePrompt(std::function<bool(int32_t)> promptCallback,
 for (size_t t = 0; t < tokens; ++t) {
 promptCtx.tokens.push_back(batch.at(t));
 promptCtx.n_past += 1;
-if (!promptCallback(batch.at(t)))
+Token tok = batch.at(t);
+bool res = isResponse ? responseCallback(tok, tokenToString(tok)) : promptCallback(tok);
+if (!res)
 return false;
 }
 i = batch_end;
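The new isResponse flag decides which callback receives tokens while a spoofed reply is being decoded: prompt tokens keep going to promptCallback, but tokens of a fake reply are surfaced through responseCallback so they are recorded as response text. A self-contained sketch of that dispatch with toy types (not the gpt4all implementation):

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    using Token = int32_t;

    static std::string tokenToString(Token t) { return "tok" + std::to_string(t) + " "; }

    // Mirrors the dispatch added to decodePrompt(): the flag selects the callback,
    // and a false return from either callback aborts decoding.
    bool feedTokens(const std::vector<Token> &batch,
                    const std::function<bool(Token)> &promptCallback,
                    const std::function<bool(Token, const std::string &)> &responseCallback,
                    bool isResponse) {
        for (Token tok : batch) {
            bool keepGoing = isResponse ? responseCallback(tok, tokenToString(tok))
                                        : promptCallback(tok);
            if (!keepGoing)
                return false;
        }
        return true;
    }

    int main() {
        auto onPrompt = [](Token) { return true; };
        auto onResponse = [](Token, const std::string &s) { std::cout << s; return true; };
        feedTokens({1, 2, 3}, onPrompt, onResponse, /*isResponse*/ true);
        std::cout << '\n';
    }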

View File

@@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fix a typo in Model Settings (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
 - Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
 - Fix a few issues with locating files and handling errors when loading remote models on startup ([#2875](https://github.com/nomic-ai/gpt4all/pull/2875))
+- Significantly improve API server request parsing and response correctness ([#2929](https://github.com/nomic-ai/gpt4all/pull/2929))
 ## [3.2.1] - 2024-08-13

View File

@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.16)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD 23)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 if(APPLE)
@@ -64,6 +64,12 @@ message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
 set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+set(FMT_INSTALL OFF)
+set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}")
+set(BUILD_SHARED_LIBS OFF)
+add_subdirectory(deps/fmt)
+set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}")
 add_subdirectory(../gpt4all-backend llmodel)
 set(CHAT_EXE_RESOURCES)
@@ -240,7 +246,7 @@ else()
 PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
 endif()
 target_link_libraries(chat
-PRIVATE llmodel SingleApplication)
+PRIVATE llmodel SingleApplication fmt::fmt)
 # -- install --
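fmt is vendored as a submodule, forced to build as a static library by temporarily overriding BUILD_SHARED_LIBS, and linked into the chat target. The call sites live in server.cpp, whose diff is suppressed below, so the following is only a generic illustration of what linking fmt::fmt provides, not the actual usage:

    #include <fmt/format.h>
    #include <iostream>
    #include <string>

    int main() {
        // Type-safe formatting into a std::string.
        std::string msg = fmt::format("model {} not found (requested {} completions)",
                                      "example-model", 2);
        std::cout << msg << '\n';
    }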

gpt4all-chat/deps/fmt Submodule

@@ -0,0 +1 @@
+Subproject commit 0c9fce2ffefecfdce794e1859584e25877b7b592

View File

@@ -239,16 +239,17 @@ void Chat::newPromptResponsePair(const QString &prompt)
 resetResponseState();
 m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
 m_chatModel->appendPrompt("Prompt: ", prompt);
-m_chatModel->appendResponse("Response: ", prompt);
+m_chatModel->appendResponse("Response: ", QString());
 emit resetResponseRequested();
 }
+// the server needs to block until response is reset, so it calls resetResponse on its own m_llmThread
 void Chat::serverNewPromptResponsePair(const QString &prompt)
 {
 resetResponseState();
 m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
 m_chatModel->appendPrompt("Prompt: ", prompt);
-m_chatModel->appendResponse("Response: ", prompt);
+m_chatModel->appendResponse("Response: ", QString());
 }
 bool Chat::restoringFromText() const

View File

@@ -93,7 +93,7 @@ void ChatAPI::prompt(const std::string &prompt,
 bool allowContextShift,
 PromptContext &promptCtx,
 bool special,
-std::string *fakeReply) {
+std::optional<std::string_view> fakeReply) {
 Q_UNUSED(promptCallback);
 Q_UNUSED(allowContextShift);
@@ -121,7 +121,7 @@ void ChatAPI::prompt(const std::string &prompt,
 if (fakeReply) {
 promptCtx.n_past += 1;
 m_context.append(formattedPrompt);
-m_context.append(QString::fromStdString(*fakeReply));
+m_context.append(QString::fromUtf8(fakeReply->data(), fakeReply->size()));
 return;
 }
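Because fakeReply is now a non-owning std::string_view, the conversion above switches from QString::fromStdString to the pointer-plus-length QString::fromUtf8 overload. The same conversion in isolation (the helper name is ours, not from the gpt4all sources):

    #include <QDebug>
    #include <QString>
    #include <string_view>

    // Decodes the viewed bytes as UTF-8 into a new QString without creating an
    // intermediate std::string along the way.
    static QString viewToQString(std::string_view sv)
    {
        return QString::fromUtf8(sv.data(), static_cast<qsizetype>(sv.size()));
    }

    int main()
    {
        qDebug() << viewToQString("spoofed reply");
        return 0;
    }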

View File

@@ -12,9 +12,10 @@
 #include <cstddef>
 #include <cstdint>
-#include <stdexcept>
 #include <functional>
+#include <stdexcept>
 #include <string>
+#include <string_view>
 #include <vector>
 class QNetworkAccessManager;
@@ -72,7 +73,7 @@ public:
 bool allowContextShift,
 PromptContext &ctx,
 bool special,
-std::string *fakeReply) override;
+std::optional<std::string_view> fakeReply) override;
 void setThreadCount(int32_t n_threads) override;
 int32_t threadCount() const override;
@@ -97,7 +98,7 @@ protected:
 // them as they are only called from the default implementation of 'prompt' which we override and
 // completely replace
-std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override
+std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override
 {
 (void)ctx;
 (void)str;

View File

@@ -626,16 +626,16 @@ void ChatLLM::regenerateResponse()
 m_ctx.tokens.erase(m_ctx.tokens.end() - m_promptResponseTokens, m_ctx.tokens.end());
 m_promptResponseTokens = 0;
 m_promptTokens = 0;
-m_response = std::string();
-emit responseChanged(QString::fromStdString(m_response));
+m_response = m_trimmedResponse = std::string();
+emit responseChanged(QString::fromStdString(m_trimmedResponse));
 }
 void ChatLLM::resetResponse()
 {
 m_promptTokens = 0;
 m_promptResponseTokens = 0;
-m_response = std::string();
-emit responseChanged(QString::fromStdString(m_response));
+m_response = m_trimmedResponse = std::string();
+emit responseChanged(QString::fromStdString(m_trimmedResponse));
 }
 void ChatLLM::resetContext()
@@ -645,9 +645,12 @@ void ChatLLM::resetContext()
 m_ctx = LLModel::PromptContext();
 }
-QString ChatLLM::response() const
+QString ChatLLM::response(bool trim) const
 {
-return QString::fromStdString(remove_leading_whitespace(m_response));
+std::string resp = m_response;
+if (trim)
+resp = remove_leading_whitespace(resp);
+return QString::fromStdString(resp);
 }
 ModelInfo ChatLLM::modelInfo() const
@@ -705,7 +708,8 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
 // check for error
 if (token < 0) {
 m_response.append(response);
-emit responseChanged(QString::fromStdString(remove_leading_whitespace(m_response)));
+m_trimmedResponse = remove_leading_whitespace(m_response);
+emit responseChanged(QString::fromStdString(m_trimmedResponse));
 return false;
 }
@@ -715,7 +719,8 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
 m_timer->inc();
 Q_ASSERT(!response.empty());
 m_response.append(response);
-emit responseChanged(QString::fromStdString(remove_leading_whitespace(m_response)));
+m_trimmedResponse = remove_leading_whitespace(m_response);
+emit responseChanged(QString::fromStdString(m_trimmedResponse));
 return !m_stopGenerating;
 }
@@ -741,7 +746,7 @@ bool ChatLLM::prompt(const QList<QString> &collectionList, const QString &prompt
 bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
 int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
-int32_t repeat_penalty_tokens)
+int32_t repeat_penalty_tokens, std::optional<QString> fakeReply)
 {
 if (!isModelLoaded())
 return false;
@@ -751,7 +756,7 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
 QList<ResultInfo> databaseResults;
 const int retrievalSize = MySettings::globalInstance()->localDocsRetrievalSize();
-if (!collectionList.isEmpty()) {
+if (!fakeReply && !collectionList.isEmpty()) {
 emit requestRetrieveFromDB(collectionList, prompt, retrievalSize, &databaseResults); // blocks
 emit databaseResultsChanged(databaseResults);
 }
@@ -797,7 +802,8 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
 m_ctx.n_predict = old_n_predict; // now we are ready for a response
 }
 m_llModelInfo.model->prompt(prompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc,
-/*allowContextShift*/ true, m_ctx);
+/*allowContextShift*/ true, m_ctx, false,
+fakeReply.transform(std::mem_fn(&QString::toStdString)));
 #if defined(DEBUG)
 printf("\n");
 fflush(stdout);
@@ -805,9 +811,9 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
 m_timer->stop();
 qint64 elapsed = totalTime.elapsed();
 std::string trimmed = trim_whitespace(m_response);
-if (trimmed != m_response) {
-m_response = trimmed;
-emit responseChanged(QString::fromStdString(m_response));
+if (trimmed != m_trimmedResponse) {
+m_trimmedResponse = trimmed;
+emit responseChanged(QString::fromStdString(m_trimmedResponse));
 }
 SuggestionMode mode = MySettings::globalInstance()->suggestionMode();
@@ -1078,6 +1084,7 @@ bool ChatLLM::deserialize(QDataStream &stream, int version, bool deserializeKV,
 QString response;
 stream >> response;
 m_response = response.toStdString();
+m_trimmedResponse = trim_whitespace(m_response);
 QString nameResponse;
 stream >> nameResponse;
 m_nameResponse = nameResponse.toStdString();
@@ -1306,10 +1313,9 @@ void ChatLLM::processRestoreStateFromText()
 auto &response = *it++;
 Q_ASSERT(response.first != "Prompt: ");
-auto responseText = response.second.toStdString();
 m_llModelInfo.model->prompt(prompt.second.toStdString(), promptTemplate.toStdString(), promptFunc, nullptr,
-/*allowContextShift*/ true, m_ctx, false, &responseText);
+/*allowContextShift*/ true, m_ctx, false, response.second.toUtf8().constData());
 }
 if (!m_stopGenerating) {
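The m_trimmedResponse member caches the left-trimmed text that the GUI displays, while m_response keeps exactly what the model produced; response(trim) exposes both, presumably so the API server can return the untrimmed text (server.cpp is suppressed below). A standalone sketch of that raw-versus-trimmed split, with a stand-in for the trimming helper:

    #include <cctype>
    #include <iostream>
    #include <string>

    // Stand-in for the project's remove_leading_whitespace() utility.
    static std::string remove_leading_whitespace(const std::string &s) {
        size_t i = 0;
        while (i < s.size() && std::isspace(static_cast<unsigned char>(s[i])))
            ++i;
        return s.substr(i);
    }

    struct ResponseHolder {
        std::string raw;      // like m_response: exactly what the model produced
        std::string trimmed;  // like m_trimmedResponse: cached for UI updates

        void append(const std::string &piece) {
            raw += piece;
            trimmed = remove_leading_whitespace(raw); // recomputed once per token
        }
        // trim defaults to true for the GUI; a server-side caller would pass false.
        std::string response(bool trim = true) const { return trim ? trimmed : raw; }
    };

    int main() {
        ResponseHolder r;
        r.append(" ");
        r.append(" Hello");
        std::cout << '[' << r.response() << "] vs [" << r.response(false) << "]\n";
    }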

View File

@@ -116,7 +116,7 @@ public:
 void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
 void setMarkedForDeletion(bool b) { m_markedForDeletion = b; }
-QString response() const;
+QString response(bool trim = true) const;
 ModelInfo modelInfo() const;
 void setModelInfo(const ModelInfo &info);
@@ -198,7 +198,7 @@ Q_SIGNALS:
 protected:
 bool promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
 int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
-int32_t repeat_penalty_tokens);
+int32_t repeat_penalty_tokens, std::optional<QString> fakeReply = {});
 bool handlePrompt(int32_t token);
 bool handleResponse(int32_t token, const std::string &response);
 bool handleNamePrompt(int32_t token);
@@ -221,6 +221,7 @@ private:
 bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
 std::string m_response;
+std::string m_trimmedResponse;
 std::string m_nameResponse;
 QString m_questionResponse;
 LLModelInfo m_llModelInfo;

View File

@@ -20,24 +20,25 @@ class LocalDocsCollectionsModel : public QSortFilterProxyModel
 Q_OBJECT
 Q_PROPERTY(int count READ count NOTIFY countChanged)
 Q_PROPERTY(int updatingCount READ updatingCount NOTIFY updatingCountChanged)
 public:
 explicit LocalDocsCollectionsModel(QObject *parent);
+int count() const { return rowCount(); }
+int updatingCount() const;
 public Q_SLOTS:
-int count() const { return rowCount(); }
 void setCollections(const QList<QString> &collections);
-int updatingCount() const;
 Q_SIGNALS:
 void countChanged();
 void updatingCountChanged();
-private Q_SLOT:
-void maybeTriggerUpdatingCountChanged();
 protected:
 bool filterAcceptsRow(int sourceRow, const QModelIndex &sourceParent) const override;
+private Q_SLOTS:
+void maybeTriggerUpdatingCountChanged();
 private:
 QList<QString> m_collections;
 int m_updatingCount = 0;

View File

@@ -18,10 +18,12 @@
 #include <QVector>
 #include <Qt>
 #include <QtGlobal>
+#include <QtQml>
+#include <utility>
 using namespace Qt::Literals::StringLiterals;
 struct ModelInfo {
 Q_GADGET
 Q_PROPERTY(QString id READ id WRITE setId)
@@ -523,7 +525,7 @@ private:
 protected:
 explicit ModelList();
-~ModelList() { for (auto *model: m_models) { delete model; } }
+~ModelList() override { for (auto *model: std::as_const(m_models)) { delete model; } }
 friend class MyModelList;
 };

View File

@@ -8,6 +8,7 @@
 #include <QSettings>
 #include <QString>
 #include <QStringList>
+#include <QTranslator>
 #include <QVector>
 #include <cstdint>

File diff suppressed because it is too large.

View File

@@ -4,22 +4,29 @@
 #include "chatllm.h"
 #include "database.h"
-#include <QHttpServerRequest>
+#include <QHttpServer>
 #include <QHttpServerResponse>
-#include <QObject>
+#include <QJsonObject>
 #include <QList>
+#include <QObject>
 #include <QString>
+#include <memory>
+#include <optional>
+#include <utility>
 class Chat;
-class QHttpServer;
+class ChatRequest;
+class CompletionRequest;
 class Server : public ChatLLM
 {
 Q_OBJECT
 public:
-Server(Chat *parent);
-virtual ~Server();
+explicit Server(Chat *chat);
+~Server() override = default;
 public Q_SLOTS:
 void start();
@@ -27,14 +34,17 @@ public Q_SLOTS:
 Q_SIGNALS:
 void requestServerNewPromptResponsePair(const QString &prompt);
+private:
+auto handleCompletionRequest(const CompletionRequest &request) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>;
+auto handleChatRequest(const ChatRequest &request) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>;
 private Q_SLOTS:
-QHttpServerResponse handleCompletionRequest(const QHttpServerRequest &request, bool isChat);
 void handleDatabaseResultsChanged(const QList<ResultInfo> &results) { m_databaseResults = results; }
 void handleCollectionListChanged(const QList<QString> &collectionList) { m_collections = collectionList; }
 private:
 Chat *m_chat;
-QHttpServer *m_server;
+std::unique_ptr<QHttpServer> m_server;
 QList<ResultInfo> m_databaseResults;
 QList<QString> m_collections;
 };
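The rewritten Server parses each request into a typed object (CompletionRequest, ChatRequest) before handling it, and each handler returns a std::pair of the HTTP response and an optional JSON object. Since server.cpp itself is suppressed above, the sketch below only illustrates the general parse-then-respond shape of such a QHttpServer route under assumed names; route registration only, with server setup and the response pair omitted:

    #include <QHttpServer>
    #include <QHttpServerRequest>
    #include <QHttpServerResponder>
    #include <QHttpServerResponse>
    #include <QJsonDocument>
    #include <QJsonObject>
    #include <QString>
    #include <optional>

    struct CompletionRequest {    // hypothetical stand-in for the parsed body
        QString model;
        QString prompt;
    };

    static std::optional<CompletionRequest> parseCompletionRequest(const QByteArray &body)
    {
        const auto doc = QJsonDocument::fromJson(body);
        if (!doc.isObject())
            return std::nullopt;  // malformed JSON: let the route answer 400
        const QJsonObject obj = doc.object();
        return CompletionRequest { obj["model"].toString(), obj["prompt"].toString() };
    }

    void registerRoutes(QHttpServer &server)
    {
        server.route("/v1/completions", QHttpServerRequest::Method::Post,
                     [](const QHttpServerRequest &request) {
            const auto req = parseCompletionRequest(request.body());
            if (!req)
                return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
            QJsonObject reply { { "model", req->model }, { "object", "text_completion" } };
            return QHttpServerResponse(reply);  // 200 with a JSON body
        });
    }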