mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2024-10-01 01:06:10 -04:00
deploying new version with streaming
This commit is contained in:
parent
bce2b3025b
commit
057b9f51bc
@ -170,25 +170,25 @@ workflows:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
# build-py-deploy:
|
||||
# jobs:
|
||||
# - build-py-linux:
|
||||
# filters:
|
||||
# branches:
|
||||
# only:
|
||||
# - build-py-macos:
|
||||
# filters:
|
||||
# branches:
|
||||
# only:
|
||||
# - build-py-windows:
|
||||
# filters:
|
||||
# branches:
|
||||
# only:
|
||||
# - store-and-upload-wheels:
|
||||
# filters:
|
||||
# branches:
|
||||
# only:
|
||||
# requires:
|
||||
# - build-py-windows
|
||||
# - build-py-linux
|
||||
# - build-py-macos
|
||||
build-py-deploy:
|
||||
jobs:
|
||||
- build-py-linux:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- build-py-macos:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- build-py-windows:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- store-and-upload-wheels:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
requires:
|
||||
- build-py-windows
|
||||
- build-py-linux
|
||||
- build-py-macos
|
||||
|
@ -155,24 +155,26 @@ class GPT4All():
|
||||
print("Model downloaded at: " + download_path)
|
||||
return download_path
|
||||
|
||||
def generate(self, prompt: str, **generate_kwargs) -> str:
|
||||
def generate(self, prompt: str, streaming: bool = False, **generate_kwargs) -> str:
|
||||
"""
|
||||
Surfaced method of running generate without accessing model object.
|
||||
|
||||
Args:
|
||||
prompt: Raw string to be passed to model.
|
||||
streaming: If True, stream output to stdout as it is generated (default is False).
|
||||
**generate_kwargs: Optional kwargs to pass to prompt context.
|
||||
|
||||
Returns:
|
||||
Raw string of generated model response.
|
||||
"""
|
||||
return self.model.generate(prompt, **generate_kwargs)
|
||||
return self.model.generate(prompt, streaming=streaming, **generate_kwargs)
|
||||
|
||||
def chat_completion(self,
|
||||
messages: List[Dict],
|
||||
default_prompt_header: bool = True,
|
||||
default_prompt_footer: bool = True,
|
||||
verbose: bool = True,
|
||||
streaming: bool = True,
|
||||
**generate_kwargs) -> str:
|
||||
"""
|
||||
Format list of message dictionaries into a prompt and call model
|
||||
@ -189,6 +191,7 @@ class GPT4All():
|
||||
before user/assistant role messages.
|
||||
default_prompt_footer: If True (default), add default footer at end of prompt.
|
||||
verbose: If True (default), print full prompt and generated response.
|
||||
streaming: If True (default), stream output to stdout as it is generated.
|
||||
**generate_kwargs: Optional kwargs to pass to prompt context.
|
||||
|
||||
Returns:
|
||||
@ -206,7 +209,7 @@ class GPT4All():
|
||||
if verbose:
|
||||
print(full_prompt)
|
||||
|
||||
response = self.model.generate(full_prompt, **generate_kwargs)
|
||||
response = self.model.generate(full_prompt, streaming=streaming, **generate_kwargs)
|
||||
|
||||
if verbose:
|
||||
print(response)
|
||||
|
@ -1,25 +1,23 @@
|
||||
from io import StringIO
|
||||
import pkg_resources
|
||||
import ctypes
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
class DualOutput:
|
||||
def __init__(self, stdout, string_io):
|
||||
self.stdout = stdout
|
||||
self.string_io = string_io
|
||||
class DualStreamProcessor:
|
||||
def __init__(self, stream=None):
|
||||
self.stream = stream
|
||||
self.output = ""
|
||||
|
||||
def write(self, text):
|
||||
self.stdout.write(text)
|
||||
self.string_io.write(text)
|
||||
cleaned_text = re.sub(r"\n(?!\n)", "", text)
|
||||
if self.stream is not None:
|
||||
self.stream.write(cleaned_text)
|
||||
self.stream.flush()
|
||||
self.output += cleaned_text
|
||||
|
||||
def flush(self):
|
||||
# It's a good idea to also define a flush method that flushes both
|
||||
# outputs, as sys.stdout is expected to have this method.
|
||||
self.stdout.flush()
|
||||
self.string_io.flush()
|
||||
|
||||
# TODO: provide a config file to make this more robust
|
||||
LLMODEL_PATH = os.path.join("llmodel_DO_NOT_MODIFY", "build").replace("\\", "\\\\")
|
||||
@ -175,7 +173,7 @@ class LLModel:
|
||||
repeat_penalty: float = 1.2,
|
||||
repeat_last_n: int = 10,
|
||||
context_erase: float = .5,
|
||||
std_passthrough: bool = False) -> str:
|
||||
streaming: bool = False) -> str:
|
||||
"""
|
||||
Generate response from model from a prompt.
|
||||
|
||||
@ -183,12 +181,8 @@ class LLModel:
|
||||
----------
|
||||
prompt: str
|
||||
Question, task, or conversation for model to respond to
|
||||
add_default_header: bool, optional
|
||||
Whether to add a prompt header (default is True)
|
||||
add_default_footer: bool, optional
|
||||
Whether to add a prompt footer (default is True)
|
||||
verbose: bool, optional
|
||||
Whether to print prompt and response
|
||||
streaming: bool
|
||||
Stream response to stdout
|
||||
|
||||
Returns
|
||||
-------
|
||||
@ -198,13 +192,14 @@ class LLModel:
|
||||
prompt = prompt.encode('utf-8')
|
||||
prompt = ctypes.c_char_p(prompt)
|
||||
|
||||
# Redirect stdout to a stream processor so we can collect the response
|
||||
old_stdout = sys.stdout
|
||||
collect_response = StringIO()
|
||||
if std_passthrough:
|
||||
sys.stdout = DualOutput(old_stdout, collect_response)
|
||||
else:
|
||||
sys.stdout = collect_response
|
||||
|
||||
stream_processor = DualStreamProcessor()
|
||||
|
||||
if streaming:
|
||||
stream_processor.stream = sys.stdout
|
||||
|
||||
sys.stdout = stream_processor
|
||||
|
||||
context = LLModelPromptContext(
|
||||
logits_size=logits_size,
|
||||
@ -228,13 +223,10 @@ class LLModel:
|
||||
RecalculateCallback(self._recalculate_callback),
|
||||
context)
|
||||
|
||||
response = collect_response.getvalue()
|
||||
# Revert to old stdout
|
||||
sys.stdout = old_stdout
|
||||
|
||||
# Remove the unnecessary new lines from response
|
||||
response = re.sub(r"\n(?!\n)", "", response).strip()
|
||||
|
||||
return response
|
||||
return stream_processor.output
|
||||
|
||||
# Empty prompt callback
|
||||
@staticmethod
|
||||
|
@ -78,6 +78,8 @@ setup(
|
||||
'dev': [
|
||||
'pytest',
|
||||
'twine',
|
||||
'wheel',
|
||||
'setuptools',
|
||||
'mkdocs-material',
|
||||
'mkautodoc',
|
||||
'mkdocstrings[python]',
|
||||
|
Loading…
Reference in New Issue
Block a user