mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2024-10-01 01:06:10 -04:00
Complete the settings for localdocs.
This commit is contained in:
parent
01b8c7617f
commit
d81302950e
@ -44,8 +44,8 @@ const auto SELECT_SQL = QLatin1String(R"(
|
|||||||
join folders ON documents.folder_id = folders.id
|
join folders ON documents.folder_id = folders.id
|
||||||
join collections ON folders.id = collections.folder_id
|
join collections ON folders.id = collections.folder_id
|
||||||
where chunks_fts match ? and collections.collection_name in (%1)
|
where chunks_fts match ? and collections.collection_name in (%1)
|
||||||
order by bm25(chunks_fts) desc
|
order by bm25(chunks_fts)
|
||||||
limit 3;
|
limit %2;
|
||||||
)");
|
)");
|
||||||
|
|
||||||
bool addChunk(QSqlQuery &q, int document_id, int chunk_id, const QString &chunk_text, int embedding_id,
|
bool addChunk(QSqlQuery &q, int document_id, int chunk_id, const QString &chunk_text, int embedding_id,
|
||||||
@ -120,7 +120,7 @@ QStringList generateGrams(const QString &input, int N)
|
|||||||
return ngrams;
|
return ngrams;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool selectChunk(QSqlQuery &q, const QList<QString> &collection_names, const QString &chunk_text)
|
bool selectChunk(QSqlQuery &q, const QList<QString> &collection_names, const QString &chunk_text, int retrievalSize)
|
||||||
{
|
{
|
||||||
const int N_WORDS = chunk_text.split(QRegularExpression("\\s+")).size();
|
const int N_WORDS = chunk_text.split(QRegularExpression("\\s+")).size();
|
||||||
for (int N = N_WORDS; N > 2; N--) {
|
for (int N = N_WORDS; N > 2; N--) {
|
||||||
@ -128,7 +128,7 @@ bool selectChunk(QSqlQuery &q, const QList<QString> &collection_names, const QSt
|
|||||||
QList<QString> text = generateGrams(chunk_text, N);
|
QList<QString> text = generateGrams(chunk_text, N);
|
||||||
QString orText = text.join(" OR ");
|
QString orText = text.join(" OR ");
|
||||||
const QString collection_names_str = collection_names.join("', '");
|
const QString collection_names_str = collection_names.join("', '");
|
||||||
const QString formatted_query = SELECT_SQL.arg("'" + collection_names_str + "'");
|
const QString formatted_query = SELECT_SQL.arg("'" + collection_names_str + "'").arg(QString::number(retrievalSize));
|
||||||
if (!q.prepare(formatted_query))
|
if (!q.prepare(formatted_query))
|
||||||
return false;
|
return false;
|
||||||
q.addBindValue(orText);
|
q.addBindValue(orText);
|
||||||
@ -480,9 +480,10 @@ QSqlError initDb()
|
|||||||
return QSqlError();
|
return QSqlError();
|
||||||
}
|
}
|
||||||
|
|
||||||
Database::Database()
|
Database::Database(int chunkSize)
|
||||||
: QObject(nullptr)
|
: QObject(nullptr)
|
||||||
, m_watcher(new QFileSystemWatcher(this))
|
, m_watcher(new QFileSystemWatcher(this))
|
||||||
|
, m_chunkSize(chunkSize)
|
||||||
{
|
{
|
||||||
moveToThread(&m_dbThread);
|
moveToThread(&m_dbThread);
|
||||||
connect(&m_dbThread, &QThread::started, this, &Database::start);
|
connect(&m_dbThread, &QThread::started, this, &Database::start);
|
||||||
@ -500,7 +501,6 @@ void Database::handleDocumentErrorAndScheduleNext(const QString &errorMessage,
|
|||||||
|
|
||||||
void Database::chunkStream(QTextStream &stream, int document_id)
|
void Database::chunkStream(QTextStream &stream, int document_id)
|
||||||
{
|
{
|
||||||
const int chunkSize = 256;
|
|
||||||
int chunk_id = 0;
|
int chunk_id = 0;
|
||||||
int charCount = 0;
|
int charCount = 0;
|
||||||
QList<QString> words;
|
QList<QString> words;
|
||||||
@ -510,7 +510,7 @@ void Database::chunkStream(QTextStream &stream, int document_id)
|
|||||||
stream >> word;
|
stream >> word;
|
||||||
charCount += word.length();
|
charCount += word.length();
|
||||||
words.append(word);
|
words.append(word);
|
||||||
if (charCount + words.size() - 1 >= chunkSize || stream.atEnd()) {
|
if (charCount + words.size() - 1 >= m_chunkSize || stream.atEnd()) {
|
||||||
const QString chunk = words.join(" ");
|
const QString chunk = words.join(" ");
|
||||||
QSqlQuery q;
|
QSqlQuery q;
|
||||||
if (!addChunk(q,
|
if (!addChunk(q,
|
||||||
@ -752,9 +752,7 @@ void Database::addFolder(const QString &collection, const QString &path)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!addFolderToWatch(path))
|
addFolderToWatch(path);
|
||||||
return;
|
|
||||||
|
|
||||||
scanDocuments(folder_id, path);
|
scanDocuments(folder_id, path);
|
||||||
updateCollectionList();
|
updateCollectionList();
|
||||||
}
|
}
|
||||||
@ -869,14 +867,14 @@ bool Database::removeFolderFromWatch(const QString &path)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Database::retrieveFromDB(const QList<QString> &collections, const QString &text)
|
void Database::retrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize)
|
||||||
{
|
{
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
qDebug() << "retrieveFromDB" << collections << text;
|
qDebug() << "retrieveFromDB" << collections << text << retrievalSize;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
QSqlQuery q;
|
QSqlQuery q;
|
||||||
if (!selectChunk(q, collections, text)) {
|
if (!selectChunk(q, collections, text, retrievalSize)) {
|
||||||
qDebug() << "ERROR: selecting chunks:" << q.lastError().text();
|
qDebug() << "ERROR: selecting chunks:" << q.lastError().text();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -957,6 +955,45 @@ void Database::cleanDB()
|
|||||||
updateCollectionList();
|
updateCollectionList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Database::changeChunkSize(int chunkSize)
|
||||||
|
{
|
||||||
|
if (chunkSize == m_chunkSize)
|
||||||
|
return;
|
||||||
|
|
||||||
|
#if defined(DEBUG)
|
||||||
|
qDebug() << "changeChunkSize" << chunkSize;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
m_chunkSize = chunkSize;
|
||||||
|
|
||||||
|
QSqlQuery q;
|
||||||
|
// Scan all documents in db to make sure they still exist
|
||||||
|
if (!q.prepare(SELECT_ALL_DOCUMENTS_SQL)) {
|
||||||
|
qWarning() << "ERROR: Cannot prepare sql for select all documents" << q.lastError();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!q.exec()) {
|
||||||
|
qWarning() << "ERROR: Cannot exec sql for select all documents" << q.lastError();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (q.next()) {
|
||||||
|
int document_id = q.value(0).toInt();
|
||||||
|
QString document_path = q.value(1).toString();
|
||||||
|
// Remove all chunks and documents to change the chunk size
|
||||||
|
QSqlQuery query;
|
||||||
|
if (!removeChunksByDocumentId(query, document_id)) {
|
||||||
|
qWarning() << "ERROR: Cannot remove chunks of document_id" << document_id << query.lastError();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!removeDocument(query, document_id)) {
|
||||||
|
qWarning() << "ERROR: Cannot remove document_id" << document_id << query.lastError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
addCurrentFolders();
|
||||||
|
}
|
||||||
|
|
||||||
void Database::directoryChanged(const QString &path)
|
void Database::directoryChanged(const QString &path)
|
||||||
{
|
{
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
|
@ -25,15 +25,16 @@ class Database : public QObject
|
|||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
public:
|
public:
|
||||||
Database();
|
Database(int chunkSize);
|
||||||
|
|
||||||
public Q_SLOTS:
|
public Q_SLOTS:
|
||||||
void scanQueue();
|
void scanQueue();
|
||||||
void scanDocuments(int folder_id, const QString &folder_path);
|
void scanDocuments(int folder_id, const QString &folder_path);
|
||||||
void addFolder(const QString &collection, const QString &path);
|
void addFolder(const QString &collection, const QString &path);
|
||||||
void removeFolder(const QString &collection, const QString &path);
|
void removeFolder(const QString &collection, const QString &path);
|
||||||
void retrieveFromDB(const QList<QString> &collections, const QString &text);
|
void retrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize);
|
||||||
void cleanDB();
|
void cleanDB();
|
||||||
|
void changeChunkSize(int chunkSize);
|
||||||
|
|
||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
void docsToScanChanged();
|
void docsToScanChanged();
|
||||||
@ -55,6 +56,7 @@ private:
|
|||||||
int document_id, const QString &document_path, const QSqlError &error);
|
int document_id, const QString &document_path, const QSqlError &error);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
int m_chunkSize;
|
||||||
QQueue<DocumentInfo> m_docsToScan;
|
QQueue<DocumentInfo> m_docsToScan;
|
||||||
QList<QString> m_retrieve;
|
QList<QString> m_retrieve;
|
||||||
QThread m_dbThread;
|
QThread m_dbThread;
|
||||||
|
@ -10,14 +10,24 @@ LocalDocs *LocalDocs::globalInstance()
|
|||||||
LocalDocs::LocalDocs()
|
LocalDocs::LocalDocs()
|
||||||
: QObject(nullptr)
|
: QObject(nullptr)
|
||||||
, m_localDocsModel(new LocalDocsModel(this))
|
, m_localDocsModel(new LocalDocsModel(this))
|
||||||
, m_database(new Database)
|
, m_database(nullptr)
|
||||||
{
|
{
|
||||||
|
QSettings settings;
|
||||||
|
settings.sync();
|
||||||
|
m_chunkSize = settings.value("localdocs/chunkSize", 256).toInt();
|
||||||
|
m_retrievalSize = settings.value("localdocs/retrievalSize", 3).toInt();
|
||||||
|
|
||||||
|
// Create the DB with the chunk size from settings
|
||||||
|
m_database = new Database(m_chunkSize);
|
||||||
|
|
||||||
connect(this, &LocalDocs::requestAddFolder, m_database,
|
connect(this, &LocalDocs::requestAddFolder, m_database,
|
||||||
&Database::addFolder, Qt::QueuedConnection);
|
&Database::addFolder, Qt::QueuedConnection);
|
||||||
connect(this, &LocalDocs::requestRemoveFolder, m_database,
|
connect(this, &LocalDocs::requestRemoveFolder, m_database,
|
||||||
&Database::removeFolder, Qt::QueuedConnection);
|
&Database::removeFolder, Qt::QueuedConnection);
|
||||||
connect(this, &LocalDocs::requestRetrieveFromDB, m_database,
|
connect(this, &LocalDocs::requestRetrieveFromDB, m_database,
|
||||||
&Database::retrieveFromDB, Qt::QueuedConnection);
|
&Database::retrieveFromDB, Qt::QueuedConnection);
|
||||||
|
connect(this, &LocalDocs::requestChunkSizeChange, m_database,
|
||||||
|
&Database::changeChunkSize, Qt::QueuedConnection);
|
||||||
connect(m_database, &Database::retrieveResult, this,
|
connect(m_database, &Database::retrieveResult, this,
|
||||||
&LocalDocs::handleRetrieveResult, Qt::QueuedConnection);
|
&LocalDocs::handleRetrieveResult, Qt::QueuedConnection);
|
||||||
connect(m_database, &Database::collectionListUpdated,
|
connect(m_database, &Database::collectionListUpdated,
|
||||||
@ -42,7 +52,36 @@ void LocalDocs::removeFolder(const QString &collection, const QString &path)
|
|||||||
void LocalDocs::requestRetrieve(const QList<QString> &collections, const QString &text)
|
void LocalDocs::requestRetrieve(const QList<QString> &collections, const QString &text)
|
||||||
{
|
{
|
||||||
m_retrieveResult = QList<QString>();
|
m_retrieveResult = QList<QString>();
|
||||||
emit requestRetrieveFromDB(collections, text);
|
emit requestRetrieveFromDB(collections, text, m_retrievalSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
int LocalDocs::chunkSize() const
|
||||||
|
{
|
||||||
|
return m_chunkSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LocalDocs::setChunkSize(int chunkSize)
|
||||||
|
{
|
||||||
|
if (m_chunkSize == chunkSize)
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_chunkSize = chunkSize;
|
||||||
|
emit chunkSizeChanged();
|
||||||
|
emit requestChunkSizeChange(chunkSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
int LocalDocs::retrievalSize() const
|
||||||
|
{
|
||||||
|
return m_retrievalSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LocalDocs::setRetrievalSize(int retrievalSize)
|
||||||
|
{
|
||||||
|
if (m_retrievalSize == retrievalSize)
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_retrievalSize = retrievalSize;
|
||||||
|
emit retrievalSizeChanged();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LocalDocs::handleRetrieveResult(const QList<QString> &result)
|
void LocalDocs::handleRetrieveResult(const QList<QString> &result)
|
||||||
|
@ -10,6 +10,8 @@ class LocalDocs : public QObject
|
|||||||
{
|
{
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
Q_PROPERTY(LocalDocsModel *localDocsModel READ localDocsModel NOTIFY localDocsModelChanged)
|
Q_PROPERTY(LocalDocsModel *localDocsModel READ localDocsModel NOTIFY localDocsModelChanged)
|
||||||
|
Q_PROPERTY(int chunkSize READ chunkSize WRITE setChunkSize NOTIFY chunkSizeChanged)
|
||||||
|
Q_PROPERTY(int retrievalSize READ retrievalSize WRITE setRetrievalSize NOTIFY retrievalSizeChanged)
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static LocalDocs *globalInstance();
|
static LocalDocs *globalInstance();
|
||||||
@ -22,17 +24,28 @@ public:
|
|||||||
QList<QString> result() const { return m_retrieveResult; }
|
QList<QString> result() const { return m_retrieveResult; }
|
||||||
void requestRetrieve(const QList<QString> &collections, const QString &text);
|
void requestRetrieve(const QList<QString> &collections, const QString &text);
|
||||||
|
|
||||||
|
int chunkSize() const;
|
||||||
|
void setChunkSize(int chunkSize);
|
||||||
|
|
||||||
|
int retrievalSize() const;
|
||||||
|
void setRetrievalSize(int retrievalSize);
|
||||||
|
|
||||||
Q_SIGNALS:
|
Q_SIGNALS:
|
||||||
void requestAddFolder(const QString &collection, const QString &path);
|
void requestAddFolder(const QString &collection, const QString &path);
|
||||||
void requestRemoveFolder(const QString &collection, const QString &path);
|
void requestRemoveFolder(const QString &collection, const QString &path);
|
||||||
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text);
|
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int N);
|
||||||
|
void requestChunkSizeChange(int chunkSize);
|
||||||
void receivedResult();
|
void receivedResult();
|
||||||
void localDocsModelChanged();
|
void localDocsModelChanged();
|
||||||
|
void chunkSizeChanged();
|
||||||
|
void retrievalSizeChanged();
|
||||||
|
|
||||||
private Q_SLOTS:
|
private Q_SLOTS:
|
||||||
void handleRetrieveResult(const QList<QString> &result);
|
void handleRetrieveResult(const QList<QString> &result);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
int m_chunkSize;
|
||||||
|
int m_retrievalSize;
|
||||||
LocalDocsModel *m_localDocsModel;
|
LocalDocsModel *m_localDocsModel;
|
||||||
Database *m_database;
|
Database *m_database;
|
||||||
QList<QString> m_retrieveResult;
|
QList<QString> m_retrieveResult;
|
||||||
|
@ -8,9 +8,31 @@ import localdocs
|
|||||||
|
|
||||||
Item {
|
Item {
|
||||||
id: root
|
id: root
|
||||||
|
|
||||||
property string collection: ""
|
property string collection: ""
|
||||||
property string folder_path: ""
|
property string folder_path: ""
|
||||||
|
|
||||||
|
property int defaultChunkSize: 256
|
||||||
|
property int defaultRetrievalSize: 3
|
||||||
|
|
||||||
|
property alias chunkSize: settings.chunkSize
|
||||||
|
property alias retrievalSize: settings.retrievalSize
|
||||||
|
|
||||||
|
Settings {
|
||||||
|
id: settings
|
||||||
|
category: "localdocs"
|
||||||
|
property int chunkSize: root.defaultChunkSize
|
||||||
|
property int retrievalSize: root.defaultRetrievalSize
|
||||||
|
}
|
||||||
|
|
||||||
|
function restoreLocalDocsDefaults() {
|
||||||
|
settings.chunkSize = root.defaultChunkSize
|
||||||
|
settings.retrievalSize = root.defaultRetrievalSize
|
||||||
|
LocalDocs.chunkSize = settings.chunkSize
|
||||||
|
LocalDocs.retrievalSize = settings.retrievalSize
|
||||||
|
settings.sync()
|
||||||
|
}
|
||||||
|
|
||||||
FolderDialog {
|
FolderDialog {
|
||||||
id: folderDialog
|
id: folderDialog
|
||||||
title: "Please choose a directory"
|
title: "Please choose a directory"
|
||||||
@ -188,6 +210,21 @@ Item {
|
|||||||
Layout.column: 1
|
Layout.column: 1
|
||||||
ToolTip.text: qsTr("Number of characters per document snippet.\nNOTE: larger numbers increase likelihood of factual responses, but also result in slower generation.")
|
ToolTip.text: qsTr("Number of characters per document snippet.\nNOTE: larger numbers increase likelihood of factual responses, but also result in slower generation.")
|
||||||
ToolTip.visible: hovered
|
ToolTip.visible: hovered
|
||||||
|
text: settings.chunkSize.toString()
|
||||||
|
validator: IntValidator {
|
||||||
|
bottom: 1
|
||||||
|
}
|
||||||
|
onEditingFinished: {
|
||||||
|
var val = parseInt(text)
|
||||||
|
if (!isNaN(val)) {
|
||||||
|
settings.chunkSize = val
|
||||||
|
settings.sync()
|
||||||
|
focus = false
|
||||||
|
LocalDocs.chunkSize = settings.chunkSize
|
||||||
|
} else {
|
||||||
|
text = settings.chunkSize.toString()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Label {
|
Label {
|
||||||
@ -203,6 +240,21 @@ Item {
|
|||||||
Layout.column: 1
|
Layout.column: 1
|
||||||
ToolTip.text: qsTr("Best N matches of retrieved document snippets to add to the context for prompt.\nNOTE: larger numbers increase likelihood of factual responses, but also result in slower generation.")
|
ToolTip.text: qsTr("Best N matches of retrieved document snippets to add to the context for prompt.\nNOTE: larger numbers increase likelihood of factual responses, but also result in slower generation.")
|
||||||
ToolTip.visible: hovered
|
ToolTip.visible: hovered
|
||||||
|
text: settings.retrievalSize.toString()
|
||||||
|
validator: IntValidator {
|
||||||
|
bottom: 1
|
||||||
|
}
|
||||||
|
onEditingFinished: {
|
||||||
|
var val = parseInt(text)
|
||||||
|
if (!isNaN(val)) {
|
||||||
|
settings.retrievalSize = val
|
||||||
|
settings.sync()
|
||||||
|
focus = false
|
||||||
|
LocalDocs.retrievalSize = settings.retrievalSize
|
||||||
|
} else {
|
||||||
|
text = settings.retrievalSize.toString()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MyButton {
|
MyButton {
|
||||||
@ -215,7 +267,7 @@ Item {
|
|||||||
Accessible.name: text
|
Accessible.name: text
|
||||||
Accessible.description: qsTr("Restores the settings dialog to a default state")
|
Accessible.description: qsTr("Restores the settings dialog to a default state")
|
||||||
onClicked: {
|
onClicked: {
|
||||||
// settingsDialog.restoreGenerationDefaults()
|
root.restoreLocalDocsDefaults();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user