Complete the settings for localdocs.

This commit is contained in:
Adam Treat 2023-05-23 20:26:31 -04:00 committed by AT
parent 01b8c7617f
commit d81302950e
5 changed files with 162 additions and 19 deletions

View File

@ -44,8 +44,8 @@ const auto SELECT_SQL = QLatin1String(R"(
join folders ON documents.folder_id = folders.id join folders ON documents.folder_id = folders.id
join collections ON folders.id = collections.folder_id join collections ON folders.id = collections.folder_id
where chunks_fts match ? and collections.collection_name in (%1) where chunks_fts match ? and collections.collection_name in (%1)
order by bm25(chunks_fts) desc order by bm25(chunks_fts)
limit 3; limit %2;
)"); )");
bool addChunk(QSqlQuery &q, int document_id, int chunk_id, const QString &chunk_text, int embedding_id, bool addChunk(QSqlQuery &q, int document_id, int chunk_id, const QString &chunk_text, int embedding_id,
@ -120,7 +120,7 @@ QStringList generateGrams(const QString &input, int N)
return ngrams; return ngrams;
} }
bool selectChunk(QSqlQuery &q, const QList<QString> &collection_names, const QString &chunk_text) bool selectChunk(QSqlQuery &q, const QList<QString> &collection_names, const QString &chunk_text, int retrievalSize)
{ {
const int N_WORDS = chunk_text.split(QRegularExpression("\\s+")).size(); const int N_WORDS = chunk_text.split(QRegularExpression("\\s+")).size();
for (int N = N_WORDS; N > 2; N--) { for (int N = N_WORDS; N > 2; N--) {
@ -128,7 +128,7 @@ bool selectChunk(QSqlQuery &q, const QList<QString> &collection_names, const QSt
QList<QString> text = generateGrams(chunk_text, N); QList<QString> text = generateGrams(chunk_text, N);
QString orText = text.join(" OR "); QString orText = text.join(" OR ");
const QString collection_names_str = collection_names.join("', '"); const QString collection_names_str = collection_names.join("', '");
const QString formatted_query = SELECT_SQL.arg("'" + collection_names_str + "'"); const QString formatted_query = SELECT_SQL.arg("'" + collection_names_str + "'").arg(QString::number(retrievalSize));
if (!q.prepare(formatted_query)) if (!q.prepare(formatted_query))
return false; return false;
q.addBindValue(orText); q.addBindValue(orText);
@ -480,9 +480,10 @@ QSqlError initDb()
return QSqlError(); return QSqlError();
} }
Database::Database() Database::Database(int chunkSize)
: QObject(nullptr) : QObject(nullptr)
, m_watcher(new QFileSystemWatcher(this)) , m_watcher(new QFileSystemWatcher(this))
, m_chunkSize(chunkSize)
{ {
moveToThread(&m_dbThread); moveToThread(&m_dbThread);
connect(&m_dbThread, &QThread::started, this, &Database::start); connect(&m_dbThread, &QThread::started, this, &Database::start);
@ -500,7 +501,6 @@ void Database::handleDocumentErrorAndScheduleNext(const QString &errorMessage,
void Database::chunkStream(QTextStream &stream, int document_id) void Database::chunkStream(QTextStream &stream, int document_id)
{ {
const int chunkSize = 256;
int chunk_id = 0; int chunk_id = 0;
int charCount = 0; int charCount = 0;
QList<QString> words; QList<QString> words;
@ -510,7 +510,7 @@ void Database::chunkStream(QTextStream &stream, int document_id)
stream >> word; stream >> word;
charCount += word.length(); charCount += word.length();
words.append(word); words.append(word);
if (charCount + words.size() - 1 >= chunkSize || stream.atEnd()) { if (charCount + words.size() - 1 >= m_chunkSize || stream.atEnd()) {
const QString chunk = words.join(" "); const QString chunk = words.join(" ");
QSqlQuery q; QSqlQuery q;
if (!addChunk(q, if (!addChunk(q,
@ -752,9 +752,7 @@ void Database::addFolder(const QString &collection, const QString &path)
return; return;
} }
if (!addFolderToWatch(path)) addFolderToWatch(path);
return;
scanDocuments(folder_id, path); scanDocuments(folder_id, path);
updateCollectionList(); updateCollectionList();
} }
@ -869,14 +867,14 @@ bool Database::removeFolderFromWatch(const QString &path)
return true; return true;
} }
void Database::retrieveFromDB(const QList<QString> &collections, const QString &text) void Database::retrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize)
{ {
#if defined(DEBUG) #if defined(DEBUG)
qDebug() << "retrieveFromDB" << collections << text; qDebug() << "retrieveFromDB" << collections << text << retrievalSize;
#endif #endif
QSqlQuery q; QSqlQuery q;
if (!selectChunk(q, collections, text)) { if (!selectChunk(q, collections, text, retrievalSize)) {
qDebug() << "ERROR: selecting chunks:" << q.lastError().text(); qDebug() << "ERROR: selecting chunks:" << q.lastError().text();
return; return;
} }
@ -957,6 +955,45 @@ void Database::cleanDB()
updateCollectionList(); updateCollectionList();
} }
// Switches the database to a new chunk size and discards everything that was
// chunked with the old one.
//
// Does nothing when chunkSize equals the current m_chunkSize. Otherwise the new
// size is stored first, then every row returned by SELECT_ALL_DOCUMENTS_SQL has
// its chunks and its document entry removed, and finally addCurrentFolders() is
// called.
//
// Note that m_chunkSize is updated before the queries run: if preparing or
// executing the select fails, the function returns early with the new size
// stored but the existing chunks left untouched.
void Database::changeChunkSize(int chunkSize)
{
    if (chunkSize == m_chunkSize)
        return;

#if defined(DEBUG)
    qDebug() << "changeChunkSize" << chunkSize;
#endif

    m_chunkSize = chunkSize;

    // Enumerate every document currently recorded in the db so that each one
    // can be dropped below.
    QSqlQuery q;
    if (!q.prepare(SELECT_ALL_DOCUMENTS_SQL)) {
        qWarning() << "ERROR: Cannot prepare sql for select all documents" << q.lastError();
        return;
    }

    if (!q.exec()) {
        qWarning() << "ERROR: Cannot exec sql for select all documents" << q.lastError();
        return;
    }

    while (q.next()) {
        const int document_id = q.value(0).toInt();

        // Remove all chunks and documents to change the chunk size. Failures
        // are logged and the loop carries on with the remaining documents.
        QSqlQuery query;
        if (!removeChunksByDocumentId(query, document_id)) {
            qWarning() << "ERROR: Cannot remove chunks of document_id" << document_id << query.lastError();
        }

        if (!removeDocument(query, document_id)) {
            qWarning() << "ERROR: Cannot remove document_id" << document_id << query.lastError();
        }
    }

    // NOTE(review): presumably re-adds the known folders so their documents are
    // rescanned and re-chunked with the new m_chunkSize — confirm against
    // addCurrentFolders(), which is not visible here.
    addCurrentFolders();
}
void Database::directoryChanged(const QString &path) void Database::directoryChanged(const QString &path)
{ {
#if defined(DEBUG) #if defined(DEBUG)

View File

@ -25,15 +25,16 @@ class Database : public QObject
{ {
Q_OBJECT Q_OBJECT
public: public:
Database(); Database(int chunkSize);
public Q_SLOTS: public Q_SLOTS:
void scanQueue(); void scanQueue();
void scanDocuments(int folder_id, const QString &folder_path); void scanDocuments(int folder_id, const QString &folder_path);
void addFolder(const QString &collection, const QString &path); void addFolder(const QString &collection, const QString &path);
void removeFolder(const QString &collection, const QString &path); void removeFolder(const QString &collection, const QString &path);
void retrieveFromDB(const QList<QString> &collections, const QString &text); void retrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize);
void cleanDB(); void cleanDB();
void changeChunkSize(int chunkSize);
Q_SIGNALS: Q_SIGNALS:
void docsToScanChanged(); void docsToScanChanged();
@ -55,6 +56,7 @@ private:
int document_id, const QString &document_path, const QSqlError &error); int document_id, const QString &document_path, const QSqlError &error);
private: private:
int m_chunkSize;
QQueue<DocumentInfo> m_docsToScan; QQueue<DocumentInfo> m_docsToScan;
QList<QString> m_retrieve; QList<QString> m_retrieve;
QThread m_dbThread; QThread m_dbThread;

View File

@ -10,14 +10,24 @@ LocalDocs *LocalDocs::globalInstance()
LocalDocs::LocalDocs() LocalDocs::LocalDocs()
: QObject(nullptr) : QObject(nullptr)
, m_localDocsModel(new LocalDocsModel(this)) , m_localDocsModel(new LocalDocsModel(this))
, m_database(new Database) , m_database(nullptr)
{ {
QSettings settings;
settings.sync();
m_chunkSize = settings.value("localdocs/chunkSize", 256).toInt();
m_retrievalSize = settings.value("localdocs/retrievalSize", 3).toInt();
// Create the DB with the chunk size from settings
m_database = new Database(m_chunkSize);
connect(this, &LocalDocs::requestAddFolder, m_database, connect(this, &LocalDocs::requestAddFolder, m_database,
&Database::addFolder, Qt::QueuedConnection); &Database::addFolder, Qt::QueuedConnection);
connect(this, &LocalDocs::requestRemoveFolder, m_database, connect(this, &LocalDocs::requestRemoveFolder, m_database,
&Database::removeFolder, Qt::QueuedConnection); &Database::removeFolder, Qt::QueuedConnection);
connect(this, &LocalDocs::requestRetrieveFromDB, m_database, connect(this, &LocalDocs::requestRetrieveFromDB, m_database,
&Database::retrieveFromDB, Qt::QueuedConnection); &Database::retrieveFromDB, Qt::QueuedConnection);
connect(this, &LocalDocs::requestChunkSizeChange, m_database,
&Database::changeChunkSize, Qt::QueuedConnection);
connect(m_database, &Database::retrieveResult, this, connect(m_database, &Database::retrieveResult, this,
&LocalDocs::handleRetrieveResult, Qt::QueuedConnection); &LocalDocs::handleRetrieveResult, Qt::QueuedConnection);
connect(m_database, &Database::collectionListUpdated, connect(m_database, &Database::collectionListUpdated,
@ -42,7 +52,36 @@ void LocalDocs::removeFolder(const QString &collection, const QString &path)
void LocalDocs::requestRetrieve(const QList<QString> &collections, const QString &text) void LocalDocs::requestRetrieve(const QList<QString> &collections, const QString &text)
{ {
m_retrieveResult = QList<QString>(); m_retrieveResult = QList<QString>();
emit requestRetrieveFromDB(collections, text); emit requestRetrieveFromDB(collections, text, m_retrievalSize);
}
int LocalDocs::chunkSize() const
{
return m_chunkSize;
}
// Setter for the chunkSize property.
//
// When the value actually changes it is stored, chunkSizeChanged() is emitted,
// and requestChunkSizeChange() is emitted with the new value. Setting the value
// it already has is a no-op and emits nothing.
void LocalDocs::setChunkSize(int chunkSize)
{
    if (m_chunkSize != chunkSize) {
        m_chunkSize = chunkSize;
        emit chunkSizeChanged();
        emit requestChunkSizeChange(chunkSize);
    }
}
int LocalDocs::retrievalSize() const
{
return m_retrievalSize;
}
// Setter for the retrievalSize property.
//
// Stores the new value and emits retrievalSizeChanged() only when it differs
// from the current one. Nothing else is emitted here; m_retrievalSize is passed
// along with each requestRetrieveFromDB() emission in requestRetrieve().
void LocalDocs::setRetrievalSize(int retrievalSize)
{
if (m_retrievalSize == retrievalSize)
return;
m_retrievalSize = retrievalSize;
emit retrievalSizeChanged();
} }
void LocalDocs::handleRetrieveResult(const QList<QString> &result) void LocalDocs::handleRetrieveResult(const QList<QString> &result)

View File

@ -10,6 +10,8 @@ class LocalDocs : public QObject
{ {
Q_OBJECT Q_OBJECT
Q_PROPERTY(LocalDocsModel *localDocsModel READ localDocsModel NOTIFY localDocsModelChanged) Q_PROPERTY(LocalDocsModel *localDocsModel READ localDocsModel NOTIFY localDocsModelChanged)
Q_PROPERTY(int chunkSize READ chunkSize WRITE setChunkSize NOTIFY chunkSizeChanged)
Q_PROPERTY(int retrievalSize READ retrievalSize WRITE setRetrievalSize NOTIFY retrievalSizeChanged)
public: public:
static LocalDocs *globalInstance(); static LocalDocs *globalInstance();
@ -22,17 +24,28 @@ public:
QList<QString> result() const { return m_retrieveResult; } QList<QString> result() const { return m_retrieveResult; }
void requestRetrieve(const QList<QString> &collections, const QString &text); void requestRetrieve(const QList<QString> &collections, const QString &text);
int chunkSize() const;
void setChunkSize(int chunkSize);
int retrievalSize() const;
void setRetrievalSize(int retrievalSize);
Q_SIGNALS: Q_SIGNALS:
void requestAddFolder(const QString &collection, const QString &path); void requestAddFolder(const QString &collection, const QString &path);
void requestRemoveFolder(const QString &collection, const QString &path); void requestRemoveFolder(const QString &collection, const QString &path);
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text); void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int N);
void requestChunkSizeChange(int chunkSize);
void receivedResult(); void receivedResult();
void localDocsModelChanged(); void localDocsModelChanged();
void chunkSizeChanged();
void retrievalSizeChanged();
private Q_SLOTS: private Q_SLOTS:
void handleRetrieveResult(const QList<QString> &result); void handleRetrieveResult(const QList<QString> &result);
private: private:
int m_chunkSize;
int m_retrievalSize;
LocalDocsModel *m_localDocsModel; LocalDocsModel *m_localDocsModel;
Database *m_database; Database *m_database;
QList<QString> m_retrieveResult; QList<QString> m_retrieveResult;

View File

@ -8,9 +8,31 @@ import localdocs
Item { Item {
id: root id: root
property string collection: "" property string collection: ""
property string folder_path: "" property string folder_path: ""
property int defaultChunkSize: 256
property int defaultRetrievalSize: 3
property alias chunkSize: settings.chunkSize
property alias retrievalSize: settings.retrievalSize
// Settings object for the localdocs page, exposed through the root aliases
// chunkSize and retrievalSize. Both properties start from the defaults declared
// on root.
// NOTE(review): presumably category + property name map to the
// "localdocs/chunkSize" and "localdocs/retrievalSize" keys that the C++
// LocalDocs constructor reads via QSettings — confirm.
Settings {
id: settings
category: "localdocs"
property int chunkSize: root.defaultChunkSize
property int retrievalSize: root.defaultRetrievalSize
}
// Resets chunkSize and retrievalSize to root's default values, copies the
// resulting settings values onto the LocalDocs object, then calls
// settings.sync().
function restoreLocalDocsDefaults() {
settings.chunkSize = root.defaultChunkSize
settings.retrievalSize = root.defaultRetrievalSize
LocalDocs.chunkSize = settings.chunkSize
LocalDocs.retrievalSize = settings.retrievalSize
settings.sync()
}
FolderDialog { FolderDialog {
id: folderDialog id: folderDialog
title: "Please choose a directory" title: "Please choose a directory"
@ -188,6 +210,21 @@ Item {
Layout.column: 1 Layout.column: 1
ToolTip.text: qsTr("Number of characters per document snippet.\nNOTE: larger numbers increase likelihood of factual responses, but also result in slower generation.") ToolTip.text: qsTr("Number of characters per document snippet.\nNOTE: larger numbers increase likelihood of factual responses, but also result in slower generation.")
ToolTip.visible: hovered ToolTip.visible: hovered
text: settings.chunkSize.toString()
validator: IntValidator {
bottom: 1
}
// Commits the edited chunk size. If the field text parses to a number, it is
// stored in settings, synced, focus is dropped, and the value is forwarded to
// LocalDocs.chunkSize. Otherwise the field text is reset to the stored value.
onEditingFinished: {
var val = parseInt(text)
if (!isNaN(val)) {
settings.chunkSize = val
settings.sync()
focus = false
LocalDocs.chunkSize = settings.chunkSize
} else {
text = settings.chunkSize.toString()
}
}
} }
Label { Label {
@ -203,6 +240,21 @@ Item {
Layout.column: 1 Layout.column: 1
ToolTip.text: qsTr("Best N matches of retrieved document snippets to add to the context for prompt.\nNOTE: larger numbers increase likelihood of factual responses, but also result in slower generation.") ToolTip.text: qsTr("Best N matches of retrieved document snippets to add to the context for prompt.\nNOTE: larger numbers increase likelihood of factual responses, but also result in slower generation.")
ToolTip.visible: hovered ToolTip.visible: hovered
text: settings.retrievalSize.toString()
validator: IntValidator {
bottom: 1
}
// Commits the edited retrieval size. If the field text parses to a number, it
// is stored in settings, synced, focus is dropped, and the value is forwarded
// to LocalDocs.retrievalSize. Otherwise the field text is reset to the stored
// value.
onEditingFinished: {
var val = parseInt(text)
if (!isNaN(val)) {
settings.retrievalSize = val
settings.sync()
focus = false
LocalDocs.retrievalSize = settings.retrievalSize
} else {
text = settings.retrievalSize.toString()
}
}
} }
MyButton { MyButton {
@ -215,7 +267,7 @@ Item {
Accessible.name: text Accessible.name: text
Accessible.description: qsTr("Restores the settings dialog to a default state") Accessible.description: qsTr("Restores the settings dialog to a default state")
onClicked: { onClicked: {
// settingsDialog.restoreGenerationDefaults() root.restoreLocalDocsDefaults();
} }
} }
} }