database: rename DocumentInfo 'doc' to 'file'

We already use 'doc' to refer to a parsed representation of the
document, not the file info.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel 2024-09-23 18:29:48 -04:00
parent adef7aa95d
commit 82c1368c86
2 changed files with 10 additions and 10 deletions

View File

@ -1271,15 +1271,15 @@ void Database::scanQueue()
const int folder_id = info.folder; const int folder_id = info.folder;
// Update info // Update info
info.doc.stat(); info.file.stat();
// If the doc has since been deleted or is no longer readable, then we schedule more work and return // If the doc has since been deleted or is no longer readable, then we schedule more work and return
// leaving the cleanup for the cleanup handler // leaving the cleanup for the cleanup handler
if (!info.doc.exists() || !info.doc.isReadable()) if (!info.file.exists() || !info.file.isReadable())
return updateFolderToIndex(folder_id, countForFolder); return updateFolderToIndex(folder_id, countForFolder);
const qint64 document_time = info.doc.fileTime(QFile::FileModificationTime).toMSecsSinceEpoch(); const qint64 document_time = info.file.fileTime(QFile::FileModificationTime).toMSecsSinceEpoch();
const QString document_path = info.doc.canonicalFilePath(); const QString document_path = info.file.canonicalFilePath();
const bool currentlyProcessing = info.currentlyProcessing; const bool currentlyProcessing = info.currentlyProcessing;
// Check and see if we already have this document // Check and see if we already have this document
@ -1342,12 +1342,12 @@ void Database::scanQueue()
Q_ASSERT(document_id != -1); Q_ASSERT(document_id != -1);
if (info.isPdf()) { if (info.isPdf()) {
QPdfDocument doc; QPdfDocument doc;
if (QPdfDocument::Error::None != doc.load(info.doc.canonicalFilePath())) { if (QPdfDocument::Error::None != doc.load(info.file.canonicalFilePath())) {
handleDocumentError("ERROR: Could not load pdf", handleDocumentError("ERROR: Could not load pdf",
document_id, document_path, q.lastError()); document_id, document_path, q.lastError());
return updateFolderToIndex(folder_id, countForFolder); return updateFolderToIndex(folder_id, countForFolder);
} }
const size_t bytes = info.doc.size(); const size_t bytes = info.file.size();
const size_t bytesPerPage = std::floor(bytes / doc.pageCount()); const size_t bytesPerPage = std::floor(bytes / doc.pageCount());
const int pageIndex = info.currentPage; const int pageIndex = info.currentPage;
#if defined(DEBUG) #if defined(DEBUG)
@ -1356,7 +1356,7 @@ void Database::scanQueue()
const QPdfSelection selection = doc.getAllText(pageIndex); const QPdfSelection selection = doc.getAllText(pageIndex);
QString text = selection.text(); QString text = selection.text();
QTextStream stream(&text); QTextStream stream(&text);
chunkStream(stream, info.folder, document_id, embedding_model, info.doc.fileName(), chunkStream(stream, info.folder, document_id, embedding_model, info.file.fileName(),
doc.metaData(QPdfDocument::MetaDataField::Title).toString(), doc.metaData(QPdfDocument::MetaDataField::Title).toString(),
doc.metaData(QPdfDocument::MetaDataField::Author).toString(), doc.metaData(QPdfDocument::MetaDataField::Author).toString(),
doc.metaData(QPdfDocument::MetaDataField::Subject).toString(), doc.metaData(QPdfDocument::MetaDataField::Subject).toString(),
@ -1384,7 +1384,7 @@ void Database::scanQueue()
} }
Q_ASSERT(!file.isSequential()); // we need to seek Q_ASSERT(!file.isSequential()); // we need to seek
const size_t bytes = info.doc.size(); const size_t bytes = info.file.size();
QTextStream stream(&file); QTextStream stream(&file);
const size_t byteIndex = info.currentPosition; const size_t byteIndex = info.currentPosition;
if (byteIndex) { if (byteIndex) {
@ -1401,7 +1401,7 @@ void Database::scanQueue()
#if defined(DEBUG) #if defined(DEBUG)
qDebug() << "scanning byteIndex" << byteIndex << "of" << bytes << document_path; qDebug() << "scanning byteIndex" << byteIndex << "of" << bytes << document_path;
#endif #endif
int pos = chunkStream(stream, info.folder, document_id, embedding_model, info.doc.fileName(), int pos = chunkStream(stream, info.folder, document_id, embedding_model, info.file.fileName(),
QString() /*title*/, QString() /*author*/, QString() /*subject*/, QString() /*keywords*/, -1 /*page*/, QString() /*title*/, QString() /*author*/, QString() /*subject*/, QString() /*keywords*/, -1 /*page*/,
100 /*maxChunks*/); 100 /*maxChunks*/);
if (pos < 0) { if (pos < 0) {

View File

@ -41,7 +41,7 @@ static const int LOCALDOCS_VERSION = 2;
struct DocumentInfo struct DocumentInfo
{ {
int folder; int folder;
QFileInfo doc; QFileInfo file;
int currentPage = 0; int currentPage = 0;
size_t currentPosition = 0; size_t currentPosition = 0;
bool currentlyProcessing = false; bool currentlyProcessing = false;