fixed update of DirHash list and FileHash list when files and dirs are removed, while keeping the cost low

This commit is contained in:
csoler 2016-09-27 23:13:59 +02:00
parent 5547d71ac7
commit bf0777fd9e
2 changed files with 62 additions and 32 deletions

View File

@ -61,8 +61,6 @@ InternalFileHierarchyStorage::InternalFileHierarchyStorage() : mRoot(0)
de->dir_hash=RsFileHash() ; // null hash is root by convention.
mNodes.push_back(de) ;
#warning not very elegant. We should remove the leading /
mDirHashes[de->dir_hash] = 0 ;
}
@ -75,24 +73,43 @@ bool InternalFileHierarchyStorage::getDirHashFromIndex(const DirectoryStorage::E
return true;
}
// Looks up `hash` in mDirHashes and writes the mapped entry index into `index`.
//
// Returns true only when the hash is present in the map AND the mapped entry passes validation
// (see below). Returns false when the hash is unknown, or when the mapped entry is stale; in the
// stale case the entry is also erased from mDirHashes, which is why the function mutates the object.
//
// Note that `index` is assigned before validation, so when false is returned for a stale entry,
// `index` still holds the stale value and must not be used by the caller.
//
// NOTE(review): this listing appears to be a rendered diff without +/- markers. The const-qualified
// signature and the const_iterator declaration right below look like the pre-change lines, the
// non-const ones the post-change lines -- confirm against the actual source file.
bool InternalFileHierarchyStorage::getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const
bool InternalFileHierarchyStorage::getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index)
{
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mDirHashes.find(hash) ;
std::map<RsFileHash,DirectoryStorage::EntryIndex>::iterator it = mDirHashes.find(hash) ;
// Unknown hash: nothing to validate, `index` is left untouched.
if(it == mDirHashes.end())
return false;
index = it->second;
// Make sure the hash actually points to some existing directory. If not, remove it. This is a lazy update of dir hashes: when we need them, we check them.
// checkIndex() is evaluated first; mNodes[index] is only dereferenced when it returned true (short-circuit ||).
// Presumably checkIndex() verifies that the index is in range and that the node has the requested type -- TODO confirm.
if(!checkIndex(index, FileStorageNode::TYPE_DIR) || static_cast<DirEntry*>(mNodes[index])->dir_hash != hash)
{
std::cerr << "(II) removing non existing hash from dir hash list: " << hash << std::endl;
// Drop the stale mapping so that subsequent lookups of this hash fail at the find() above.
mDirHashes.erase(it) ;
return false ;
}
return true;
}
// Looks up `hash` in mFileHashes and writes the mapped entry index into `index`.
//
// Returns true only when the hash is present in the map AND the mapped entry passes validation
// (see below). Returns false when the hash is unknown, or when the mapped entry is stale; in the
// stale case the entry is also erased from mFileHashes, which is why the function mutates the object.
//
// Note that `index` is assigned before validation, so when false is returned for a stale entry,
// `index` still holds the stale value and must not be used by the caller.
//
// NOTE(review): this listing appears to be a rendered diff without +/- markers. The const-qualified
// signature and the const_iterator declaration right below look like the pre-change lines, the
// non-const ones the post-change lines -- confirm against the actual source file.
bool InternalFileHierarchyStorage::getIndexFromFileHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const
bool InternalFileHierarchyStorage::getIndexFromFileHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index)
{
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mFileHashes.find(hash) ;
std::map<RsFileHash,DirectoryStorage::EntryIndex>::iterator it = mFileHashes.find(hash) ;
// Unknown hash: nothing to validate, `index` is left untouched.
if(it == mFileHashes.end())
return false;
index = it->second;
// Make sure the hash actually points to some existing file. If not, remove it. This is a lazy update of file hashes: when we need them, we check them.
// it->second and index hold the same value here. mNodes[index] is only dereferenced when checkIndex() returned true (short-circuit ||).
// Presumably checkIndex() verifies that the index is in range and that the node has the requested type -- TODO confirm.
if(!checkIndex(it->second, FileStorageNode::TYPE_FILE) || static_cast<FileEntry*>(mNodes[index])->file_hash != hash)
{
std::cerr << "(II) removing non existing hash from file hash list: " << hash << std::endl;
// Drop the stale mapping so that subsequent lookups of this hash fail at the find() above.
mFileHashes.erase(it) ;
return false ;
}
return true;
}
@ -226,7 +243,7 @@ bool InternalFileHierarchyStorage::removeDirectory(DirectoryStorage::EntryIndex
parent_dir.subdirs[i] = parent_dir.subdirs.back() ;
parent_dir.subdirs.pop_back();
recursRemoveDirectory(indx) ;
recursRemoveDirectory(indx) ;
#ifdef DEBUG_DIRECTORY_STORAGE
print();
std::string err ;
@ -633,12 +650,12 @@ DirectoryStorage::EntryIndex InternalFileHierarchyStorage::getSubDirIndex(Direct
bool InternalFileHierarchyStorage::searchHash(const RsFileHash& hash,std::list<DirectoryStorage::EntryIndex>& results)
{
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mFileHashes.find(hash);
DirectoryStorage::EntryIndex indx ;
if( it != mFileHashes.end() )
if(getIndexFromFileHash(hash,indx))
{
results.clear();
results.push_back(it->second) ;
results.push_back(indx) ;
return true ;
}
else
@ -699,55 +716,68 @@ int InternalFileHierarchyStorage::searchTerms(const std::list<std::string>& term
return 0 ;
}
bool InternalFileHierarchyStorage::check(std::string& error_string) const // checks consistency of storage.
bool InternalFileHierarchyStorage::check(std::string& error_string) // checks consistency of storage.
{
// Go through all entries and check that every node is referenced as a child by exactly one directory.
std::vector<uint32_t> hits(mNodes.size(),0) ; // count hits of children. Should be 1 for all in the end. Otherwise there's an error.
hits[0] = 1 ; // because 0 is never the child of anyone
std::map<RsFileHash,DirectoryStorage::EntryIndex> tmp_hashes ;
for(uint32_t i=0;i<mNodes.size();++i)
if(mNodes[i] != NULL && mNodes[i]->type() == FileStorageNode::TYPE_DIR)
{
// stamp the kids
const DirEntry& de = *static_cast<DirEntry*>(mNodes[i]) ;
DirEntry& de = *static_cast<DirEntry*>(mNodes[i]) ;
for(uint32_t j=0;j<de.subdirs.size();++j)
for(uint32_t j=0;j<de.subdirs.size();)
{
if(de.subdirs[j] >= mNodes.size())
{
error_string = "Node child out of tab!" ;
return false ;
error_string += " - Node child out of tab!" ;
de.subdirs[j] = de.subdirs.back() ;
de.subdirs.pop_back();
}
if(hits[de.subdirs[j]] != 0)
else if(hits[de.subdirs[j]] != 0)
{
error_string = "Double hit on a single node" ;
return false;
error_string += " - Double hit on a single node" ;
de.subdirs[j] = de.subdirs.back() ;
de.subdirs.pop_back();
}
else
{
hits[de.subdirs[j]] = 1;
++j ;
}
hits[de.subdirs[j]] = 1;
}
for(uint32_t j=0;j<de.subfiles.size();++j)
for(uint32_t j=0;j<de.subfiles.size();)
{
if(de.subfiles[j] >= mNodes.size())
{
error_string = "Node child out of tab!" ;
return false ;
error_string += " - Node child out of tab!" ;
de.subfiles[j] = de.subfiles.back() ;
de.subfiles.pop_back();
}
if(hits[de.subfiles[j]] != 0)
else if(hits[de.subfiles[j]] != 0)
{
error_string = "Double hit on a single node" ;
return false;
error_string += " - Double hit on a single node" ;
de.subfiles[j] = de.subfiles.back() ;
de.subfiles.pop_back();
}
else
{
hits[de.subfiles[j]] = 1;
++j ;
}
hits[de.subfiles[j]] = 1;
}
}
for(uint32_t i=0;i<hits.size();++i)
if(hits[i] == 0 && mNodes[i] != NULL)
{
error_string = "Orphean node!" ;
return false;
error_string += " - Orphean node!" ;
delete mNodes[i] ;
mNodes[i] = NULL ;
}
return true;

View File

@ -115,8 +115,8 @@ public:
// hash stuff
bool getDirHashFromIndex(const DirectoryStorage::EntryIndex& index,RsFileHash& hash) const ;
bool getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const ;
bool getIndexFromFileHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const ;
bool getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) ;
bool getIndexFromFileHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) ;
// file/dir access and modification
bool findSubDirectory(DirectoryStorage::EntryIndex e,const std::string& s) const ; // returns true when s is the name of a sub-directory in the given entry e
@ -146,7 +146,7 @@ public:
int searchBoolExp(RsRegularExpression::Expression * exp, std::list<DirectoryStorage::EntryIndex> &results) const ;
int searchTerms(const std::list<std::string>& terms, std::list<DirectoryStorage::EntryIndex> &results) const ; // does a logical OR between items of the list of terms
bool check(std::string& error_string) const ;// checks consistency of storage.
bool check(std::string& error_string) ;// checks consistency of storage.
void print() const;