fixed update of DirHash list and FileHash list when files and dirs are removed, while keeping the cost low

This commit is contained in:
csoler 2016-09-27 23:13:59 +02:00
parent 5547d71ac7
commit bf0777fd9e
2 changed files with 62 additions and 32 deletions

View file

@ -61,8 +61,6 @@ InternalFileHierarchyStorage::InternalFileHierarchyStorage() : mRoot(0)
de->dir_hash=RsFileHash() ; // null hash is root by convention. de->dir_hash=RsFileHash() ; // null hash is root by convention.
mNodes.push_back(de) ; mNodes.push_back(de) ;
#warning not very elegant. We should remove the leading /
mDirHashes[de->dir_hash] = 0 ; mDirHashes[de->dir_hash] = 0 ;
} }
@ -75,24 +73,43 @@ bool InternalFileHierarchyStorage::getDirHashFromIndex(const DirectoryStorage::E
return true; return true;
} }
bool InternalFileHierarchyStorage::getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const bool InternalFileHierarchyStorage::getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index)
{ {
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mDirHashes.find(hash) ; std::map<RsFileHash,DirectoryStorage::EntryIndex>::iterator it = mDirHashes.find(hash) ;
if(it == mDirHashes.end()) if(it == mDirHashes.end())
return false; return false;
index = it->second; index = it->second;
// make sure the hash actually points to some existing file. If not, remove it. This is a lazy update of dir hashes: when we need them, we check them.
if(!checkIndex(index, FileStorageNode::TYPE_DIR) || static_cast<DirEntry*>(mNodes[index])->dir_hash != hash)
{
std::cerr << "(II) removing non existing hash from dir hash list: " << hash << std::endl;
mDirHashes.erase(it) ;
return false ;
}
return true; return true;
} }
bool InternalFileHierarchyStorage::getIndexFromFileHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const bool InternalFileHierarchyStorage::getIndexFromFileHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index)
{ {
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mFileHashes.find(hash) ; std::map<RsFileHash,DirectoryStorage::EntryIndex>::iterator it = mFileHashes.find(hash) ;
if(it == mFileHashes.end()) if(it == mFileHashes.end())
return false; return false;
index = it->second; index = it->second;
// make sure the hash actually points to some existing file. If not, remove it. This is a lazy update of file hashes: when we need them, we check them.
if(!checkIndex(it->second, FileStorageNode::TYPE_FILE) || static_cast<FileEntry*>(mNodes[index])->file_hash != hash)
{
std::cerr << "(II) removing non existing hash from file hash list: " << hash << std::endl;
mFileHashes.erase(it) ;
return false ;
}
return true; return true;
} }
@ -633,12 +650,12 @@ DirectoryStorage::EntryIndex InternalFileHierarchyStorage::getSubDirIndex(Direct
bool InternalFileHierarchyStorage::searchHash(const RsFileHash& hash,std::list<DirectoryStorage::EntryIndex>& results) bool InternalFileHierarchyStorage::searchHash(const RsFileHash& hash,std::list<DirectoryStorage::EntryIndex>& results)
{ {
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mFileHashes.find(hash); DirectoryStorage::EntryIndex indx ;
if( it != mFileHashes.end() ) if(getIndexFromFileHash(hash,indx))
{ {
results.clear(); results.clear();
results.push_back(it->second) ; results.push_back(indx) ;
return true ; return true ;
} }
else else
@ -699,55 +716,68 @@ int InternalFileHierarchyStorage::searchTerms(const std::list<std::string>& term
return 0 ; return 0 ;
} }
bool InternalFileHierarchyStorage::check(std::string& error_string) const // checks consistency of storage. bool InternalFileHierarchyStorage::check(std::string& error_string) // checks consistency of storage.
{ {
// recurs go through all entries, check that all // recurs go through all entries, check that all
std::vector<uint32_t> hits(mNodes.size(),0) ; // count hits of children. Should be 1 for all in the end. Otherwise there's an error. std::vector<uint32_t> hits(mNodes.size(),0) ; // count hits of children. Should be 1 for all in the end. Otherwise there's an error.
hits[0] = 1 ; // because 0 is never the child of anyone hits[0] = 1 ; // because 0 is never the child of anyone
std::map<RsFileHash,DirectoryStorage::EntryIndex> tmp_hashes ;
for(uint32_t i=0;i<mNodes.size();++i) for(uint32_t i=0;i<mNodes.size();++i)
if(mNodes[i] != NULL && mNodes[i]->type() == FileStorageNode::TYPE_DIR) if(mNodes[i] != NULL && mNodes[i]->type() == FileStorageNode::TYPE_DIR)
{ {
// stamp the kids // stamp the kids
const DirEntry& de = *static_cast<DirEntry*>(mNodes[i]) ; DirEntry& de = *static_cast<DirEntry*>(mNodes[i]) ;
for(uint32_t j=0;j<de.subdirs.size();++j) for(uint32_t j=0;j<de.subdirs.size();)
{ {
if(de.subdirs[j] >= mNodes.size()) if(de.subdirs[j] >= mNodes.size())
{ {
error_string = "Node child out of tab!" ; error_string += " - Node child out of tab!" ;
return false ; de.subdirs[j] = de.subdirs.back() ;
de.subdirs.pop_back();
} }
if(hits[de.subdirs[j]] != 0) else if(hits[de.subdirs[j]] != 0)
{ {
error_string = "Double hit on a single node" ; error_string += " - Double hit on a single node" ;
return false; de.subdirs[j] = de.subdirs.back() ;
de.subdirs.pop_back();
} }
else
{
hits[de.subdirs[j]] = 1; hits[de.subdirs[j]] = 1;
++j ;
} }
for(uint32_t j=0;j<de.subfiles.size();++j) }
for(uint32_t j=0;j<de.subfiles.size();)
{ {
if(de.subfiles[j] >= mNodes.size()) if(de.subfiles[j] >= mNodes.size())
{ {
error_string = "Node child out of tab!" ; error_string += " - Node child out of tab!" ;
return false ; de.subfiles[j] = de.subfiles.back() ;
de.subfiles.pop_back();
} }
if(hits[de.subfiles[j]] != 0) else if(hits[de.subfiles[j]] != 0)
{ {
error_string = "Double hit on a single node" ; error_string += " - Double hit on a single node" ;
return false; de.subfiles[j] = de.subfiles.back() ;
de.subfiles.pop_back();
} }
else
{
hits[de.subfiles[j]] = 1; hits[de.subfiles[j]] = 1;
++j ;
}
} }
} }
for(uint32_t i=0;i<hits.size();++i) for(uint32_t i=0;i<hits.size();++i)
if(hits[i] == 0 && mNodes[i] != NULL) if(hits[i] == 0 && mNodes[i] != NULL)
{ {
error_string = "Orphean node!" ; error_string += " - Orphean node!" ;
return false; delete mNodes[i] ;
mNodes[i] = NULL ;
} }
return true; return true;

View file

@ -115,8 +115,8 @@ public:
// hash stuff // hash stuff
bool getDirHashFromIndex(const DirectoryStorage::EntryIndex& index,RsFileHash& hash) const ; bool getDirHashFromIndex(const DirectoryStorage::EntryIndex& index,RsFileHash& hash) const ;
bool getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const ; bool getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) ;
bool getIndexFromFileHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const ; bool getIndexFromFileHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) ;
// file/dir access and modification // file/dir access and modification
bool findSubDirectory(DirectoryStorage::EntryIndex e,const std::string& s) const ; // returns true when s is the name of a sub-directory in the given entry e bool findSubDirectory(DirectoryStorage::EntryIndex e,const std::string& s) const ; // returns true when s is the name of a sub-directory in the given entry e
@ -146,7 +146,7 @@ public:
int searchBoolExp(RsRegularExpression::Expression * exp, std::list<DirectoryStorage::EntryIndex> &results) const ; int searchBoolExp(RsRegularExpression::Expression * exp, std::list<DirectoryStorage::EntryIndex> &results) const ;
int searchTerms(const std::list<std::string>& terms, std::list<DirectoryStorage::EntryIndex> &results) const ; // does a logical OR between items of the list of terms int searchTerms(const std::list<std::string>& terms, std::list<DirectoryStorage::EntryIndex> &results) const ; // does a logical OR between items of the list of terms
bool check(std::string& error_string) const ;// checks consistency of storage. bool check(std::string& error_string) ;// checks consistency of storage.
void print() const; void print() const;