fixed last bugs in hash-based dir list exchange. It works!

This commit is contained in:
mr-alice 2016-09-10 20:47:51 +02:00
parent 88f85383df
commit 3718e7e3e8
4 changed files with 133 additions and 118 deletions

View File

@@ -285,7 +285,26 @@ bool InternalFileHierarchyStorage::updateFile(const DirectoryStorage::EntryIndex
return true;
}
bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryIndex& indx,const std::string& dir_name,time_t most_recent_time,time_t dir_modtime,const std::vector<RsFileHash>& subdirs_hash,const std::vector<RsFileHash>& subfiles_hash)
DirectoryStorage::EntryIndex InternalFileHierarchyStorage::allocateNewIndex()
{
int found = -1;
for(uint32_t j=0;j<mNodes.size();++j)
if(mNodes[j] == NULL)
{
found = j;
break;
}
if(found < 0)
{
mNodes.push_back(NULL) ;
return mNodes.size()-1 ;
}
else
return found ;
}
bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryIndex& indx,const std::string& dir_name,time_t most_recent_time,time_t dir_modtime,const std::vector<RsFileHash>& subdirs_hash,const std::vector<FileEntry>& subfiles_array)
{
if(!checkIndex(indx,FileStorageNode::TYPE_DIR))
{
@@ -301,118 +320,131 @@ bool InternalFileHierarchyStorage::updateDirEntryI
d.dir_update_time = time(NULL);
d.dir_name = dir_name;
std::map<RsFileHash,DirectoryStorage::EntryIndex> existing_subdirs ;
for(uint32_t i=0;i<d.subdirs.size();++i)
existing_subdirs[static_cast<DirEntry*>(mNodes[d.subdirs[i]])->dir_hash] = d.subdirs[i] ;
d.subdirs.clear();
d.subfiles.clear();
// check that all subdirs already exist. If not, create them.
for(uint32_t i=0;i<subdirs_hash.size();++i)
{
std::cerr << " subdir hash " << i << ": " << subdirs_hash[i] ;
std::cerr << " subdir hash = " << subdirs_hash[i] << ": " ;
std::map<RsFileHash,DirectoryStorage::EntryIndex>::iterator it = existing_subdirs.find(subdirs_hash[i]) ;
DirectoryStorage::EntryIndex dir_index = 0;
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mDirHashes.find(subdirs_hash[i]) ;
if(it == mDirHashes.end() || it->second >= mNodes.size())
if(it != existing_subdirs.end() && mNodes[it->second] != NULL && mNodes[it->second]->type() == FileStorageNode::TYPE_DIR)
{
// find an empty slot
int found = -1 ;
dir_index = it->second ;
for(uint32_t j=0;j<mNodes.size();++j)
if(mNodes[j] == NULL)
{
found = j;
break;
}
std::cerr << " already exists, at index " << dir_index << std::endl;
if(found < 0)
{
dir_index = mNodes.size() ;
mNodes.push_back(NULL) ;
}
else
dir_index = found;
existing_subdirs.erase(it) ;
}
else
{
dir_index = allocateNewIndex() ;
DirEntry *de = new DirEntry("") ;
mNodes[dir_index] = de ;
de->dir_parent_path = d.dir_parent_path + "/" + dir_name ;
de->dir_hash = subdirs_hash[i];
mDirHashes[subdirs_hash[i]] = dir_index ;
std::cerr << " created, at new index " << dir_index << std::endl;
}
else
{
dir_index = it->second;
if(mNodes[dir_index] != NULL && mNodes[dir_index]->type() != FileStorageNode::TYPE_DIR)
{
delete mNodes[dir_index] ;
mNodes[dir_index] = NULL ;
}
std::cerr << " already exists, index=" << dir_index << "." << std::endl;
}
FileStorageNode *& node(mNodes[dir_index]);
if(!node)
node = new DirEntry("");
d.subdirs.push_back(dir_index) ;
((DirEntry*&)node)->dir_parent_path = d.dir_parent_path + "/" + dir_name ;
((DirEntry*&)node)->dir_hash = subdirs_hash[i];
node->row = i ;
node->parent_index = indx ;
mDirHashes[subdirs_hash[i]] = dir_index ;
}
for(uint32_t i=0;i<subfiles_hash.size();++i)
// remove subdirs that do not exist anymore
for(std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = existing_subdirs.begin();it!=existing_subdirs.end();++it)
{
DirectoryStorage::EntryIndex file_index = 0;
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mFileHashes.find(subfiles_hash[i]) ;
std::cerr << " removing existing subfile that is not in the dirctory anymore: name=" << it->first << " index=" << it->second << std::endl;
std::cerr << " subfile hash " << i << ": " << subfiles_hash[i] ;
if(it == mFileHashes.end())
if(!checkIndex(it->second,FileStorageNode::TYPE_DIR))
{
// find an empty slot
int found = -1;
for(uint32_t j=0;j<mNodes.size();++j)
if(mNodes[j] == NULL)
{
found = j;
break;
}
if(found < 0)
{
file_index = mNodes.size() ;
mNodes.push_back(NULL) ;
}
else
file_index = found;
mFileHashes[subfiles_hash[i]] = file_index ;
std::cerr << " created, at new index " << file_index << std::endl;
std::cerr << "(EE) Cannot delete node of index " << it->second << " because it is not a file. Inconsistency error!" << std::endl;
continue ;
}
else
recursRemoveDirectory(it->second) ;
}
// now update subfiles. This is trickier, because we must not suppress hash duplicates
std::map<std::string,DirectoryStorage::EntryIndex> existing_subfiles ;
for(uint32_t i=0;i<d.subfiles.size();++i)
existing_subfiles[static_cast<FileEntry*>(mNodes[d.subfiles[i]])->file_name] = d.subfiles[i] ;
d.subfiles.clear();
for(uint32_t i=0;i<subfiles_array.size();++i)
{
std::map<std::string,DirectoryStorage::EntryIndex>::iterator it = existing_subfiles.find(subfiles_array[i].file_name) ;
const FileEntry& f(subfiles_array[i]) ;
DirectoryStorage::EntryIndex file_index ;
std::cerr << " subfile name = " << subfiles_array[i].file_name << ": " ;
if(it != existing_subfiles.end() && mNodes[it->second] != NULL && mNodes[it->second]->type() == FileStorageNode::TYPE_FILE)
{
file_index = it->second ;
if(mNodes[file_index] != NULL && mNodes[file_index]->type() != FileStorageNode::TYPE_FILE)
{
delete mNodes[file_index] ;
mNodes[file_index] = NULL ;
}
std::cerr << " already exists, at index " << file_index << std::endl;
file_index = it->second;
if(!updateFile(file_index,f.file_hash,f.file_name,f.file_size,f.file_modtime))
std::cerr << "(EE) Cannot update file with index " << it->second <<" and hash " << f.file_hash << ". This is very weird. Entry should have just been created and therefore should exist. Skipping." << std::endl;
std::cerr << " already exists, index=" << file_index << "." << std::endl;
existing_subfiles.erase(it) ;
}
FileStorageNode *& node(mNodes[file_index]);
else
{
file_index = allocateNewIndex() ;
if(!node)
node = new FileEntry("",0,0,subfiles_hash[i]);
mNodes[file_index] = new FileEntry(f.file_name,f.file_size,f.file_modtime,f.file_hash) ;
mFileHashes[f.file_hash] = file_index ;
std::cerr << " created, at new index " << file_index << std::endl;
}
d.subfiles.push_back(file_index) ;
node->row = subdirs_hash.size()+i ;
node->parent_index = indx ;
}
// remove subfiles that do not exist anymore
for(std::map<std::string,DirectoryStorage::EntryIndex>::const_iterator it = existing_subfiles.begin();it!=existing_subfiles.end();++it)
{
std::cerr << " removing existing subfile that is not in the dirctory anymore: name=" << it->first << " index=" << it->second << std::endl;
if(!checkIndex(it->second,FileStorageNode::TYPE_FILE))
{
std::cerr << "(EE) Cannot delete node of index " << it->second << " because it is not a file. Inconsistency error!" << std::endl;
continue ;
}
delete mNodes[it->second] ;
mNodes[it->second] = NULL ;
}
// now update row and parent index for all subnodes
uint32_t n=0;
for(uint32_t i=0;i<d.subdirs.size();++i)
{
mNodes[d.subdirs[i]]->parent_index = indx ;
mNodes[d.subdirs[i]]->row = n++ ;
}
for(uint32_t i=0;i<d.subfiles.size();++i)
{
mNodes[d.subfiles[i]]->parent_index = indx ;
mNodes[d.subfiles[i]]->row = n++ ;
}
return true;
}
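
For readers following the change: the new updateDirEntry() reconciles the stored children against the incoming lists in three passes: index the existing children, reuse or create an entry for each incoming child, then drop whatever was not claimed. Subdirs are keyed by hash and subfiles by name, which is exactly what lets two files with identical hashes keep separate entries. Below is a minimal, self-contained model of that strategy; the types and names (Node, allocate_index) are invented for illustration and are not the RetroShare code itself.

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Node { std::string name; std::string hash; };

// Reuse a freed (NULL) slot if one exists, otherwise grow the vector --
// the same policy as allocateNewIndex() above.
static size_t allocate_index(std::vector<Node*>& nodes)
{
    for (size_t j = 0; j < nodes.size(); ++j)
        if (nodes[j] == nullptr)
            return j;
    nodes.push_back(nullptr);
    return nodes.size() - 1;
}

int main()
{
    std::vector<Node*> nodes;
    std::vector<size_t> subfiles;                  // indices of the current children

    // Pass 1: index existing children by name, so matching entries survive.
    std::map<std::string, size_t> existing;
    for (size_t i : subfiles)
        existing[nodes[i]->name] = i;

    // Pass 2: walk the incoming list; reuse matches, create the rest.
    // The two entries share a hash but stay distinct, being keyed by name.
    std::vector<Node> incoming = { {"a.txt", "h1"}, {"b.txt", "h1"} };
    subfiles.clear();
    for (const Node& f : incoming)
    {
        size_t idx;
        std::map<std::string, size_t>::iterator it = existing.find(f.name);
        if (it != existing.end()) { idx = it->second; existing.erase(it); }
        else { idx = allocate_index(nodes); nodes[idx] = new Node(f); }
        subfiles.push_back(idx);
    }

    // Pass 3: whatever was not claimed no longer exists remotely -- free it.
    for (std::map<std::string, size_t>::iterator it = existing.begin(); it != existing.end(); ++it)
    {
        delete nodes[it->second];
        nodes[it->second] = nullptr;
    }

    std::cout << "kept " << subfiles.size() << " entries" << std::endl;  // kept 2 entries
}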

View File

@@ -25,6 +25,7 @@ public:
class FileEntry: public FileStorageNode
{
public:
FileEntry() : file_size(0), file_modtime(0) {}
FileEntry(const std::string& name,uint64_t size,time_t modtime) : file_name(name),file_size(size),file_modtime(modtime) {}
FileEntry(const std::string& name,uint64_t size,time_t modtime,const RsFileHash& hash) : file_name(name),file_size(size),file_modtime(modtime),file_hash(hash) {}
@@ -74,7 +75,7 @@ public:
bool updateSubFilesList(const DirectoryStorage::EntryIndex& indx,const std::map<std::string,DirectoryStorage::FileTS>& subfiles,std::map<std::string,DirectoryStorage::FileTS>& new_files);
bool updateHash(const DirectoryStorage::EntryIndex& file_index,const RsFileHash& hash);
bool updateFile(const DirectoryStorage::EntryIndex& file_index,const RsFileHash& hash, const std::string& fname,uint64_t size, const time_t modf_time);
bool updateDirEntry(const DirectoryStorage::EntryIndex& indx, const std::string& dir_name, time_t most_recent_time, time_t dir_modtime, const std::vector<RsFileHash> &subdirs_hash, const std::vector<RsFileHash> &subfiles_hash);
bool updateDirEntry(const DirectoryStorage::EntryIndex& indx, const std::string& dir_name, time_t most_recent_time, time_t dir_modtime, const std::vector<RsFileHash> &subdirs_hash, const std::vector<FileEntry> &subfiles_array);
bool getDirUpdateTS(const DirectoryStorage::EntryIndex& index,time_t& recurs_max_modf_TS,time_t& local_update_TS);
bool setDirUpdateTS(const DirectoryStorage::EntryIndex& index,time_t& recurs_max_modf_TS,time_t& local_update_TS);
@@ -120,14 +121,19 @@ private:
static bool nodeAccessError(const std::string& s);
static RsFileHash createDirHash(const std::string& dir_name,const std::string& dir_parent_path) ;
// Allocates a new entry in mNodes, possibly re-using an empty slot, and returns its index.
DirectoryStorage::EntryIndex allocateNewIndex();
// Removes the given subdirectory from the parent node, along with all its pending subdirs. Files are kept, and will be removed during the cleaning
// phase. This allows file information to be kept when files are moved around.
bool recursRemoveDirectory(DirectoryStorage::EntryIndex dir);
// Map of the hash of all files and all directories. The file hashes are the sha1sum of the file data.
// Map of the hash of all files. The file hashes are the sha1sum of the file data.
// It is used for fast search access for FT.
// Note: We should try something faster than std::map. hash_map??
// Unlike directories, multiple files may have the same hash, so this cannot be used for anything other than FT.
std::map<RsFileHash,DirectoryStorage::EntryIndex> mFileHashes ;
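
The comment on mFileHashes above is worth illustrating: because several files can share one hash, a plain std::map from hash to index keeps only the last writer. The map can therefore answer "give me some entry with this hash" (enough for FT search), but it cannot serve as the authoritative child list, which is why updateDirEntry() matches subfiles by name. A toy sketch, with invented hash strings and indices:

#include <iostream>
#include <map>
#include <string>

int main()
{
    std::map<std::string, int> file_hashes;   // hash -> entry index
    file_hashes["3f2a"] = 4;                  // first copy of the file
    file_hashes["3f2a"] = 9;                  // duplicate content: index 4 is lost
    std::cout << file_hashes["3f2a"] << std::endl;   // prints 9
}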

View File

@@ -671,11 +671,8 @@ bool RemoteDirectoryStorage::deserialiseUpdateDirEntry(const EntryIndex& indx,co
}
// deserialise directory subfiles, with info for each of them
std::vector<EntryIndex> subfiles_array ;
std::vector<std::string> subfiles_name ;
std::vector<uint64_t> subfiles_size ;
std::vector<RsFileHash> subfiles_hash ;
std::vector<time_t> subfiles_modtime ;
std::vector<InternalFileHierarchyStorage::FileEntry> subfiles_array ;
for(uint32_t i=0;i<n_subfiles;++i)
{
@@ -688,22 +685,19 @@ bool RemoteDirectoryStorage::deserialiseUpdateDirEntry(const EntryIndex& indx,co
uint32_t file_section_offset = 0 ;
std::string entry_name ;
uint64_t entry_size ;
RsFileHash entry_hash ;
uint32_t entry_modtime ;
InternalFileHierarchyStorage::FileEntry f;
uint32_t modtime =0;
if(!FileListIO::readField(file_section_data,file_section_size,file_section_offset,FILE_LIST_IO_TAG_FILE_NAME ,entry_name )) return false ;
if(!FileListIO::readField(file_section_data,file_section_size,file_section_offset,FILE_LIST_IO_TAG_FILE_SIZE ,entry_size )) return false ;
if(!FileListIO::readField(file_section_data,file_section_size,file_section_offset,FILE_LIST_IO_TAG_FILE_SHA1_HASH,entry_hash )) return false ;
if(!FileListIO::readField(file_section_data,file_section_size,file_section_offset,FILE_LIST_IO_TAG_MODIF_TS ,entry_modtime)) return false ;
if(!FileListIO::readField(file_section_data,file_section_size,file_section_offset,FILE_LIST_IO_TAG_FILE_NAME ,f.file_name )) return false ;
if(!FileListIO::readField(file_section_data,file_section_size,file_section_offset,FILE_LIST_IO_TAG_FILE_SIZE ,f.file_size )) return false ;
if(!FileListIO::readField(file_section_data,file_section_size,file_section_offset,FILE_LIST_IO_TAG_FILE_SHA1_HASH,f.file_hash )) return false ;
if(!FileListIO::readField(file_section_data,file_section_size,file_section_offset,FILE_LIST_IO_TAG_MODIF_TS ,modtime )) return false ;
f.file_modtime = modtime ;
free(file_section_data) ;
subfiles_name.push_back(entry_name) ;
subfiles_size.push_back(entry_size) ;
subfiles_hash.push_back(entry_hash) ;
subfiles_modtime.push_back(entry_modtime) ;
subfiles_array.push_back(f) ;
}
RS_STACK_MUTEX(mDirStorageMtx) ;
@@ -711,27 +705,12 @@ bool RemoteDirectoryStorage::deserialiseUpdateDirEntry(const EntryIndex& indx,co
std::cerr << " updating dir entry..." << std::endl;
// First create the entries for each subdir and each subfile, if needed.
if(!mFileHierarchy->updateDirEntry(indx,dir_name,most_recent_time,dir_modtime,subdirs_hashes,subfiles_hash))
if(!mFileHierarchy->updateDirEntry(indx,dir_name,most_recent_time,dir_modtime,subdirs_hashes,subfiles_array))
{
std::cerr << "(EE) Cannot update dir entry with index " << indx << ": entry does not exist." << std::endl;
return false ;
}
// Then update the subfiles
for(uint32_t i=0;i<subfiles_hash.size();++i)
{
DirectoryStorage::EntryIndex file_index ;
if(!mFileHierarchy->getIndexFromFileHash(subfiles_hash[i],file_index))
{
std::cerr << "(EE) Cannot obtain file entry index for hash " << subfiles_hash[i] << ". This is very unexpected." << std::endl;
continue;
}
std::cerr << " updating file entry " << subfiles_hash[i] << std::endl;
if(!mFileHierarchy->updateFile(file_index,subfiles_hash[i],subfiles_name[i],subfiles_size[i],subfiles_modtime[i]))
std::cerr << "(EE) Cannot update file with index " << file_index <<" and hash " << subfiles_hash[i] << ". This is very weird. Entry should have just been created and therefore should exist. Skipping." << std::endl;
}
mChanged = true ;
return true ;
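
The deserialisation loop above reads one tagged field per file attribute and aborts on the first failure. The actual FileListIO wire format is not shown in this diff; the sketch below assumes a simple <tag><u32 length><payload> layout purely to illustrate that fail-fast pattern.

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Hypothetical reader in the spirit of FileListIO::readField() above.
// The <tag><u32 length><payload> layout is an assumption for illustration;
// the real FileListIO format is defined elsewhere in the codebase.
static bool read_field(const std::vector<uint8_t>& buf, uint32_t& offset,
                       uint8_t expected_tag, std::string& out)
{
    if (offset + 5 > buf.size()) return false;      // need tag byte + 4-byte length
    if (buf[offset] != expected_tag) return false;  // wrong field: caller aborts, as above
    uint32_t len = 0;
    std::memcpy(&len, &buf[offset + 1], 4);         // native-endian for the sketch
    if (len > buf.size() - (offset + 5)) return false;  // truncated payload
    out.assign(reinterpret_cast<const char*>(buf.data() + offset + 5), len);
    offset += 5 + len;
    return true;                                    // offset now points at the next field
}

// Usage mirrors the calls above, e.g.:
//   std::string name;
//   if(!read_field(buf, off, TAG_FILE_NAME, name)) return false;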

View File

@@ -1106,6 +1106,7 @@ void p3FileDatabase::locked_recursSweepRemoteDirectory(RemoteDirectoryStorage *r
p3FileDatabase::DirSyncRequestId p3FileDatabase::makeDirSyncReqId(const RsPeerId& peer_id,DirectoryStorage::EntryIndex e)
{
#warning needs to be improved. It's quite likely that random_bias and then e can be bruteforced from the result of this function
static uint64_t random_bias = RSRandom::random_u64();
uint64_t r = e ;
@@ -1117,11 +1118,8 @@ p3FileDatabase::DirSyncRequestId p3FileDatabase::makeDirSyncReqId(const RsPeerId
r ^= (0x011933ff92892a94 * e + peer_id.toByteArray()[i] * 0x1001fff92ee640f9) ;
r <<= 8 ;
r ^= 0xf392843890321808;
std::cerr << std::hex << "r=" << r << std::endl;
}
std::cerr << "making request ID: peer_id=" << peer_id << ", e=" << e << ", random_bias=" << std::hex<< random_bias << " returning " << (r^random_bias) << std::dec << std::endl;
return r ^ random_bias;
}
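
For experimentation, here is a standalone model of the ID-mixing scheme above. The multiplier and XOR constants are copied from the code; the 16-byte peer-id length and the <random>-based session bias are stand-ins for RsPeerId and RSRandom::random_u64().

#include <cstdint>
#include <iostream>
#include <random>

static uint64_t make_req_id(const uint8_t peer_id[16], uint64_t e)
{
    // Per-session secret: without it the ID is a pure function of
    // (peer_id, e), which is what the #warning above is concerned about.
    static const uint64_t random_bias = std::mt19937_64(std::random_device{}())();

    uint64_t r = e;
    for (int i = 0; i < 16; ++i)   // assumption: a 16-byte peer id
    {
        r ^= 0x011933ff92892a94ull * e + peer_id[i] * 0x1001fff92ee640f9ull;
        r <<= 8;
        r ^= 0xf392843890321808ull;
    }
    return r ^ random_bias;
}

int main()
{
    const uint8_t peer[16] = {1, 2, 3};   // zero-padded dummy peer id
    std::cout << std::hex << make_req_id(peer, 42) << std::endl;
}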