progress in hash-based sync. Not there yet.

This commit is contained in:
mr-alice 2016-09-10 13:57:05 +02:00
parent 0ea695beb3
commit 384d001f3b
6 changed files with 147 additions and 42 deletions

View File

@ -28,26 +28,20 @@ InternalFileHierarchyStorage::InternalFileHierarchyStorage() : mRoot(0)
de->row=0;
de->parent_index=0;
de->dir_modtime=0;
de->dir_hash=RsFileHash() ; // null hash is root by convention.
mNodes.push_back(de) ;
#warning not very elegant. We should remove the leading /
mDirHashes[computeDirHash("/")] = 0 ;
mDirHashes[de->dir_hash] = 0 ;
}
// Hashes a directory path: returns whatever RsDirUtil::sha1sum() yields for the
// raw bytes of dir_path. Only length() bytes are passed, so the terminating NUL
// of the underlying C string is not part of the hashed data.
RsFileHash InternalFileHierarchyStorage::computeDirHash(const std::string& dir_path)
{
    unsigned char *path_bytes = (unsigned char*)dir_path.c_str() ;

    return RsDirUtil::sha1sum(path_bytes,dir_path.length()) ;
}
// Looks up the hash stored for the directory at position `index`.
//
//   index : slot in mNodes; must pass checkIndex() as a TYPE_DIR node.
//   hash  : receives the DirEntry's dir_hash on success; left untouched on failure.
//
// Returns false when checkIndex() rejects the index, true otherwise.
//
// Only the stored dir_hash is handed back. Recomputing a hash from
// dir_parent_path + "/" + dir_name beforehand was a dead store (the value was
// overwritten by the very next assignment), and tracing that recomputed value
// to stderr showed a hash different from the one callers actually receive.
bool InternalFileHierarchyStorage::getDirHashFromIndex(const DirectoryStorage::EntryIndex& index,RsFileHash& hash) const
{
    if(!checkIndex(index,FileStorageNode::TYPE_DIR))
        return false ;

    // checkIndex() accepted the node as a directory, so the downcast mirrors
    // the one used everywhere else on mNodes entries.
    const DirEntry& d = *static_cast<DirEntry*>(mNodes[index]) ;

    hash = d.dir_hash ;
    return true;
}
@ -119,6 +113,10 @@ bool InternalFileHierarchyStorage::updateSubDirectoryList(const DirectoryStorage
de->row = mNodes.size();
de->parent_index = indx;
de->dir_modtime = it->second;
de->dir_parent_path = d.dir_parent_path + "/" + d.dir_name ;
de->dir_hash = createDirHash(de->dir_name,de->dir_parent_path) ;
mDirHashes[de->dir_hash] = mNodes.size() ;
d.subdirs.push_back(mNodes.size()) ;
mNodes.push_back(de) ;
@ -126,6 +124,24 @@ bool InternalFileHierarchyStorage::updateSubDirectoryList(const DirectoryStorage
return true;
}
// Produces the identifier that friends use to refer to a specific directory.
//
// Requirements for this identifier:
//   - it must be unique;
//   - it must not be possible to brute-force it back to the real directory name and path.
//
// Candidate schemes:
//   1. H(some_secret_salt + dir_name + dir_parent_path), keeping the same salt so that
//      the hash can be re-created later;
//   2. H(virtual_path), which only works at the level of LocalDirectoryStorage;
//   3. a random value, which then needs to be stored so that friends are not confused
//      when restarting.
//
// The body follows scheme 3: dir_name and dir_parent_path are currently not used.
RsFileHash InternalFileHierarchyStorage::createDirHash(const std::string& dir_name,const std::string& dir_parent_path)
{
    RsFileHash random_id = RsFileHash::random() ;

    return random_id ;
}
bool InternalFileHierarchyStorage::removeDirectory(DirectoryStorage::EntryIndex indx) // no reference here! Very important. Otherwise, the messign we do inside can change the value of indx!!
{
// check that it's a directory
@ -268,6 +284,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI
}
DirEntry& d(*static_cast<DirEntry*>(mNodes[indx])) ;
std::cerr << "Updating dir entry: name=\"" << dir_name << "\", most_recent_time=" << most_recent_time << ", modtime=" << dir_modtime << std::endl;
d.dir_most_recent_time = most_recent_time;
d.dir_modtime = dir_modtime;
d.dir_update_time = time(NULL);
@ -279,6 +297,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI
// check that all subdirs already exist. If not, create.
for(uint32_t i=0;i<subdirs_hash.size();++i)
{
std::cerr << " subdir hash " << i << ": " << subdirs_hash[i] ;
DirectoryStorage::EntryIndex dir_index = 0;
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mDirHashes.find(subdirs_hash[i]) ;
@ -303,6 +323,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI
dir_index = found;
mDirHashes[subdirs_hash[i]] = dir_index ;
std::cerr << " created, at new index " << dir_index << std::endl;
}
else
{
@ -313,6 +335,7 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI
delete mNodes[dir_index] ;
mNodes[dir_index] = NULL ;
}
std::cerr << " already exists, index=" << dir_index << "." << std::endl;
}
FileStorageNode *& node(mNodes[dir_index]);
if(!node)
@ -329,6 +352,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI
DirectoryStorage::EntryIndex file_index = 0;
std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it = mFileHashes.find(subfiles_hash[i]) ;
std::cerr << " subfile hash " << i << ": " << subfiles_hash[i] ;
if(it == mFileHashes.end())
{
// find an empty slot
@ -350,6 +375,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI
file_index = found;
mFileHashes[subfiles_hash[i]] = file_index ;
std::cerr << " created, at new index " << file_index << std::endl;
}
else
{
@ -362,8 +389,11 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI
}
file_index = it->second;
std::cerr << " already exists, index=" << file_index << "." << std::endl;
}
FileStorageNode *& node(mNodes[file_index]);
if(!node)
node = new FileEntry("",0,0,subfiles_hash[i]);
@ -579,6 +609,14 @@ void InternalFileHierarchyStorage::print() const
std::cerr << "Total nodes: " << mNodes.size() << " (" << nfiles << " files, " << ndirs << " dirs, " << nempty << " empty slots)" << std::endl;
recursPrint(0,DirectoryStorage::EntryIndex(0));
std::cerr << "Known dir hashes: " << std::endl;
for(std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it(mDirHashes.begin());it!=mDirHashes.end();++it)
std::cerr << " " << it->first << " at index " << it->second << std::endl;
std::cerr << "Known file hashes: " << std::endl;
for(std::map<RsFileHash,DirectoryStorage::EntryIndex>::const_iterator it(mFileHashes.begin());it!=mFileHashes.end();++it)
std::cerr << " " << it->first << " at index " << it->second << std::endl;
}
void InternalFileHierarchyStorage::recursPrint(int depth,DirectoryStorage::EntryIndex node) const
{
@ -591,7 +629,7 @@ void InternalFileHierarchyStorage::recursPrint(int depth,DirectoryStorage::Entry
}
DirEntry& d(*static_cast<DirEntry*>(mNodes[node]));
std::cerr << indent << "dir:" << d.dir_name << ", modf time: " << d.dir_modtime << ", recurs_last_modf_time: " << d.dir_most_recent_time << ", parent: " << d.parent_index << ", row: " << d.row << ", subdirs: " ;
std::cerr << indent << "dir hash=" << d.dir_hash << ". name:" << d.dir_name << ", parent_path:" << d.dir_parent_path << ", modf time: " << d.dir_modtime << ", recurs_last_modf_time: " << d.dir_most_recent_time << ", parent: " << d.parent_index << ", row: " << d.row << ", subdirs: " ;
for(uint32_t i=0;i<d.subdirs.size();++i)
std::cerr << d.subdirs[i] << " " ;
@ -620,6 +658,8 @@ bool InternalFileHierarchyStorage::recursRemoveDirectory(DirectoryStorage::Entry
{
DirEntry& d(*static_cast<DirEntry*>(mNodes[dir])) ;
RsFileHash hash = d.dir_hash ;
for(uint32_t i=0;i<d.subdirs.size();++i)
recursRemoveDirectory(d.subdirs[i]);
@ -631,6 +671,8 @@ bool InternalFileHierarchyStorage::recursRemoveDirectory(DirectoryStorage::Entry
delete mNodes[dir] ;
mNodes[dir] = NULL ;
mDirHashes.erase(hash) ;
return true ;
}
@ -682,6 +724,7 @@ bool InternalFileHierarchyStorage::save(const std::string& fname)
if(!FileListIO::writeField(dir_section_data,dir_section_size,dir_section_offset,FILE_LIST_IO_TAG_ROW ,(uint32_t)de.row )) throw std::runtime_error("Write error") ;
if(!FileListIO::writeField(dir_section_data,dir_section_size,dir_section_offset,FILE_LIST_IO_TAG_ENTRY_INDEX ,(uint32_t)i )) throw std::runtime_error("Write error") ;
if(!FileListIO::writeField(dir_section_data,dir_section_size,dir_section_offset,FILE_LIST_IO_TAG_FILE_NAME ,de.dir_name )) throw std::runtime_error("Write error") ;
if(!FileListIO::writeField(dir_section_data,dir_section_size,dir_section_offset,FILE_LIST_IO_TAG_DIR_HASH ,de.dir_hash )) throw std::runtime_error("Write error") ;
if(!FileListIO::writeField(dir_section_data,dir_section_size,dir_section_offset,FILE_LIST_IO_TAG_FILE_SIZE ,de.dir_parent_path )) throw std::runtime_error("Write error") ;
if(!FileListIO::writeField(dir_section_data,dir_section_size,dir_section_offset,FILE_LIST_IO_TAG_MODIF_TS ,(uint32_t)de.dir_modtime )) throw std::runtime_error("Write error") ;
if(!FileListIO::writeField(dir_section_data,dir_section_size,dir_section_offset,FILE_LIST_IO_TAG_UPDATE_TS ,(uint32_t)de.dir_update_time )) throw std::runtime_error("Write error") ;
@ -779,12 +822,14 @@ bool InternalFileHierarchyStorage::load(const std::string& fname)
fe->row = row ;
mNodes[node_index] = fe ;
mFileHashes[fe->file_hash] = node_index ;
}
else if(FileListIO::readField(buffer,buffer_size,buffer_offset,FILE_LIST_IO_TAG_LOCAL_DIR_ENTRY,node_section_data,node_section_size))
{
uint32_t node_index ;
std::string dir_name ;
std::string dir_parent_path ;
RsFileHash dir_hash ;
uint32_t dir_modtime ;
uint32_t dir_update_time ;
uint32_t dir_most_recent_time ;
@ -795,6 +840,7 @@ bool InternalFileHierarchyStorage::load(const std::string& fname)
if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_ROW ,row )) throw std::runtime_error("Read error") ;
if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_ENTRY_INDEX ,node_index )) throw std::runtime_error("Read error") ;
if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_FILE_NAME ,dir_name )) throw std::runtime_error("Read error") ;
if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_DIR_HASH ,dir_hash )) throw std::runtime_error("Read error") ;
if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_FILE_SIZE ,dir_parent_path )) throw std::runtime_error("Read error") ;
if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_MODIF_TS ,dir_modtime )) throw std::runtime_error("Read error") ;
if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_UPDATE_TS ,dir_update_time )) throw std::runtime_error("Read error") ;
@ -806,6 +852,7 @@ bool InternalFileHierarchyStorage::load(const std::string& fname)
DirEntry *de = new DirEntry(dir_name) ;
de->dir_name = dir_name ;
de->dir_parent_path = dir_parent_path ;
de->dir_hash = dir_hash ;
de->dir_modtime = dir_modtime ;
de->dir_update_time = dir_update_time ;
de->dir_most_recent_time = dir_most_recent_time ;
@ -834,6 +881,7 @@ bool InternalFileHierarchyStorage::load(const std::string& fname)
de->subfiles.push_back(fi) ;
}
mNodes[node_index] = de ;
mDirHashes[de->dir_hash] = node_index ;
}
else
throw std::runtime_error("Unknown node section.") ;

View File

@ -49,6 +49,7 @@ public:
// local stuff
std::string dir_name ;
std::string dir_parent_path ;
RsFileHash dir_hash ;
std::vector<DirectoryStorage::EntryIndex> subdirs ;
std::vector<DirectoryStorage::EntryIndex> subfiles ;
@ -83,8 +84,6 @@ public:
// hash stuff
static RsFileHash computeDirHash(const std::string& dir_path);
bool getDirHashFromIndex(const DirectoryStorage::EntryIndex& index,RsFileHash& hash) const ;
bool getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const ;
@ -118,6 +117,7 @@ public:
private:
void recursPrint(int depth,DirectoryStorage::EntryIndex node) const;
static bool nodeAccessError(const std::string& s);
static RsFileHash createDirHash(const std::string& dir_name,const std::string& dir_parent_path) ;
// Removes the given subdirectory from the parent node and all its pending subdirs. Files are kept, and will go during the cleaning
// phase. That allows keeping file information when moving them around.

View File

@ -467,6 +467,58 @@ bool LocalDirectoryStorage::locked_getFileSharingPermissions(const EntryIndex& i
return true;
}
// Returns the name under which the directory at `indx` is presented.
//
//   - index 0 (the root) yields an empty string;
//   - an entry whose parent is not the root yields its own dir_name unchanged;
//   - an entry directly below the root is looked up in mLocalDirs by dir_name and
//     yields the matching SharedDirInfo::virtualname, or an empty string (after
//     logging an error) when no shared directory matches.
//
// The "locked_" prefix suggests the caller is expected to hold the storage mutex
// already — NOTE(review): confirm against the callers.
std::string LocalDirectoryStorage::locked_getVirtualDirName(EntryIndex indx) const
{
    if(indx == 0)
        return std::string() ;

    const InternalFileHierarchyStorage::DirEntry *entry = mFileHierarchy->getDirEntry(indx);

    if(entry->parent_index == 0)
    {
        // Level 1 directory: its stored name is the key into the shared directory list.
        std::map<std::string,SharedDirInfo>::const_iterator shared = mLocalDirs.find(entry->dir_name) ;

        if(shared != mLocalDirs.end())
            return shared->second.virtualname ;

        std::cerr << "(EE) Cannot find real name " << entry->dir_name << " at level 1 among shared dirs. Bug?" << std::endl;
        return std::string() ;
    }

    // Deeper directories are reported under their own name.
    return entry->dir_name ;
}
// Builds the virtual path of the directory at `indx`, i.e. the path in which the
// level 1 (shared) directory is designated by its virtual name instead of its
// stored name.
//
//   - index 0 (the root) yields an empty string;
//   - a level 1 directory with virtual name "V" yields "V/";
//   - a directory "C" inside "B" inside that same shared directory yields "V/B/C/".
//
// An empty string is also returned (after logging an error) when the walk up the
// hierarchy fails or when the level 1 directory is not found in mLocalDirs.
//
// Fixes over the previous version:
//   - components are now prepended with `res = name + "/" + res`; the former
//     `res += name + "/" + res` appended `res` to itself and so duplicated every
//     component collected so far;
//   - the name is read before stepping to the parent, so the entry's own name is
//     part of the path and the level 1 stored name no longer appears next to the
//     virtual name. Previously all sub-directories sharing a parent produced the
//     same path, hence the same hash in locked_getDirHashFromIndex().
std::string LocalDirectoryStorage::locked_getVirtualPath(EntryIndex indx) const
{
    if(indx == 0)
        return std::string() ;

    std::string res ;
    const InternalFileHierarchyStorage::DirEntry *dir = mFileHierarchy->getDirEntry(indx);

    // Walk up until the level 1 directory, collecting names deepest-first.
    // NOTE(review): getDirEntry() is assumed to possibly return a null pointer for
    // an invalid index; its definition is not visible here, so the guard is defensive.
    while(dir && dir->parent_index != 0)
    {
        res = dir->dir_name + "/" + res ;
        dir = mFileHierarchy->getDirEntry(dir->parent_index) ;
    }

    if(!dir)
    {
        std::cerr << "(EE) Cannot walk up to a level 1 directory from index " << indx << ". Bug?" << std::endl;
        return std::string() ;
    }

    // `dir` is now the level 1 entry: swap its stored name for the virtual one.
    std::map<std::string,SharedDirInfo>::const_iterator it = mLocalDirs.find(dir->dir_name) ;

    if(it == mLocalDirs.end())
    {
        std::cerr << "(EE) Cannot find real name " << dir->dir_name << " at level 1 among shared dirs. Bug?" << std::endl;
        return std::string() ;
    }

    return it->second.virtualname + "/" + res;
}
// Returns the hash identifying the directory at `indx`: the result of
// RsDirUtil::sha1sum() applied to the bytes of the directory's full virtual path,
// as produced by locked_getVirtualPath().
RsFileHash LocalDirectoryStorage::locked_getDirHashFromIndex(EntryIndex indx) const
{
    const std::string path(locked_getVirtualPath(indx)) ;
    unsigned char *path_bytes = (unsigned char*)path.c_str() ;

    return RsDirUtil::sha1sum(path_bytes,path.length()) ;
}
bool LocalDirectoryStorage::serialiseDirEntry(const EntryIndex& indx,RsTlvBinaryData& bindata,const RsPeerId& client_id)
{
RS_STACK_MUTEX(mDirStorageMtx) ;
@ -489,13 +541,7 @@ bool LocalDirectoryStorage::serialiseDirEntry(const EntryIndex& indx,RsTlvBinary
for(uint32_t i=0;i<dir->subdirs.size();++i)
if(indx != 0 || (locked_getFileSharingPermissions(dir->subdirs[i],node_flags,node_groups) && (rsPeers->computePeerPermissionFlags(client_id,node_flags,node_groups) & RS_FILE_HINTS_BROWSABLE)))
{
RsFileHash hash ;
if(!mFileHierarchy->getDirHashFromIndex(dir->subdirs[i],hash))
{
std::cerr << "(EE) cannot get hash from index for subdir " << dir->subdirs[i] << " at position " << i << " in subdirs list. Weird." << std::endl;
continue ;
}
RsFileHash hash = locked_getDirHashFromIndex(dir->subdirs[i]) ;
allowed_subdirs.push_back(hash) ;
}
@ -508,10 +554,11 @@ bool LocalDirectoryStorage::serialiseDirEntry(const EntryIndex& indx,RsTlvBinary
// - the index entry for each subdir (the update TS are exchanged at a higher level)
// - the file info for each subfile
//
std::string virtual_dir_name = locked_getVirtualDirName(indx) ;
if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_DIR_NAME ,dir->dir_name )) return false ;
if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_DIR_NAME ,virtual_dir_name )) return false ;
if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_RECURS_MODIF_TS,(uint32_t)dir->dir_most_recent_time)) return false ;
if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_MODIF_TS ,(uint32_t)dir->dir_modtime )) return false ;
if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_MODIF_TS ,(uint32_t)dir->dir_modtime )) return false ;
// serialise number of subdirs and number of subfiles

View File

@ -205,6 +205,10 @@ public:
bool serialiseDirEntry(const EntryIndex& indx, RsTlvBinaryData& bindata, const RsPeerId &client_id) ;
private:
RsFileHash locked_getDirHashFromIndex(EntryIndex indx) const ;
std::string locked_getVirtualPath(EntryIndex indx) const ;
std::string locked_getVirtualDirName(EntryIndex indx) const ;
bool locked_getFileSharingPermissions(const EntryIndex& indx, FileStorageFlags &flags, std::list<RsNodeGroupId>& parent_groups);
std::string locked_findRealRootFromVirtualFilename(const std::string& virtual_rootdir) const;

View File

@ -11,25 +11,31 @@
// Constants for the file-list I/O code: a storage version number, the one-byte
// FILE_LIST_IO_TAG_* values that are passed to FileListIO::writeField() and
// FileListIO::readField() to label each serialised field or section, and the
// maximum size of a section header.
//
// NOTE(review): several tag names are defined twice in this span with different
// values (for instance FILE_LIST_IO_TAG_FILE_NAME is 0x02 and later 0x21, and
// FILE_LIST_IO_TAG_UNKNOWN is repeated). This looks like the old and the new
// numbering shown side by side — confirm which set is current before relying
// on any individual value.
static const uint32_t FILE_LIST_IO_LOCAL_DIRECTORY_STORAGE_VERSION_0001 = 0x00000001 ;
static const uint8_t FILE_LIST_IO_TAG_UNKNOWN = 0x00 ;
static const uint8_t FILE_LIST_IO_TAG_UNKNOWN = 0x00 ;
static const uint8_t FILE_LIST_IO_TAG_LOCAL_DIRECTORY_VERSION = 0x01 ;
static const uint8_t FILE_LIST_IO_TAG_FILE_SHA1_HASH = 0x01 ;
static const uint8_t FILE_LIST_IO_TAG_FILE_NAME = 0x02 ;
static const uint8_t FILE_LIST_IO_TAG_FILE_SIZE = 0x03 ;
static const uint8_t FILE_LIST_IO_TAG_DIR_NAME = 0x04 ;
static const uint8_t FILE_LIST_IO_TAG_MODIF_TS = 0x05 ;
static const uint8_t FILE_LIST_IO_TAG_RECURS_MODIF_TS = 0x06 ;
static const uint8_t FILE_LIST_IO_TAG_HASH_STORAGE_ENTRY = 0x07 ;
static const uint8_t FILE_LIST_IO_TAG_UPDATE_TS = 0x08 ;
static const uint8_t FILE_LIST_IO_TAG_BINARY_DATA = 0x09 ;
static const uint8_t FILE_LIST_IO_TAG_RAW_NUMBER = 0x0a ;
static const uint8_t FILE_LIST_IO_TAG_ENTRY_INDEX = 0x0b ;
static const uint8_t FILE_LIST_IO_TAG_REMOTE_FILE_ENTRY = 0x0c ;
static const uint8_t FILE_LIST_IO_TAG_LOCAL_FILE_ENTRY = 0x0d ;
static const uint8_t FILE_LIST_IO_TAG_LOCAL_DIR_ENTRY = 0x0e ;
static const uint8_t FILE_LIST_IO_TAG_LOCAL_DIRECTORY_VERSION = 0x0f ;
static const uint8_t FILE_LIST_IO_TAG_PARENT_INDEX = 0x10 ;
static const uint8_t FILE_LIST_IO_TAG_ROW = 0x11 ;
static const uint8_t FILE_LIST_IO_TAG_HASH_STORAGE_ENTRY = 0x10 ;
static const uint8_t FILE_LIST_IO_TAG_LOCAL_FILE_ENTRY = 0x11 ;
static const uint8_t FILE_LIST_IO_TAG_LOCAL_DIR_ENTRY = 0x12 ;
static const uint8_t FILE_LIST_IO_TAG_REMOTE_FILE_ENTRY = 0x13 ;
static const uint8_t FILE_LIST_IO_TAG_FILE_SHA1_HASH = 0x20 ;
static const uint8_t FILE_LIST_IO_TAG_FILE_NAME = 0x21 ;
static const uint8_t FILE_LIST_IO_TAG_FILE_SIZE = 0x22 ;
static const uint8_t FILE_LIST_IO_TAG_MODIF_TS = 0x30 ;
static const uint8_t FILE_LIST_IO_TAG_RECURS_MODIF_TS = 0x31 ;
static const uint8_t FILE_LIST_IO_TAG_UPDATE_TS = 0x32 ;
static const uint8_t FILE_LIST_IO_TAG_ENTRY_INDEX = 0x40 ;
static const uint8_t FILE_LIST_IO_TAG_PARENT_INDEX = 0x41 ;
static const uint8_t FILE_LIST_IO_TAG_DIR_HASH = 0x50 ;
static const uint8_t FILE_LIST_IO_TAG_DIR_NAME = 0x51 ;
static const uint8_t FILE_LIST_IO_TAG_ROW = 0x60 ;
static const uint8_t FILE_LIST_IO_TAG_BINARY_DATA = 0x61 ;
static const uint8_t FILE_LIST_IO_TAG_RAW_NUMBER = 0x62 ;
static const uint32_t SECTION_HEADER_MAX_SIZE = 6 ; // section tag (1 byte) + size (max = 5 bytes)

View File

@ -154,9 +154,9 @@ int p3FileDatabase::tick()
{
RS_STACK_MUTEX(mFLSMtx) ;
#ifdef DEBUG_FILE_HIERARCHY
//#ifdef DEBUG_FILE_HIERARCHY
mLocalSharedDirs->print();
#endif
//#endif
last_print_time = now ;
//#warning this should be removed, but it's necessary atm for updating the GUI