From 384d001f3bd9a5ba22ad16a0d8c6879216cf96bb Mon Sep 17 00:00:00 2001 From: mr-alice Date: Sat, 10 Sep 2016 13:57:05 +0200 Subject: [PATCH] progress in hash-based sync. Not there yet. --- .../src/file_sharing/dir_hierarchy.cc | 70 ++++++++++++++++--- .../src/file_sharing/dir_hierarchy.h | 4 +- .../src/file_sharing/directory_storage.cc | 65 ++++++++++++++--- .../src/file_sharing/directory_storage.h | 4 ++ libretroshare/src/file_sharing/filelist_io.h | 42 ++++++----- libretroshare/src/file_sharing/p3filelists.cc | 4 +- 6 files changed, 147 insertions(+), 42 deletions(-) diff --git a/libretroshare/src/file_sharing/dir_hierarchy.cc b/libretroshare/src/file_sharing/dir_hierarchy.cc index 8cf824110..f62000343 100644 --- a/libretroshare/src/file_sharing/dir_hierarchy.cc +++ b/libretroshare/src/file_sharing/dir_hierarchy.cc @@ -28,26 +28,20 @@ InternalFileHierarchyStorage::InternalFileHierarchyStorage() : mRoot(0) de->row=0; de->parent_index=0; de->dir_modtime=0; + de->dir_hash=RsFileHash() ; // null hash is root by convention. mNodes.push_back(de) ; + #warning not very elegant. We should remove the leading / - mDirHashes[computeDirHash("/")] = 0 ; + mDirHashes[de->dir_hash] = 0 ; } -RsFileHash InternalFileHierarchyStorage::computeDirHash(const std::string& dir_path) -{ - return RsDirUtil::sha1sum((unsigned char*)dir_path.c_str(),dir_path.length()) ; -} bool InternalFileHierarchyStorage::getDirHashFromIndex(const DirectoryStorage::EntryIndex& index,RsFileHash& hash) const { if(!checkIndex(index,FileStorageNode::TYPE_DIR)) return false ; - DirEntry& d = *static_cast(mNodes[index]) ; - - hash = computeDirHash( d.dir_parent_path + "/" + d.dir_name ) ; - - std::cerr << "Computing dir hash from index " << index << ". Dir=\"" << d.dir_parent_path + "/" + d.dir_name << "\" hash=" << hash << std::endl; + hash = static_cast(mNodes[index])->dir_hash ; return true; } @@ -119,6 +113,10 @@ bool InternalFileHierarchyStorage::updateSubDirectoryList(const DirectoryStorage de->row = mNodes.size(); de->parent_index = indx; de->dir_modtime = it->second; + de->dir_parent_path = d.dir_parent_path + "/" + d.dir_name ; + de->dir_hash = createDirHash(de->dir_name,de->dir_parent_path) ; + + mDirHashes[de->dir_hash] = mNodes.size() ; d.subdirs.push_back(mNodes.size()) ; mNodes.push_back(de) ; @@ -126,6 +124,24 @@ bool InternalFileHierarchyStorage::updateSubDirectoryList(const DirectoryStorage return true; } + +RsFileHash InternalFileHierarchyStorage::createDirHash(const std::string& dir_name,const std::string& dir_parent_path) +{ + // What we need here: a unique identifier + // - that cannot be bruteforced to find the real directory name and path + // - that is used by friends to refer to a specific directory. + + // Option 1: compute H(some_secret_salt + dir_name + dir_parent_path) + // and keep the same salt so that we can re-create the hash + // + // Option 2: compute H(virtual_path). That only works at the level of LocalDirectoryStorage + // + // Option 3: just compute something random, but then we need to store it so as to not + // confuse friends when restarting. + + return RsFileHash::random(); +} + bool InternalFileHierarchyStorage::removeDirectory(DirectoryStorage::EntryIndex indx) // no reference here! Very important. Otherwise, the messign we do inside can change the value of indx!! { // check that it's a directory @@ -268,6 +284,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI } DirEntry& d(*static_cast(mNodes[indx])) ; + std::cerr << "Updating dir entry: name=\"" << dir_name << "\", most_recent_time=" << most_recent_time << ", modtime=" << dir_modtime << std::endl; + d.dir_most_recent_time = most_recent_time; d.dir_modtime = dir_modtime; d.dir_update_time = time(NULL); @@ -279,6 +297,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI // check that all subdirs already exist. If not, create. for(uint32_t i=0;i::const_iterator it = mDirHashes.find(subdirs_hash[i]) ; @@ -303,6 +323,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI dir_index = found; mDirHashes[subdirs_hash[i]] = dir_index ; + + std::cerr << " created, at new index " << dir_index << std::endl; } else { @@ -313,6 +335,7 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI delete mNodes[dir_index] ; mNodes[dir_index] = NULL ; } + std::cerr << " already exists, index=" << dir_index << "." << std::endl; } FileStorageNode *& node(mNodes[dir_index]); if(!node) @@ -329,6 +352,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI DirectoryStorage::EntryIndex file_index = 0; std::map::const_iterator it = mFileHashes.find(subfiles_hash[i]) ; + std::cerr << " subfile hash " << i << ": " << subfiles_hash[i] ; + if(it == mFileHashes.end()) { // find an epty slot @@ -350,6 +375,8 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI file_index = found; mFileHashes[subfiles_hash[i]] = file_index ; + + std::cerr << " created, at new index " << file_index << std::endl; } else { @@ -362,8 +389,11 @@ bool InternalFileHierarchyStorage::updateDirEntry(const DirectoryStorage::EntryI } file_index = it->second; + + std::cerr << " already exists, index=" << file_index << "." << std::endl; } FileStorageNode *& node(mNodes[file_index]); + if(!node) node = new FileEntry("",0,0,subfiles_hash[i]); @@ -579,6 +609,14 @@ void InternalFileHierarchyStorage::print() const std::cerr << "Total nodes: " << mNodes.size() << " (" << nfiles << " files, " << ndirs << " dirs, " << nempty << " empty slots)" << std::endl; recursPrint(0,DirectoryStorage::EntryIndex(0)); + + std::cerr << "Known dir hashes: " << std::endl; + for(std::map::const_iterator it(mDirHashes.begin());it!=mDirHashes.end();++it) + std::cerr << " " << it->first << " at index " << it->second << std::endl; + + std::cerr << "Known file hashes: " << std::endl; + for(std::map::const_iterator it(mFileHashes.begin());it!=mFileHashes.end();++it) + std::cerr << " " << it->first << " at index " << it->second << std::endl; } void InternalFileHierarchyStorage::recursPrint(int depth,DirectoryStorage::EntryIndex node) const { @@ -591,7 +629,7 @@ void InternalFileHierarchyStorage::recursPrint(int depth,DirectoryStorage::Entry } DirEntry& d(*static_cast(mNodes[node])); - std::cerr << indent << "dir:" << d.dir_name << ", modf time: " << d.dir_modtime << ", recurs_last_modf_time: " << d.dir_most_recent_time << ", parent: " << d.parent_index << ", row: " << d.row << ", subdirs: " ; + std::cerr << indent << "dir hash=" << d.dir_hash << ". name:" << d.dir_name << ", parent_path:" << d.dir_parent_path << ", modf time: " << d.dir_modtime << ", recurs_last_modf_time: " << d.dir_most_recent_time << ", parent: " << d.parent_index << ", row: " << d.row << ", subdirs: " ; for(uint32_t i=0;i(mNodes[dir])) ; + RsFileHash hash = d.dir_hash ; + for(uint32_t i=0;irow = row ; mNodes[node_index] = fe ; + mFileHashes[fe->file_hash] = node_index ; } else if(FileListIO::readField(buffer,buffer_size,buffer_offset,FILE_LIST_IO_TAG_LOCAL_DIR_ENTRY,node_section_data,node_section_size)) { uint32_t node_index ; std::string dir_name ; std::string dir_parent_path ; + RsFileHash dir_hash ; uint32_t dir_modtime ; uint32_t dir_update_time ; uint32_t dir_most_recent_time ; @@ -795,6 +840,7 @@ bool InternalFileHierarchyStorage::load(const std::string& fname) if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_ROW ,row )) throw std::runtime_error("Read error") ; if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_ENTRY_INDEX ,node_index )) throw std::runtime_error("Read error") ; if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_FILE_NAME ,dir_name )) throw std::runtime_error("Read error") ; + if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_DIR_HASH ,dir_hash )) throw std::runtime_error("Read error") ; if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_FILE_SIZE ,dir_parent_path )) throw std::runtime_error("Read error") ; if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_MODIF_TS ,dir_modtime )) throw std::runtime_error("Read error") ; if(!FileListIO::readField(node_section_data,node_section_size,node_section_offset,FILE_LIST_IO_TAG_UPDATE_TS ,dir_update_time )) throw std::runtime_error("Read error") ; @@ -806,6 +852,7 @@ bool InternalFileHierarchyStorage::load(const std::string& fname) DirEntry *de = new DirEntry(dir_name) ; de->dir_name = dir_name ; de->dir_parent_path = dir_parent_path ; + de->dir_hash = dir_hash ; de->dir_modtime = dir_modtime ; de->dir_update_time = dir_update_time ; de->dir_most_recent_time = dir_most_recent_time ; @@ -834,6 +881,7 @@ bool InternalFileHierarchyStorage::load(const std::string& fname) de->subfiles.push_back(fi) ; } mNodes[node_index] = de ; + mDirHashes[de->dir_hash] = node_index ; } else throw std::runtime_error("Unknown node section.") ; diff --git a/libretroshare/src/file_sharing/dir_hierarchy.h b/libretroshare/src/file_sharing/dir_hierarchy.h index 5d7fb033a..d84e9e8d4 100644 --- a/libretroshare/src/file_sharing/dir_hierarchy.h +++ b/libretroshare/src/file_sharing/dir_hierarchy.h @@ -49,6 +49,7 @@ public: // local stuff std::string dir_name ; std::string dir_parent_path ; + RsFileHash dir_hash ; std::vector subdirs ; std::vector subfiles ; @@ -83,8 +84,6 @@ public: // hash stuff - static RsFileHash computeDirHash(const std::string& dir_path); - bool getDirHashFromIndex(const DirectoryStorage::EntryIndex& index,RsFileHash& hash) const ; bool getIndexFromDirHash(const RsFileHash& hash,DirectoryStorage::EntryIndex& index) const ; @@ -118,6 +117,7 @@ public: private: void recursPrint(int depth,DirectoryStorage::EntryIndex node) const; static bool nodeAccessError(const std::string& s); + static RsFileHash createDirHash(const std::string& dir_name,const std::string& dir_parent_path) ; // Removes the given subdirectory from the parent node and all its pendign subdirs. Files are kept, and will go during the cleaning // phase. That allows to keep file information when moving them around. diff --git a/libretroshare/src/file_sharing/directory_storage.cc b/libretroshare/src/file_sharing/directory_storage.cc index c8a0b0cc3..f27bfb16b 100644 --- a/libretroshare/src/file_sharing/directory_storage.cc +++ b/libretroshare/src/file_sharing/directory_storage.cc @@ -467,6 +467,58 @@ bool LocalDirectoryStorage::locked_getFileSharingPermissions(const EntryIndex& i return true; } +std::string LocalDirectoryStorage::locked_getVirtualDirName(EntryIndex indx) const +{ + if(indx == 0) + return std::string() ; + + const InternalFileHierarchyStorage::DirEntry *dir = mFileHierarchy->getDirEntry(indx); + + if(dir->parent_index != 0) + return dir->dir_name ; + + std::map::const_iterator it = mLocalDirs.find(dir->dir_name) ; + + if(it == mLocalDirs.end()) + { + std::cerr << "(EE) Cannot find real name " << dir->dir_name << " at level 1 among shared dirs. Bug?" << std::endl; + return std::string() ; + } + + return it->second.virtualname ; +} +std::string LocalDirectoryStorage::locked_getVirtualPath(EntryIndex indx) const +{ + if(indx == 0) + return std::string() ; + + std::string res ; + const InternalFileHierarchyStorage::DirEntry *dir = mFileHierarchy->getDirEntry(indx); + + while(dir->parent_index != 0) + { + dir = mFileHierarchy->getDirEntry(dir->parent_index) ; + res += dir->dir_name + "/"+ res ; + } + + std::map::const_iterator it = mLocalDirs.find(dir->dir_name) ; + + if(it == mLocalDirs.end()) + { + std::cerr << "(EE) Cannot find real name " << dir->dir_name << " at level 1 among shared dirs. Bug?" << std::endl; + return std::string() ; + } + return it->second.virtualname + "/" + res; +} +RsFileHash LocalDirectoryStorage::locked_getDirHashFromIndex(EntryIndex indx) const +{ + // hash the full virtual path + + std::string virtual_path = locked_getVirtualPath(indx) ; + + return RsDirUtil::sha1sum((unsigned char*)virtual_path.c_str(),virtual_path.length()) ; +} + bool LocalDirectoryStorage::serialiseDirEntry(const EntryIndex& indx,RsTlvBinaryData& bindata,const RsPeerId& client_id) { RS_STACK_MUTEX(mDirStorageMtx) ; @@ -489,13 +541,7 @@ bool LocalDirectoryStorage::serialiseDirEntry(const EntryIndex& indx,RsTlvBinary for(uint32_t i=0;isubdirs.size();++i) if(indx != 0 || (locked_getFileSharingPermissions(dir->subdirs[i],node_flags,node_groups) && (rsPeers->computePeerPermissionFlags(client_id,node_flags,node_groups) & RS_FILE_HINTS_BROWSABLE))) { - RsFileHash hash ; - - if(!mFileHierarchy->getDirHashFromIndex(dir->subdirs[i],hash)) - { - std::cerr << "(EE) cannot get hash from index for subdir " << dir->subdirs[i] << " at position " << i << " in subdirs list. Weird." << std::endl; - continue ; - } + RsFileHash hash = locked_getDirHashFromIndex(dir->subdirs[i]) ; allowed_subdirs.push_back(hash) ; } @@ -508,10 +554,11 @@ bool LocalDirectoryStorage::serialiseDirEntry(const EntryIndex& indx,RsTlvBinary // - the index entry for each subdir (the updte TS are exchanged at a higher level) // - the file info for each subfile // + std::string virtual_dir_name = locked_getVirtualDirName(indx) ; - if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_DIR_NAME ,dir->dir_name )) return false ; + if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_DIR_NAME ,virtual_dir_name )) return false ; if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_RECURS_MODIF_TS,(uint32_t)dir->dir_most_recent_time)) return false ; - if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_MODIF_TS ,(uint32_t)dir->dir_modtime )) return false ; + if(!FileListIO::writeField(section_data,section_size,section_offset,FILE_LIST_IO_TAG_MODIF_TS ,(uint32_t)dir->dir_modtime )) return false ; // serialise number of subdirs and number of subfiles diff --git a/libretroshare/src/file_sharing/directory_storage.h b/libretroshare/src/file_sharing/directory_storage.h index fc9aadf7f..7a69eb7f6 100644 --- a/libretroshare/src/file_sharing/directory_storage.h +++ b/libretroshare/src/file_sharing/directory_storage.h @@ -205,6 +205,10 @@ public: bool serialiseDirEntry(const EntryIndex& indx, RsTlvBinaryData& bindata, const RsPeerId &client_id) ; private: + RsFileHash locked_getDirHashFromIndex(EntryIndex indx) const ; + std::string locked_getVirtualPath(EntryIndex indx) const ; + std::string locked_getVirtualDirName(EntryIndex indx) const ; + bool locked_getFileSharingPermissions(const EntryIndex& indx, FileStorageFlags &flags, std::list& parent_groups); std::string locked_findRealRootFromVirtualFilename(const std::string& virtual_rootdir) const; diff --git a/libretroshare/src/file_sharing/filelist_io.h b/libretroshare/src/file_sharing/filelist_io.h index db4df001a..06eee527c 100644 --- a/libretroshare/src/file_sharing/filelist_io.h +++ b/libretroshare/src/file_sharing/filelist_io.h @@ -11,25 +11,31 @@ static const uint32_t FILE_LIST_IO_LOCAL_DIRECTORY_STORAGE_VERSION_0001 = 0x00000001 ; -static const uint8_t FILE_LIST_IO_TAG_UNKNOWN = 0x00 ; +static const uint8_t FILE_LIST_IO_TAG_UNKNOWN = 0x00 ; +static const uint8_t FILE_LIST_IO_TAG_LOCAL_DIRECTORY_VERSION = 0x01 ; -static const uint8_t FILE_LIST_IO_TAG_FILE_SHA1_HASH = 0x01 ; -static const uint8_t FILE_LIST_IO_TAG_FILE_NAME = 0x02 ; -static const uint8_t FILE_LIST_IO_TAG_FILE_SIZE = 0x03 ; -static const uint8_t FILE_LIST_IO_TAG_DIR_NAME = 0x04 ; -static const uint8_t FILE_LIST_IO_TAG_MODIF_TS = 0x05 ; -static const uint8_t FILE_LIST_IO_TAG_RECURS_MODIF_TS = 0x06 ; -static const uint8_t FILE_LIST_IO_TAG_HASH_STORAGE_ENTRY = 0x07 ; -static const uint8_t FILE_LIST_IO_TAG_UPDATE_TS = 0x08 ; -static const uint8_t FILE_LIST_IO_TAG_BINARY_DATA = 0x09 ; -static const uint8_t FILE_LIST_IO_TAG_RAW_NUMBER = 0x0a ; -static const uint8_t FILE_LIST_IO_TAG_ENTRY_INDEX = 0x0b ; -static const uint8_t FILE_LIST_IO_TAG_REMOTE_FILE_ENTRY = 0x0c ; -static const uint8_t FILE_LIST_IO_TAG_LOCAL_FILE_ENTRY = 0x0d ; -static const uint8_t FILE_LIST_IO_TAG_LOCAL_DIR_ENTRY = 0x0e ; -static const uint8_t FILE_LIST_IO_TAG_LOCAL_DIRECTORY_VERSION = 0x0f ; -static const uint8_t FILE_LIST_IO_TAG_PARENT_INDEX = 0x10 ; -static const uint8_t FILE_LIST_IO_TAG_ROW = 0x11 ; +static const uint8_t FILE_LIST_IO_TAG_HASH_STORAGE_ENTRY = 0x10 ; +static const uint8_t FILE_LIST_IO_TAG_LOCAL_FILE_ENTRY = 0x11 ; +static const uint8_t FILE_LIST_IO_TAG_LOCAL_DIR_ENTRY = 0x12 ; +static const uint8_t FILE_LIST_IO_TAG_REMOTE_FILE_ENTRY = 0x13 ; + +static const uint8_t FILE_LIST_IO_TAG_FILE_SHA1_HASH = 0x20 ; +static const uint8_t FILE_LIST_IO_TAG_FILE_NAME = 0x21 ; +static const uint8_t FILE_LIST_IO_TAG_FILE_SIZE = 0x22 ; + +static const uint8_t FILE_LIST_IO_TAG_MODIF_TS = 0x30 ; +static const uint8_t FILE_LIST_IO_TAG_RECURS_MODIF_TS = 0x31 ; +static const uint8_t FILE_LIST_IO_TAG_UPDATE_TS = 0x32 ; + +static const uint8_t FILE_LIST_IO_TAG_ENTRY_INDEX = 0x40 ; +static const uint8_t FILE_LIST_IO_TAG_PARENT_INDEX = 0x41 ; + +static const uint8_t FILE_LIST_IO_TAG_DIR_HASH = 0x50 ; +static const uint8_t FILE_LIST_IO_TAG_DIR_NAME = 0x51 ; + +static const uint8_t FILE_LIST_IO_TAG_ROW = 0x60 ; +static const uint8_t FILE_LIST_IO_TAG_BINARY_DATA = 0x61 ; +static const uint8_t FILE_LIST_IO_TAG_RAW_NUMBER = 0x62 ; static const uint32_t SECTION_HEADER_MAX_SIZE = 6 ; // section tag (1 byte) + size (max = 5 bytes) diff --git a/libretroshare/src/file_sharing/p3filelists.cc b/libretroshare/src/file_sharing/p3filelists.cc index 60312124e..c0c603ecc 100644 --- a/libretroshare/src/file_sharing/p3filelists.cc +++ b/libretroshare/src/file_sharing/p3filelists.cc @@ -154,9 +154,9 @@ int p3FileDatabase::tick() { RS_STACK_MUTEX(mFLSMtx) ; -#ifdef DEBUG_FILE_HIERARCHY +//#ifdef DEBUG_FILE_HIERARCHY mLocalSharedDirs->print(); -#endif +//#endif last_print_time = now ; //#warning this should be removed, but it's necessary atm for updating the GUI