Added a filemapper class to store downloaded files sequentially on the HD. The mapper automatically

re-organises (i.e. defragments) the data such that:
- when the DL is finished, the file is in the correct order (no need to re-order it)
- during the DL, only the first n*_chunk_size bytes of the partial file are written, where n is the total number of downloaded chunks.
- the total amount of copy operations does not exceed the total size of the file. In practice, it's much lower.

This removes the lag that occurred when downloading large files, which was caused by writing isolated chunks in the middle of the file.

Next:
- integration into ftFileCreator
- load/save in ft_transfers
- backward compatibility with existing transfers.



git-svn-id: http://svn.code.sf.net/p/retroshare/code/trunk@4658 b45a01b8-16f6-495d-af2f-9b41ad6348cc
This commit is contained in:
csoler 2011-11-01 14:20:51 +00:00
parent 9761c4c980
commit 3c2a5e8f42
4 changed files with 495 additions and 2 deletions

View File

@ -0,0 +1,301 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <iostream>
#include "ftfilemapper.h"
//#define DEBUG_FILEMAPPER
// Builds an empty mapping for a file of file_size bytes split into chunks of
// chunk_size bytes. Every chunk starts unmapped (-1) and the first free storage
// place is place 0.
ftFileMapper::ftFileMapper(uint64_t file_size,uint32_t chunk_size)
	: _file_size(file_size),_chunk_size(chunk_size)
{
	// Number of chunks needed to cover the file. A trailing partial chunk counts as one.
	int chunk_count = (int)(file_size / (uint64_t)chunk_size) ;

	if((file_size % chunk_size) != 0)
		++chunk_count ;

#ifdef DEBUG_FILEMAPPER
	std::cerr << "(DD) Creating ftFileMapper for file of size " << file_size << ", with " << chunk_count << " chunks." << std::endl;
#endif

	_first_free_chunk = 0 ;

	// Both tables start with every entry marked as "unknown".
	_data_chunks.clear() ;
	_data_chunks.resize(chunk_count,-1) ;

	_mapped_chunks.clear() ;
	_mapped_chunks.resize(chunk_count,-1) ;
}
bool ftFileMapper::computeStorageOffset(uint64_t offset,uint64_t& storage_offset) const
{
// Compute the chunk number for this offset
//
uint32_t cid = (uint32_t)(offset / (uint64_t)_chunk_size) ;
// Check that the cid is in the allowed range. That should always be the case.
//
if(cid < _mapped_chunks.size() && _mapped_chunks[cid] >= 0)
{
storage_offset = _mapped_chunks[cid]*_chunk_size + (offset % (uint64_t)_chunk_size) ;
return true ;
}
else
{
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) ftFileMapper::computeStorageOffset(): offset " << offset << " corresponds to chunk number " << cid << " which is not mapped!!" << std::endl;
#endif
return false ;
}
}
// Writes `size` bytes from `data` into `fd` at position `offset` of the storage
// file, then flushes the stream.
//
// Returns true when the data has been written and flushed, false when seeking,
// writing or flushing failed. Writing zero bytes is a successful no-op.
//
bool ftFileMapper::writeData(uint64_t offset,uint32_t size,void *data,FILE *fd) const
{
	if (0 != fseeko64(fd, offset, SEEK_SET))
	{
		const int err = errno ;	// saved first: stream output may modify errno
		std::cerr << "(EE) ftFileMapper::writeData() Bad fseek at offset " << offset << ", fd=" << (void*)fd << ", size=" << size << ", errno=" << err << std::endl;
		return false;
	}

	// fwrite() returns 0 when asked to write a zero-sized item, which must not be
	// mistaken for an error.
	//
	if (size > 0 && 1 != fwrite(data, size, 1, fd))
	{
		const int err = errno ;
		std::cerr << "(EE) ftFileMapper::writeData() Bad fwrite." << std::endl;
		std::cerr << "ERRNO: " << err << std::endl;
		return false;
	}

	if (0 != fflush(fd))
	{
		const int err = errno ;
		std::cerr << "(EE) ftFileMapper::writeData() Bad fflush, errno=" << err << std::endl;
		return false;
	}
	return true ;
}
bool ftFileMapper::storeData(void *data, uint32_t data_size, uint64_t offset,FILE *fd)
{
uint64_t real_offset = 0;
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) ftFileMapper::storeData(): storing data size " << data_size << " for offset "<< offset << std::endl;
#endif
// we compute the real place of the data in the mapped file. Several cases:
//
// 1 - the place corresponds to a mapped place
// => write there.
// 2 - the place does not correspond to a mapped place.
// 2.0 - we allocate a new chunk at the end of the file.
// 2.0.1 - the chunk corresponds to a mapped chunk somewhere before
// => we move it, and use the other chunk as writing position
// 2.0.2 - the chunk does not correspond to a mapped chunk somewhere before
// => we use it
// 2.1 - the place is in the range of existing data
// => we move the existing data at the end of the file, and update the mapping
// 2.2 - the place is outside the range of existing data
// => we allocate a new chunk at the end of the file, and write there.
// 2.2.1 - we look for the first chunk that is not already mapped before.
//
if(!computeStorageOffset(offset,real_offset))
{
uint32_t cid = (uint32_t)(offset / (uint64_t)_chunk_size) ;
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) real offset unknown. chunk id is " << cid << std::endl;
#endif
uint32_t empty_chunk = allocateNewEmptyChunk(fd) ;
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) allocated new empty chunk " << empty_chunk << std::endl;
#endif
if(cid < _first_free_chunk && cid != empty_chunk) // the place is already occupied by some data
{
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) chunk already in use. " << std::endl;
std::cerr << "(DD) swapping with first free chunk: " << empty_chunk << std::endl;
#endif
if(!moveChunk(cid, empty_chunk,fd))
{
std::cerr << "(EE) ftFileMapper::writeData(): cannot move chunk " << empty_chunk << " and " << cid << std::endl ;
return false ;
}
// Get the old chunk id that was mapping to this place
//
int oid = _data_chunks[cid] ;
if(oid < 0)
{
std::cerr << "(EE) ftFileMapper::writeData(): cannot find chunk that was previously mapped to place " << cid << std::endl ;
return false ;
}
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) old chunk now pointing to: " << empty_chunk << std::endl;
std::cerr << "(DD) new chunk now pointing to: " << cid << std::endl;
#endif
_mapped_chunks[cid] = cid ; // this one is in place, since we swapped it
_mapped_chunks[oid] = empty_chunk ;
_data_chunks[cid] = cid ;
_data_chunks[empty_chunk] = oid ;
}
else // allocate a new chunk at end of the file.
{
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) allocating new storage place at first free chunk: " << empty_chunk << std::endl;
#endif
_mapped_chunks[cid] = empty_chunk ;
_data_chunks[empty_chunk] = cid ;
}
real_offset = _mapped_chunks[cid]*_chunk_size + (offset % (uint64_t)_chunk_size) ;
}
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) real offset = " << real_offset << ", data size=" << data_size << std::endl;
std::cerr << "(DD) writing data " << std::endl;
#endif
return writeData(real_offset,data_size,data,fd) ;
}
// Reserves the next storage place (_first_free_chunk) and returns the index of a
// place the caller can write a new chunk into.
//
// If the chunk whose id equals _first_free_chunk is currently stored at an earlier
// place, its data is moved to its final place and the earlier place is returned
// instead. Otherwise the place at _first_free_chunk is wiped and returned.
// In both cases _first_free_chunk is incremented.
//
// The return type cannot carry an error, so I/O failures are reported on stderr.
//
uint32_t ftFileMapper::allocateNewEmptyChunk(FILE *fd_out)
{
	// Callers must only ask for a place while one is left, otherwise the table
	// accesses below would be out of bounds.
	//
	assert(_first_free_chunk < _mapped_chunks.size()) ;

	// look into _first_free_chunk. Is it the place of a chunk already mapped before?
	//
#ifdef DEBUG_FILEMAPPER
	std::cerr << "(DD) ftFileMapper::allocateNewEmptyChunk()" << std::endl;
#endif
	const int current_place = _mapped_chunks[_first_free_chunk] ;

	if(current_place >= 0 && current_place < (int)_first_free_chunk)
	{
		uint32_t old_chunk = (uint32_t)current_place ;
#ifdef DEBUG_FILEMAPPER
		std::cerr << "(DD) first free chunk " << _first_free_chunk << " is actually mapped to " << old_chunk << ". Moving it." << std::endl;
#endif
		if(!moveChunk(old_chunk,_first_free_chunk,fd_out))
			std::cerr << "(EE) ftFileMapper::allocateNewEmptyChunk(): cannot move chunk " << old_chunk << " to place " << _first_free_chunk << std::endl ;

		_mapped_chunks[_first_free_chunk] = _first_free_chunk ;
		_data_chunks[_first_free_chunk] = _first_free_chunk ;
		_first_free_chunk++ ;
#ifdef DEBUG_FILEMAPPER
		std::cerr << "(DD) Returning " << old_chunk << std::endl;
#endif
		return old_chunk ;
	}
	else
	{
#ifdef DEBUG_FILEMAPPER
		std::cerr << "(DD) first free chunk is fine. Returning " << _first_free_chunk << ", and making room" << std::endl;
#endif
		// We need to wipe the entire chunk, since it might be moved before being completely written, which would cause
		// a fread error.
		//
		if(!wipeChunk(_first_free_chunk,fd_out))
			std::cerr << "(EE) ftFileMapper::allocateNewEmptyChunk(): cannot wipe chunk " << _first_free_chunk << std::endl ;

		return _first_free_chunk++ ;
	}
}
// Fills storage place `cid` of the file `fd` with zeros, so that the whole place
// exists on disk and can later be read back even if only partially written.
// The last place of the file only covers the remaining bytes of the file.
//
// Returns true on success, false if the buffer cannot be allocated or if seeking
// or writing fails.
//
bool ftFileMapper::wipeChunk(uint32_t cid,FILE *fd) const
{
	// 64-bit arithmetic: cid*_chunk_size on 32 bits wraps around beyond 4GB.
	//
	const uint64_t chunk_start = (uint64_t)cid * (uint64_t)_chunk_size ;

	uint32_t size = (cid == _mapped_chunks.size()-1)?(uint32_t)(_file_size - chunk_start) : _chunk_size ;

	// calloc() so that zeros are written, not whatever the heap contained.
	//
	void *buf = calloc(1,size) ;
	if(buf == NULL)
	{
		std::cerr << "(EE) ftFileMapper::wipeChunk(): cannot allocate temporary buf of size " << size << std::endl;
		return false ;
	}
	if(fseeko64(fd, chunk_start, SEEK_SET)!= 0)
	{
		std::cerr << "(EE) ftFileMapper::wipeChunk(): cannot fseek file at position " << chunk_start << std::endl;
		free(buf) ;
		return false ;
	}
	if(1 != fwrite(buf, size, 1, fd))
	{
		std::cerr << "(EE) ftFileMapper::wipeChunk(): cannot write to file" << std::endl;
		free(buf) ;
		return false ;
	}
	free(buf) ;
	return true ;
}
bool ftFileMapper::moveChunk(uint32_t to_move, uint32_t new_place,FILE *fd_out)
{
// Read the old chunk, write at the new place
assert(to_move != new_place) ;
fflush(fd_out) ;
#ifdef DEBUG_FILEMAPPER
std::cerr << "(DD) ftFileMapper::moveChunk(): moving chunk " << to_move << " to place " << new_place << std::endl ;
#endif
uint32_t new_place_size = (new_place == _mapped_chunks.size()-1)?(_file_size - (_mapped_chunks.size()-1)*_chunk_size) : _chunk_size ;
uint32_t to_move_size = (new_place == _mapped_chunks.size()-1)?(_file_size - (_mapped_chunks.size()-1)*_chunk_size) : _chunk_size ;
uint32_t size = std::min(new_place_size,to_move_size) ;
void *buff = malloc(size) ;
if(buff == NULL)
{
std::cerr << "(EE) ftFileMapper::moveChunk(): cannot open temporary buffer. Out of memory??" << std::endl;
return false ;
}
if(fseeko64(fd_out, to_move*_chunk_size, SEEK_SET) != 0)
{
std::cerr << "(EE) ftFileMapper::moveChunk(): cannot fseek file at position " << to_move*_chunk_size << std::endl;
return false ;
}
size_t rd ;
if(size != (rd = fread(buff, 1, size, fd_out)))
{
std::cerr << "(EE) ftFileMapper::moveChunk(): cannot read from file" << std::endl;
std::cerr << "(EE) errno = " << errno << std::endl;
std::cerr << "(EE) feof = " << feof(fd_out) << std::endl;
std::cerr << "(EE) size = " << size << std::endl;
std::cerr << "(EE) rd = " << rd << std::endl;
return false ;
}
if(fseeko64(fd_out, new_place*_chunk_size, SEEK_SET)!= 0)
{
std::cerr << "(EE) ftFileMapper::moveChunk(): cannot fseek file at position " << new_place*_chunk_size << std::endl;
return false ;
}
if(1 != fwrite(buff, size, 1, fd_out))
{
std::cerr << "(EE) ftFileMapper::moveChunk(): cannot write to file" << std::endl;
return false ;
}
free(buff) ;
return true ;
}
// Debug helper: dumps on stderr the chunk -> place table, the first free place,
// then the place -> chunk table, all on a single line.
void ftFileMapper::print() const
{
	typedef std::vector<int>::const_iterator table_iterator ;

	std::cerr << "ftFileMapper:: [ " ;

	for(table_iterator it = _mapped_chunks.begin(); it != _mapped_chunks.end(); ++it)
		std::cerr << *it << " " ;

	std::cerr << "] - ffc = " << _first_free_chunk << " - [ ";

	for(table_iterator it = _data_chunks.begin(); it != _data_chunks.end(); ++it)
		std::cerr << *it << " " ;

	std::cerr << " ] " << std::endl;
}

View File

@ -0,0 +1,98 @@
/*
* libretroshare/src/ft/ftfilemapper.h
*
* File Transfer for RetroShare.
*
* Copyright 2011 by Cyril Soler.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License Version 2 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*
* Please report all bugs and problems to "csoler@users.sourceforge.net".
*
*/
#pragma once
// This class implements data storage for incoming files. It provides the following functionality:
//
// - linear storage of data
// - record of which order the data is currently stored into
// - automatic (and clever) re-organisation of data as it arrives
//
// The implementation ensures that:
// - when the file is complete, the data is always ordered correctly
// - when data comes, the writes always happen within the range of existing data, plus at most one chunk.
//
// Using this class avoids writing in the middle of large files while downloading them, which removes the lag
// of the RS interface when starting the DL of a large file.
//
// The re-organisation of the file data occurs seamlessly during writes, and therefore does not cause any big
// freeze at end of download.
//
// As soon as the first chunk has been downloaded, it is possible to preview a file by directly playing
// the partial file, so no change is needed to preview.
//
#include <stdint.h>
#include <string>
#include <vector>
// Maps the chunks of a file being downloaded onto storage places of a partial
// file, so that data is always written within the first places of that file, and
// moves chunks around as needed while new data is stored.
//
// NOTE(review): FILE is used in the signatures below but this header only includes
// <stdint.h>, <string> and <vector>; it relies on the includer providing <stdio.h>.
//
class ftFileMapper
{
public:
// Total size of the file to store, and size of the chunks it is divided into.
// All chunks start unmapped.
ftFileMapper(uint64_t file_size,uint32_t chunk_size) ;
// Storage/retrieval of data. All offsets are given in absolute position in the file. The class handles
// the real mapping (hence the name).
// Stores the data in the file fd, at the given offset. The chunk holding that offset is not necessarily
// mapped yet. If not, a storage place is allocated for it (moving existing chunks when needed).
// If yes, the data is written at its current storage place.
// Returned values:
//
// true: the data has correctly been written
// false: the data could not be written
//
bool storeData(void *data, uint32_t data_size, uint64_t offset,FILE *fd) ;
// Gets the data from the storage file. The data should be there. The data is stored into buff, which needs to be
// allocated by the client. Returned values:
//
// true: the data has correctly been read
// false: the data could not be read, or does not exist.
//
// NOTE(review): no definition of this method is visible in the implementation
// accompanying this header - confirm it exists before calling it.
//
bool readData(void *buff, uint32_t data_size, uint64_t offset) ;
// debug: prints both mapping tables and the first free place on stderr.
void print() const ;
private:
uint64_t _file_size ; // size of the file, in bytes
uint32_t _chunk_size ; // size of chunks, in bytes
uint32_t _first_free_chunk ; // first storage place of the mapped file that is still available
// _mapped_chunks[c] is the storage place currently holding chunk c of the file, or -1 if chunk c
// is not stored yet.
std::vector<int> _mapped_chunks ;
// _data_chunks[p] is the id of the chunk currently stored at place p, or -1 if unknown.
std::vector<int> _data_chunks ;
// Seeks to offset in fd (a position in the storage file) and writes size bytes from data.
bool writeData(uint64_t offset,uint32_t size,void *data,FILE *fd) const ;
// NOTE(review): declared only; no definition is visible alongside this header - confirm.
bool readData(uint64_t offset,uint32_t size,void *data,FILE *fd) const ;
// Overwrites storage place cid in fd, so that the whole place exists on disk.
bool wipeChunk(uint32_t cid,FILE *fd) const ;
// Converts an absolute file offset into a storage offset. Returns false if the chunk is not mapped.
bool computeStorageOffset(uint64_t offset,uint64_t& storage_offset) const ;
// Copies the content of storage place src to storage place dst. Does not update the mapping tables.
bool moveChunk(uint32_t src,uint32_t dst,FILE *fd_out) ;
// Reserves the next storage place and returns the index of a place that can receive a new chunk.
uint32_t allocateNewEmptyChunk(FILE *fd) ;
};

View File

@ -8,12 +8,15 @@ DHT_TOP_DIR = ../../../../libbitdht/src
include $(RS_TOP_DIR)/tests/scripts/config.mk
###############################################################
TESTOBJ = ftfileprovidertest.o ftfilecreatortest.o ftextralisttest.o ftdataplextest.o fttransfermoduletest.o ftcrc32test.o ftcrossprovidercreatortest.o ftcontrollertest.o ftserver1test.o ftserver2test.o ftserver3test.o
TESTOBJ = ftfilemappertest.o ftfileprovidertest.o ftfilecreatortest.o ftextralisttest.o ftdataplextest.o fttransfermoduletest.o ftcrc32test.o ftcrossprovidercreatortest.o ftcontrollertest.o ftserver1test.o ftserver2test.o ftserver3test.o
TESTS = ftfileprovidertest ftfilecreatortest ftextralisttest ftdataplextest fttransfermoduletest ftcrc32test ftcrossprovidercreatortest ftcontrollertest ftserver1test ftserver2test fttransfermoduletest ftserver3test
TESTS = ftfilemappertest ftfileprovidertest ftfilecreatortest ftextralisttest ftdataplextest fttransfermoduletest ftcrc32test ftcrossprovidercreatortest ftcontrollertest ftserver1test ftserver2test fttransfermoduletest ftserver3test
all: tests
# Link the ftFileMapper unit test executable from its object file.
ftfilemappertest : ftfilemappertest.o
$(CC) $(CFLAGS) -o ftfilemappertest ftfilemappertest.o $(LIBS)
ftcontrollertest : ftcontrollertest.o
$(CC) $(CFLAGS) -o ftcontrollertest ftcontrollertest.o $(LIBS)

View File

@ -0,0 +1,91 @@
#include "ft/ftfilemapper.h"
#include "ft/ftchunkmap.h"
#include "retroshare/rstypes.h"
#include <util/utest.h>
#include <util/rsdir.h>
#include <stdlib.h>
#include <iostream>
#include <stdint.h>
INITTEST();

// Unit test for ftFileMapper.
//
// Generates a random file, then feeds it to a ftFileMapper in the order chosen by
// a ChunkMap using the random strategy, and finally checks that the file produced
// by the mapper has the same size and hash as the input file.
//
// NOTE(review): getpid() normally requires <unistd.h>, which is not included by
// this file directly - presumably provided by one of the project headers.
//
int main()
{
	/* Use ftfilemapper to create a file with chunks downloaded on a random direction. */
	static const std::string tmpdir = "." ;
	static const std::string input_file = tmpdir+"/"+"input.bin" ;
	static const std::string output_file = tmpdir+"/"+"output.bin" ;
	static const uint64_t size = 1024*1024*12+234;//4357283 ; // some size. Not an integer number of chunks
	static const uint32_t chunk_size = 1024*1024 ; // 1MB

	pid_t pid = getpid() ;
	srand48(pid) ;
	srand(pid) ;
	std::cerr << "Inited random number generator with seed " << pid << std::endl;

	// 0 - create a random file in memory, of size SIZE
	void *membuf = malloc(size) ;
	CHECK(membuf != NULL) ;

	for(uint64_t i=0;i<size;++i)
		((char*)membuf)[i] = lrand48() & 0xff ;

	// also write it to disk. Binary mode, so that no end-of-line translation
	// alters the data on platforms that make the distinction.
	FILE *f = fopen(input_file.c_str(),"wb") ;
	CHECK(f != NULL) ;
	CHECK(fwrite(membuf,size,1,f) == 1) ;
	fclose(f) ;

	// 1 - allocate a chunkmap for this file
	//
	ChunkMap chunk_map(size,true) ;
	chunk_map.setStrategy(FileChunksInfo::CHUNK_STRATEGY_RANDOM) ;
	ftFileMapper fmapper(size,chunk_size);

	// Get the chunks one by one
	//
	FILE *fout = fopen(output_file.c_str(),"w+b") ;
	CHECK(fout != NULL) ;

	ftChunk chunk ;
	bool source_map_needed ;

	while(chunk_map.getDataChunk("virtual peer",1024*200+(lrand48()%1024),chunk,source_map_needed))
	{
		//std::cerr << "Got chunk " << chunk.offset << " + " << chunk.size << " from chunkmap." << std::endl;
		CHECK(fmapper.storeData( (unsigned char *)membuf+chunk.offset,chunk.size,chunk.offset,fout) ) ;
		chunk_map.dataReceived(chunk.id) ;
		fmapper.print() ;
		delete chunk.ref_cnt ;
	}
	fclose(fout) ;

	// The in-memory copy is not needed anymore: the check below works on the files.
	free(membuf) ;

	// Check the sha1 of both source and destination.
	//
	std::string sha1_1,sha1_2 ;
	uint64_t size_1,size_2 ;
	RsDirUtil::getFileHash( input_file,sha1_1,size_1) ;
	RsDirUtil::getFileHash(output_file,sha1_2,size_2) ;

	std::cerr << "Computed hash of file\t " << input_file << "\t :\t" << sha1_1 << ", size=" << size_1 << std::endl;
	std::cerr << "Computed hash of file\t " <<output_file << "\t :\t" << sha1_2 << ", size=" << size_2 << std::endl;

	CHECK(size_1 == size_2) ;
	CHECK(sha1_1 == sha1_2) ;

	FINALREPORT("ftFileMapper Tests");
	return TESTRESULT();
}