Added Further debugging to try and catch this 10minute reset bug.

* log p3disc::sendHeartbeat(), successful  p3linkmgr::connectAttempt()
 * log reason for each pqissl::reset().
 * log linkmgr & peermgr  addFriend() and removeFriend().
 * log pqiperson::connect(), p3PeerMgr::cleanUnusedLocations().
 * Modify timing of cleanUnusedLocations (=>8min) & NETSTATE_PARAM_TIMEOUT (=>15min) (see if it has an effect).



git-svn-id: http://svn.code.sf.net/p/retroshare/code/trunk@4539 b45a01b8-16f6-495d-af2f-9b41ad6348cc
This commit is contained in:
drbob 2011-08-06 12:27:23 +00:00
parent d870412523
commit aa63ca1aa6
7 changed files with 61 additions and 15 deletions

View File

@ -628,8 +628,13 @@ bool p3LinkMgrIMPL::connectAttempt(const std::string &id, struct sockaddr_in &ra
std::cerr << "p3LinkMgrIMPL::connectAttempt() Already FLAGGED as connected!!!!" << std::endl;
std::cerr << "p3LinkMgrIMPL::connectAttempt() But allowing anyway!!!" << std::endl;
#endif
rslog(RSL_WARNING, p3connectzone, "p3LinkMgrIMPL::connectAttempt() ERROR ALREADY CONNECTED");
}
rslog(RSL_WARNING, p3connectzone, "p3LinkMgrIMPL::connectAttempt() called id: " + id);
it->second.lastattempt = time(NULL);
it->second.inConnAttempt = true;
it->second.currentConnAddrAttempt = it->second.connAddrs.front();
@ -1237,6 +1242,7 @@ void p3LinkMgrIMPL::peerConnectRequest(std::string id, struct sockaddr_in rad
#ifdef LINKMGR_DEBUG
std::cerr << "p3LinkMgrIMPL::peerConnectRequest() ERROR Peer Already Connected" << std::endl;
#endif
return;
}
/* setup specific attempt for DHT found address. */
locked_ConnectAttempt_SpecificAddress(&(it->second), &raddr);
@ -1932,6 +1938,8 @@ bool p3LinkMgrIMPL::locked_ConnectAttempt_Complete(peerConnectState *peer)
int p3LinkMgrIMPL::addFriend(const std::string &id, bool isVisible)
{
rslog(RSL_WARNING, p3connectzone, "p3LinkMgr::addFriend() id: " + id);
{
RsStackMutex stack(mLinkMtx); /****** STACK LOCK MUTEX *******/
@ -1970,6 +1978,8 @@ int p3LinkMgrIMPL::addFriend(const std::string &id, bool isVisible)
int p3LinkMgrIMPL::removeFriend(const std::string &id)
{
rslog(RSL_WARNING, p3connectzone, "p3LinkMgr::removeFriend() id: " + id);
{
RsStackMutex stack(mLinkMtx); /****** STACK LOCK MUTEX *******/

View File

@ -37,8 +37,8 @@
//#include "util/dnsresolver.h"
#include "util/rsprint.h"
//#include "util/rsdebug.h"
//const int p3connectzone = 3431;
#include "util/rsdebug.h"
const int p3peermgrzone = 9531;
#include "serialiser/rsconfigitems.h"
#include "pqi/pqinotify.h"
@ -177,7 +177,8 @@ void p3PeerMgrIMPL::tick()
{
static time_t last_friends_check = time(NULL) ;
static const time_t INTERVAL_BETWEEN_LOCATION_CLEANING = 600 ; // Remove unused locations every 10 minutes.
//static const time_t INTERVAL_BETWEEN_LOCATION_CLEANING = 600 ; // Remove unused locations every 10 minutes.
static const time_t INTERVAL_BETWEEN_LOCATION_CLEANING = 480 ; // Switch to 8 minutes -> see if it changes
time_t now = time(NULL) ;
@ -185,6 +186,8 @@ void p3PeerMgrIMPL::tick()
{
std::cerr << "p3PeerMgrIMPL::tick(): cleaning unused locations." << std::endl ;
rslog(RSL_WARNING, p3peermgrzone, "p3PeerMgr::tick() cleanUnusedLocations()");
rsPeers->cleanUnusedLocations() ;
last_friends_check = now ;
}
@ -354,6 +357,10 @@ bool p3PeerMgrIMPL::haveOnceConnected()
bool p3PeerMgrIMPL::addFriend(const std::string &id, const std::string &gpg_id, uint32_t netMode, uint32_t visState, time_t lastContact)
{
bool notifyLinkMgr = false;
rslog(RSL_WARNING, p3peermgrzone, "p3PeerMgr::addFriend() id: " + id);
{
RsStackMutex stack(mPeerMtx); /****** STACK LOCK MUTEX *******/
@ -472,6 +479,8 @@ bool p3PeerMgrIMPL::removeFriend(const std::string &id)
std::cerr << "p3PeerMgrIMPL::removeFriend() mFriendList.size() : " << mFriendList.size() << std::endl;
#endif
rslog(RSL_WARNING, p3peermgrzone, "p3PeerMgr::removeFriend() id: " + id);
std::list<std::string> toRemove; // This is a list of SSLIds.
{

View File

@ -219,7 +219,8 @@ void pqiNetStateBox::reset()
}
#define NETSTATE_PARAM_TIMEOUT 600
//#define NETSTATE_PARAM_TIMEOUT 600
#define NETSTATE_PARAM_TIMEOUT 900 // Change to 15 minutes -> see if it has effect on reconnect time
#define NETSTATE_TIMEOUT 60
/* check/update Net State */

View File

@ -117,7 +117,7 @@ int pqiperson::tick()
(time(NULL) - lastHeartbeatReceived) > HEARTBEAT_REPEAT_TIME * 5)
{
std::ostringstream out;
out << "pqiperson::tick() No heartbeat from the peer, assume connection is dead. LastHeartbeat was: ";
out << "pqiperson::tick() No heartbeat from the peer, assume connection is dead. calling pqissl::reset(), LastHeartbeat was: ";
out << time(NULL) - lastHeartbeatReceived << " secs ago";
pqioutput(PQL_WARNING, pqipersonzone, out.str());
this->reset();
@ -307,9 +307,8 @@ int pqiperson::reset()
{
{
std::ostringstream out;
out << "pqiperson::reset() Id: " << PeerId();
out << std::endl;
pqioutput(PQL_DEBUG_BASIC, pqipersonzone, out.str());
out << "pqiperson::reset() resetting all pqiconnect for Id: " << PeerId();
pqioutput(PQL_WARNING, pqipersonzone, out.str());
}
std::map<uint32_t, pqiconnect *>::iterator it;
@ -387,6 +386,7 @@ int pqiperson::connect(uint32_t type, struct sockaddr_in raddr,
uint32_t delay, uint32_t period, uint32_t timeout, uint32_t flags, uint32_t bandwidth)
{
#ifdef PERSON_DEBUG
#endif
{
std::ostringstream out;
out << "pqiperson::connect() Id: " << PeerId();
@ -400,10 +400,9 @@ int pqiperson::connect(uint32_t type, struct sockaddr_in raddr,
out << " flags: " << flags;
out << " bandwidth: " << bandwidth;
out << std::endl;
std::cerr << out.str();
//pqioutput(PQL_DEBUG_BASIC, pqipersonzone, out.str());
//std::cerr << out.str();
pqioutput(PQL_WARNING, pqipersonzone, out.str());
}
#endif
std::map<uint32_t, pqiconnect *>::iterator it;
@ -429,6 +428,7 @@ int pqiperson::connect(uint32_t type, struct sockaddr_in raddr,
#ifdef PERSON_DEBUG
#endif
/* set the parameters */
pqioutput(PQL_WARNING, pqipersonzone, "pqiperson::connect reset() before connection attempt");
(it->second)->reset();
#ifdef PERSON_DEBUG

View File

@ -415,6 +415,7 @@ int pqipersongrp::addPeer(std::string id)
sm -> sp = secpolicy_create();
// reset it to start it working.
pqioutput(PQL_WARNING, pqipersongrpzone, "pqipersongrp::addPeer() => reset() called to initialise new person");
pqip -> reset();
pqip -> listen();
} /* UNLOCKED */
@ -443,6 +444,7 @@ int pqipersongrp::removePeer(std::string id)
secpolicy_delete(mod -> sp);
pqiperson *p = (pqiperson *) mod -> pqi;
p -> stoplistening();
pqioutput(PQL_WARNING, pqipersongrpzone, "pqipersongrp::removePeer() => reset() called before deleting person");
p -> reset();
delete p;
mods.erase(it);

View File

@ -139,6 +139,8 @@ pqissl::pqissl(pqissllistener *l, PQInterface *parent, p3LinkMgr *lm)
rslog(RSL_ALERT, pqisslzone,
"pqissl::~pqissl -> destroying pqissl");
stoplistening(); /* remove from pqissllistener only */
rslog(RSL_ALERT, pqisslzone, "pqissl::~pqissl() -> calling reset()");
reset();
return;
}
@ -177,6 +179,7 @@ int pqissl::stoplistening()
int pqissl::disconnect()
{
rslog(RSL_ALERT, pqisslzone, "pqissl::disconnect() -> calling reset()");
return reset();
}
@ -188,6 +191,7 @@ int pqissl::getConnectAddress(struct sockaddr_in &raddr) {
/* BinInterface version of reset() for pqistreamer */
int pqissl::close()
{
rslog(RSL_ALERT, pqisslzone, "pqissl::close() -> calling reset()");
return reset();
}
@ -441,7 +445,7 @@ int pqissl::ConnectAttempt()
default:
rslog(RSL_ALERT, pqisslzone,
"pqissl::ConnectAttempt() STATE = Unknown - Reset");
"pqissl::ConnectAttempt() STATE = Unknown - calling reset()");
reset();
break;
@ -775,6 +779,7 @@ int pqissl::Basic_Connection_Complete()
rslog(RSL_WARNING, pqisslzone, out.str());
/* as sockfd is valid, this should close it all up */
rslog(RSL_ALERT, pqisslzone, "pqissl::Basic_Connection_Complete() -> calling reset()");
reset();
return -1;
}
@ -791,6 +796,7 @@ int pqissl::Basic_Connection_Complete()
{
rslog(RSL_ALERT, pqisslzone,
"pqissl::Basic_Connection_Complete() problem with the socket descriptor. Aborting");
rslog(RSL_ALERT, pqisslzone, "pqissl::Basic_Connection_Complete() -> calling reset()");
reset();
return -1;
}
@ -1091,6 +1097,7 @@ int pqissl::SSL_Connection_Complete()
// attempt real error.
Extract_Failed_SSL_Certificate();
rslog(RSL_ALERT, pqisslzone, "pqissl::SSL_Connection_Complete() -> calling reset()");
reset();
waiting = WAITING_FAIL_INTERFACE;
@ -1156,9 +1163,10 @@ int pqissl::Authorise_SSL_Connection()
if (time(NULL) > ssl_connect_timeout)
{
rslog(RSL_DEBUG_BASIC, pqisslzone,
rslog(RSL_WARNING, pqisslzone,
"pqissl::Authorise_SSL_Connection() Connection Timed Out!");
/* as sockfd is valid, this should close it all up */
rslog(RSL_ALERT, pqisslzone, "pqissl::Authorise_Connection_Complete() -> calling reset()");
reset();
}
@ -1181,6 +1189,7 @@ int pqissl::Authorise_SSL_Connection()
rslog(RSL_WARNING, pqisslzone,
"pqissl::Authorise_SSL_Connection() Peer Didnt Give Cert");
rslog(RSL_ALERT, pqisslzone, "pqissl::Authorise_Connection_Complete() -> calling reset()");
// Failed completely
reset();
return -1;
@ -1192,6 +1201,7 @@ int pqissl::Authorise_SSL_Connection()
rslog(RSL_WARNING, pqisslzone,
"pqissl::Authorise_SSL_Connection() the cert Id doesn't match the Peer id we're trying to connect to.");
rslog(RSL_ALERT, pqisslzone, "pqissl::Authorise_Connection_Complete() -> calling reset()");
// Failed completely
reset();
return -1;
@ -1229,6 +1239,7 @@ int pqissl::Authorise_SSL_Connection()
}
// else shutdown ssl connection.
rslog(RSL_ALERT, pqisslzone, "pqissl::Authorise_Connection_Complete() -> calling reset()");
reset();
return 0;
@ -1284,6 +1295,7 @@ int pqissl::accept(SSL *ssl, int fd, struct sockaddr_in foreign_addr) // initiat
rslog(RSL_ALERT, pqisslzone,
"pqissl::accept() STATE = Unknown - ignore?");
rslog(RSL_ALERT, pqisslzone, "pqissl::accept() -> calling reset()");
reset();
break;
}
@ -1368,6 +1380,7 @@ int pqissl::accept(SSL *ssl, int fd, struct sockaddr_in foreign_addr) // initiat
active = false;
waiting = WAITING_FAIL_INTERFACE;
// failed completely.
rslog(RSL_ALERT, pqisslzone, "pqissl::accept() -> calling reset()");
reset();
return -1;
}
@ -1443,6 +1456,7 @@ int pqissl::senddata(void *data, int len)
rslog(RSL_ALERT, pqisslzone, out2.str());
}
rslog(RSL_ALERT, pqisslzone, "pqissl::senddata() -> calling reset()");
reset();
return -1;
}
@ -1473,6 +1487,7 @@ int pqissl::senddata(void *data, int len)
std::cerr << out.str() ;
rslog(RSL_ALERT, pqisslzone, out.str());
rslog(RSL_ALERT, pqisslzone, "pqissl::senddata() -> calling reset()");
reset();
return -1;
}
@ -1551,6 +1566,8 @@ int pqissl::readdata(void *data, int len)
{
out << "Count passed Limit, shutting down!";
out << " ReadZero Age: " << time(NULL) - mReadZeroTS;
rslog(RSL_ALERT, pqisslzone, "pqissl::readdata() -> calling reset()");
reset();
}
@ -1587,6 +1604,7 @@ int pqissl::readdata(void *data, int len)
rslog(RSL_ALERT, pqisslzone, out2.str());
}
rslog(RSL_ALERT, pqisslzone, "pqissl::readdata() -> calling reset()");
reset();
std::cerr << out.str() << std::endl ;
return -1;
@ -1613,6 +1631,8 @@ int pqissl::readdata(void *data, int len)
out << "\tResetting!";
rslog(RSL_ALERT, pqisslzone, out.str());
std::cerr << out.str() << std::endl ;
rslog(RSL_ALERT, pqisslzone, "pqissl::readdata() -> calling reset()");
reset();
return -1;
}
@ -1692,6 +1712,7 @@ bool pqissl::moretoread()
// this is a definite bad socket!.
// reset.
rslog(RSL_ALERT, pqisslzone, "pqissl::moretoread() -> calling reset()");
reset();
return 0;
}
@ -1761,6 +1782,7 @@ bool pqissl::cansend()
// this is a definite bad socket!.
// reset.
rslog(RSL_ALERT, pqisslzone, "pqissl::cansend() -> calling reset()");
reset();
return 0;
}

View File

@ -509,14 +509,16 @@ void p3disc::sendOwnVersion(std::string to)
void p3disc::sendHeartbeat(std::string to)
{
{
#ifdef P3DISC_DEBUG
std::ostringstream out;
out << "p3disc::sendHeartbeat()";
out << " Sending tick to : " << to << std::endl;
out << " to : " << to;
#ifdef P3DISC_DEBUG
std::cerr << out.str() << std::endl;
#endif
rslog(RSL_WARNING, pqidisczone, out.str());
}
RsDiscHeartbeat *di = new RsDiscHeartbeat();
di->PeerId(to);