From aa63ca1aa634361f768116cabfcb02d536820793 Mon Sep 17 00:00:00 2001 From: drbob Date: Sat, 6 Aug 2011 12:27:23 +0000 Subject: [PATCH] Added Further debugging to try and catch this 10minute reset bug. * log p3disc::sendHeartbeat(), successful p3linkmgr::connectAttempt() * log reason for each pqissl::reset(). * log linkmgr & peermgr addFriend() and removeFriend(). * log pqiperson::connect(), p3PeerMgr::cleanUnusedLocations(). * Modify timing of cleanUnusedLocations (=>8min) & NETSTATE_PARAM_TIMEOUT (=>15min) (see if it has an effect). git-svn-id: http://svn.code.sf.net/p/retroshare/code/trunk@4539 b45a01b8-16f6-495d-af2f-9b41ad6348cc --- libretroshare/src/pqi/p3linkmgr.cc | 10 ++++++++++ libretroshare/src/pqi/p3peermgr.cc | 15 +++++++++++--- libretroshare/src/pqi/pqinetstatebox.cc | 3 ++- libretroshare/src/pqi/pqiperson.cc | 14 ++++++------- libretroshare/src/pqi/pqipersongrp.cc | 2 ++ libretroshare/src/pqi/pqissl.cc | 26 +++++++++++++++++++++++-- libretroshare/src/services/p3disc.cc | 6 ++++-- 7 files changed, 61 insertions(+), 15 deletions(-) diff --git a/libretroshare/src/pqi/p3linkmgr.cc b/libretroshare/src/pqi/p3linkmgr.cc index 72a875d13..cc1f18cf3 100644 --- a/libretroshare/src/pqi/p3linkmgr.cc +++ b/libretroshare/src/pqi/p3linkmgr.cc @@ -628,8 +628,13 @@ bool p3LinkMgrIMPL::connectAttempt(const std::string &id, struct sockaddr_in &ra std::cerr << "p3LinkMgrIMPL::connectAttempt() Already FLAGGED as connected!!!!" << std::endl; std::cerr << "p3LinkMgrIMPL::connectAttempt() But allowing anyway!!!" << std::endl; #endif + + rslog(RSL_WARNING, p3connectzone, "p3LinkMgrIMPL::connectAttempt() ERROR ALREADY CONNECTED"); + } + rslog(RSL_WARNING, p3connectzone, "p3LinkMgrIMPL::connectAttempt() called id: " + id); + it->second.lastattempt = time(NULL); it->second.inConnAttempt = true; it->second.currentConnAddrAttempt = it->second.connAddrs.front(); @@ -1237,6 +1242,7 @@ void p3LinkMgrIMPL::peerConnectRequest(std::string id, struct sockaddr_in rad #ifdef LINKMGR_DEBUG std::cerr << "p3LinkMgrIMPL::peerConnectRequest() ERROR Peer Already Connected" << std::endl; #endif + return; } /* setup specific attempt for DHT found address. */ locked_ConnectAttempt_SpecificAddress(&(it->second), &raddr); @@ -1932,6 +1938,8 @@ bool p3LinkMgrIMPL::locked_ConnectAttempt_Complete(peerConnectState *peer) int p3LinkMgrIMPL::addFriend(const std::string &id, bool isVisible) { + rslog(RSL_WARNING, p3connectzone, "p3LinkMgr::addFriend() id: " + id); + { RsStackMutex stack(mLinkMtx); /****** STACK LOCK MUTEX *******/ @@ -1970,6 +1978,8 @@ int p3LinkMgrIMPL::addFriend(const std::string &id, bool isVisible) int p3LinkMgrIMPL::removeFriend(const std::string &id) { + rslog(RSL_WARNING, p3connectzone, "p3LinkMgr::removeFriend() id: " + id); + { RsStackMutex stack(mLinkMtx); /****** STACK LOCK MUTEX *******/ diff --git a/libretroshare/src/pqi/p3peermgr.cc b/libretroshare/src/pqi/p3peermgr.cc index 48c325ffc..5445bd90f 100644 --- a/libretroshare/src/pqi/p3peermgr.cc +++ b/libretroshare/src/pqi/p3peermgr.cc @@ -37,8 +37,8 @@ //#include "util/dnsresolver.h" #include "util/rsprint.h" -//#include "util/rsdebug.h" -//const int p3connectzone = 3431; +#include "util/rsdebug.h" +const int p3peermgrzone = 9531; #include "serialiser/rsconfigitems.h" #include "pqi/pqinotify.h" @@ -177,7 +177,8 @@ void p3PeerMgrIMPL::tick() { static time_t last_friends_check = time(NULL) ; - static const time_t INTERVAL_BETWEEN_LOCATION_CLEANING = 600 ; // Remove unused locations every 10 minutes. + //static const time_t INTERVAL_BETWEEN_LOCATION_CLEANING = 600 ; // Remove unused locations every 10 minutes. + static const time_t INTERVAL_BETWEEN_LOCATION_CLEANING = 480 ; // Switch to 8 minutes -> see if it changes time_t now = time(NULL) ; @@ -185,6 +186,8 @@ void p3PeerMgrIMPL::tick() { std::cerr << "p3PeerMgrIMPL::tick(): cleaning unused locations." << std::endl ; + rslog(RSL_WARNING, p3peermgrzone, "p3PeerMgr::tick() cleanUnusedLocations()"); + rsPeers->cleanUnusedLocations() ; last_friends_check = now ; } @@ -354,6 +357,10 @@ bool p3PeerMgrIMPL::haveOnceConnected() bool p3PeerMgrIMPL::addFriend(const std::string &id, const std::string &gpg_id, uint32_t netMode, uint32_t visState, time_t lastContact) { bool notifyLinkMgr = false; + + rslog(RSL_WARNING, p3peermgrzone, "p3PeerMgr::addFriend() id: " + id); + + { RsStackMutex stack(mPeerMtx); /****** STACK LOCK MUTEX *******/ @@ -472,6 +479,8 @@ bool p3PeerMgrIMPL::removeFriend(const std::string &id) std::cerr << "p3PeerMgrIMPL::removeFriend() mFriendList.size() : " << mFriendList.size() << std::endl; #endif + rslog(RSL_WARNING, p3peermgrzone, "p3PeerMgr::removeFriend() id: " + id); + std::list toRemove; // This is a list of SSLIds. { diff --git a/libretroshare/src/pqi/pqinetstatebox.cc b/libretroshare/src/pqi/pqinetstatebox.cc index 38129041b..060e153c4 100644 --- a/libretroshare/src/pqi/pqinetstatebox.cc +++ b/libretroshare/src/pqi/pqinetstatebox.cc @@ -219,7 +219,8 @@ void pqiNetStateBox::reset() } -#define NETSTATE_PARAM_TIMEOUT 600 +//#define NETSTATE_PARAM_TIMEOUT 600 +#define NETSTATE_PARAM_TIMEOUT 900 // Change to 15 minutes -> see if it has effect on reconnect time #define NETSTATE_TIMEOUT 60 /* check/update Net State */ diff --git a/libretroshare/src/pqi/pqiperson.cc b/libretroshare/src/pqi/pqiperson.cc index 44a5cee65..a514404ed 100644 --- a/libretroshare/src/pqi/pqiperson.cc +++ b/libretroshare/src/pqi/pqiperson.cc @@ -117,7 +117,7 @@ int pqiperson::tick() (time(NULL) - lastHeartbeatReceived) > HEARTBEAT_REPEAT_TIME * 5) { std::ostringstream out; - out << "pqiperson::tick() No heartbeat from the peer, assume connection is dead. LastHeartbeat was: "; + out << "pqiperson::tick() No heartbeat from the peer, assume connection is dead. calling pqissl::reset(), LastHeartbeat was: "; out << time(NULL) - lastHeartbeatReceived << " secs ago"; pqioutput(PQL_WARNING, pqipersonzone, out.str()); this->reset(); @@ -307,9 +307,8 @@ int pqiperson::reset() { { std::ostringstream out; - out << "pqiperson::reset() Id: " << PeerId(); - out << std::endl; - pqioutput(PQL_DEBUG_BASIC, pqipersonzone, out.str()); + out << "pqiperson::reset() resetting all pqiconnect for Id: " << PeerId(); + pqioutput(PQL_WARNING, pqipersonzone, out.str()); } std::map::iterator it; @@ -387,6 +386,7 @@ int pqiperson::connect(uint32_t type, struct sockaddr_in raddr, uint32_t delay, uint32_t period, uint32_t timeout, uint32_t flags, uint32_t bandwidth) { #ifdef PERSON_DEBUG +#endif { std::ostringstream out; out << "pqiperson::connect() Id: " << PeerId(); @@ -400,10 +400,9 @@ int pqiperson::connect(uint32_t type, struct sockaddr_in raddr, out << " flags: " << flags; out << " bandwidth: " << bandwidth; out << std::endl; - std::cerr << out.str(); - //pqioutput(PQL_DEBUG_BASIC, pqipersonzone, out.str()); + //std::cerr << out.str(); + pqioutput(PQL_WARNING, pqipersonzone, out.str()); } -#endif std::map::iterator it; @@ -429,6 +428,7 @@ int pqiperson::connect(uint32_t type, struct sockaddr_in raddr, #ifdef PERSON_DEBUG #endif /* set the parameters */ + pqioutput(PQL_WARNING, pqipersonzone, "pqiperson::connect reset() before connection attempt"); (it->second)->reset(); #ifdef PERSON_DEBUG diff --git a/libretroshare/src/pqi/pqipersongrp.cc b/libretroshare/src/pqi/pqipersongrp.cc index 9331a3c76..d0817cce4 100644 --- a/libretroshare/src/pqi/pqipersongrp.cc +++ b/libretroshare/src/pqi/pqipersongrp.cc @@ -415,6 +415,7 @@ int pqipersongrp::addPeer(std::string id) sm -> sp = secpolicy_create(); // reset it to start it working. + pqioutput(PQL_WARNING, pqipersongrpzone, "pqipersongrp::addPeer() => reset() called to initialise new person"); pqip -> reset(); pqip -> listen(); } /* UNLOCKED */ @@ -443,6 +444,7 @@ int pqipersongrp::removePeer(std::string id) secpolicy_delete(mod -> sp); pqiperson *p = (pqiperson *) mod -> pqi; p -> stoplistening(); + pqioutput(PQL_WARNING, pqipersongrpzone, "pqipersongrp::removePeer() => reset() called before deleting person"); p -> reset(); delete p; mods.erase(it); diff --git a/libretroshare/src/pqi/pqissl.cc b/libretroshare/src/pqi/pqissl.cc index fab8f940e..e1c43d0fc 100644 --- a/libretroshare/src/pqi/pqissl.cc +++ b/libretroshare/src/pqi/pqissl.cc @@ -139,6 +139,8 @@ pqissl::pqissl(pqissllistener *l, PQInterface *parent, p3LinkMgr *lm) rslog(RSL_ALERT, pqisslzone, "pqissl::~pqissl -> destroying pqissl"); stoplistening(); /* remove from pqissllistener only */ + + rslog(RSL_ALERT, pqisslzone, "pqissl::~pqissl() -> calling reset()"); reset(); return; } @@ -177,6 +179,7 @@ int pqissl::stoplistening() int pqissl::disconnect() { + rslog(RSL_ALERT, pqisslzone, "pqissl::disconnect() -> calling reset()"); return reset(); } @@ -188,6 +191,7 @@ int pqissl::getConnectAddress(struct sockaddr_in &raddr) { /* BinInterface version of reset() for pqistreamer */ int pqissl::close() { + rslog(RSL_ALERT, pqisslzone, "pqissl::close() -> calling reset()"); return reset(); } @@ -441,7 +445,7 @@ int pqissl::ConnectAttempt() default: rslog(RSL_ALERT, pqisslzone, - "pqissl::ConnectAttempt() STATE = Unknown - Reset"); + "pqissl::ConnectAttempt() STATE = Unknown - calling reset()"); reset(); break; @@ -775,6 +779,7 @@ int pqissl::Basic_Connection_Complete() rslog(RSL_WARNING, pqisslzone, out.str()); /* as sockfd is valid, this should close it all up */ + rslog(RSL_ALERT, pqisslzone, "pqissl::Basic_Connection_Complete() -> calling reset()"); reset(); return -1; } @@ -791,6 +796,7 @@ int pqissl::Basic_Connection_Complete() { rslog(RSL_ALERT, pqisslzone, "pqissl::Basic_Connection_Complete() problem with the socket descriptor. Aborting"); + rslog(RSL_ALERT, pqisslzone, "pqissl::Basic_Connection_Complete() -> calling reset()"); reset(); return -1; } @@ -1091,6 +1097,7 @@ int pqissl::SSL_Connection_Complete() // attempt real error. Extract_Failed_SSL_Certificate(); + rslog(RSL_ALERT, pqisslzone, "pqissl::SSL_Connection_Complete() -> calling reset()"); reset(); waiting = WAITING_FAIL_INTERFACE; @@ -1156,9 +1163,10 @@ int pqissl::Authorise_SSL_Connection() if (time(NULL) > ssl_connect_timeout) { - rslog(RSL_DEBUG_BASIC, pqisslzone, + rslog(RSL_WARNING, pqisslzone, "pqissl::Authorise_SSL_Connection() Connection Timed Out!"); /* as sockfd is valid, this should close it all up */ + rslog(RSL_ALERT, pqisslzone, "pqissl::Authorise_Connection_Complete() -> calling reset()"); reset(); } @@ -1181,6 +1189,7 @@ int pqissl::Authorise_SSL_Connection() rslog(RSL_WARNING, pqisslzone, "pqissl::Authorise_SSL_Connection() Peer Didnt Give Cert"); + rslog(RSL_ALERT, pqisslzone, "pqissl::Authorise_Connection_Complete() -> calling reset()"); // Failed completely reset(); return -1; @@ -1192,6 +1201,7 @@ int pqissl::Authorise_SSL_Connection() rslog(RSL_WARNING, pqisslzone, "pqissl::Authorise_SSL_Connection() the cert Id doesn't match the Peer id we're trying to connect to."); + rslog(RSL_ALERT, pqisslzone, "pqissl::Authorise_Connection_Complete() -> calling reset()"); // Failed completely reset(); return -1; @@ -1229,6 +1239,7 @@ int pqissl::Authorise_SSL_Connection() } // else shutdown ssl connection. + rslog(RSL_ALERT, pqisslzone, "pqissl::Authorise_Connection_Complete() -> calling reset()"); reset(); return 0; @@ -1284,6 +1295,7 @@ int pqissl::accept(SSL *ssl, int fd, struct sockaddr_in foreign_addr) // initiat rslog(RSL_ALERT, pqisslzone, "pqissl::accept() STATE = Unknown - ignore?"); + rslog(RSL_ALERT, pqisslzone, "pqissl::accept() -> calling reset()"); reset(); break; } @@ -1368,6 +1380,7 @@ int pqissl::accept(SSL *ssl, int fd, struct sockaddr_in foreign_addr) // initiat active = false; waiting = WAITING_FAIL_INTERFACE; // failed completely. + rslog(RSL_ALERT, pqisslzone, "pqissl::accept() -> calling reset()"); reset(); return -1; } @@ -1443,6 +1456,7 @@ int pqissl::senddata(void *data, int len) rslog(RSL_ALERT, pqisslzone, out2.str()); } + rslog(RSL_ALERT, pqisslzone, "pqissl::senddata() -> calling reset()"); reset(); return -1; } @@ -1473,6 +1487,7 @@ int pqissl::senddata(void *data, int len) std::cerr << out.str() ; rslog(RSL_ALERT, pqisslzone, out.str()); + rslog(RSL_ALERT, pqisslzone, "pqissl::senddata() -> calling reset()"); reset(); return -1; } @@ -1551,6 +1566,8 @@ int pqissl::readdata(void *data, int len) { out << "Count passed Limit, shutting down!"; out << " ReadZero Age: " << time(NULL) - mReadZeroTS; + + rslog(RSL_ALERT, pqisslzone, "pqissl::readdata() -> calling reset()"); reset(); } @@ -1587,6 +1604,7 @@ int pqissl::readdata(void *data, int len) rslog(RSL_ALERT, pqisslzone, out2.str()); } + rslog(RSL_ALERT, pqisslzone, "pqissl::readdata() -> calling reset()"); reset(); std::cerr << out.str() << std::endl ; return -1; @@ -1613,6 +1631,8 @@ int pqissl::readdata(void *data, int len) out << "\tResetting!"; rslog(RSL_ALERT, pqisslzone, out.str()); std::cerr << out.str() << std::endl ; + + rslog(RSL_ALERT, pqisslzone, "pqissl::readdata() -> calling reset()"); reset(); return -1; } @@ -1692,6 +1712,7 @@ bool pqissl::moretoread() // this is a definite bad socket!. // reset. + rslog(RSL_ALERT, pqisslzone, "pqissl::moretoread() -> calling reset()"); reset(); return 0; } @@ -1761,6 +1782,7 @@ bool pqissl::cansend() // this is a definite bad socket!. // reset. + rslog(RSL_ALERT, pqisslzone, "pqissl::cansend() -> calling reset()"); reset(); return 0; } diff --git a/libretroshare/src/services/p3disc.cc b/libretroshare/src/services/p3disc.cc index dc6e8c1fa..82b05633e 100644 --- a/libretroshare/src/services/p3disc.cc +++ b/libretroshare/src/services/p3disc.cc @@ -509,14 +509,16 @@ void p3disc::sendOwnVersion(std::string to) void p3disc::sendHeartbeat(std::string to) { { -#ifdef P3DISC_DEBUG std::ostringstream out; out << "p3disc::sendHeartbeat()"; - out << " Sending tick to : " << to << std::endl; + out << " to : " << to; +#ifdef P3DISC_DEBUG std::cerr << out.str() << std::endl; #endif + rslog(RSL_WARNING, pqidisczone, out.str()); } + RsDiscHeartbeat *di = new RsDiscHeartbeat(); di->PeerId(to);