From 1a1c23f8d40116741f0e26cdf22688fd91c923fc Mon Sep 17 00:00:00 2001 From: John Newbery Date: Mon, 15 Jun 2020 10:15:08 -0400 Subject: [PATCH 1/4] [net processing] Change cs_main TRY_LOCK to LOCK in SendMessages() This was changed to TRY_LOCK in #1117 to fix a potential deadlock between cs_main and cs_vSend. cs_vSend was split into cs_vSend and cs_sendProcessing in #9535 (and cs_sendProcessing was changed from a TRY_LOCK to a LOCK in the same PR). Since cs_vSend can no longer be taken before cs_main, revert this to a LOCK(). This commit leaves part of the code with bad indentation. That is fixed by the next (whitespace change only) commit. --- src/net_processing.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 8ef79cd719..f5cff6297d 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -3838,7 +3838,7 @@ public: bool PeerLogicValidation::SendMessages(CNode* pto) { const Consensus::Params& consensusParams = Params().GetConsensus(); - { + // Don't send anything until the version handshake is complete if (!pto->fSuccessfullyConnected || pto->fDisconnect) return true; @@ -3875,9 +3875,8 @@ bool PeerLogicValidation::SendMessages(CNode* pto) } } - TRY_LOCK(cs_main, lockMain); - if (!lockMain) - return true; + { + LOCK(cs_main); if (MaybeDiscourageAndDisconnect(*pto)) return true; @@ -4416,7 +4415,7 @@ bool PeerLogicValidation::SendMessages(CNode* pto) pto->m_tx_relay->nextSendTimeFeeFilter = timeNow + GetRandInt(MAX_FEEFILTER_CHANGE_DELAY) * 1000000; } } - } + } // release cs_main return true; } From a1d5a428a24afe4f600be29e9d0d3bb4c720e816 Mon Sep 17 00:00:00 2001 From: John Newbery Date: Mon, 15 Jun 2020 10:19:56 -0400 Subject: [PATCH 2/4] [net processing] Fix bad indentation in SendMessages() Hint for reviewers: review ignoring whitespace changes. --- src/net_processing.cpp | 64 +++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index f5cff6297d..f8b22cd7c8 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -3839,41 +3839,41 @@ bool PeerLogicValidation::SendMessages(CNode* pto) { const Consensus::Params& consensusParams = Params().GetConsensus(); - // Don't send anything until the version handshake is complete - if (!pto->fSuccessfullyConnected || pto->fDisconnect) - return true; + // Don't send anything until the version handshake is complete + if (!pto->fSuccessfullyConnected || pto->fDisconnect) + return true; - // If we get here, the outgoing message serialization version is set and can't change. - const CNetMsgMaker msgMaker(pto->GetSendVersion()); + // If we get here, the outgoing message serialization version is set and can't change. + const CNetMsgMaker msgMaker(pto->GetSendVersion()); - // - // Message: ping - // - bool pingSend = false; - if (pto->fPingQueued) { - // RPC ping request by user - pingSend = true; - } - if (pto->nPingNonceSent == 0 && pto->m_ping_start.load() + PING_INTERVAL < GetTime()) { - // Ping automatically sent as a latency probe & keepalive. - pingSend = true; - } - if (pingSend) { - uint64_t nonce = 0; - while (nonce == 0) { - GetRandBytes((unsigned char*)&nonce, sizeof(nonce)); - } - pto->fPingQueued = false; - pto->m_ping_start = GetTime(); - if (pto->nVersion > BIP0031_VERSION) { - pto->nPingNonceSent = nonce; - connman->PushMessage(pto, msgMaker.Make(NetMsgType::PING, nonce)); - } else { - // Peer is too old to support ping command with nonce, pong will never arrive. - pto->nPingNonceSent = 0; - connman->PushMessage(pto, msgMaker.Make(NetMsgType::PING)); - } + // + // Message: ping + // + bool pingSend = false; + if (pto->fPingQueued) { + // RPC ping request by user + pingSend = true; + } + if (pto->nPingNonceSent == 0 && pto->m_ping_start.load() + PING_INTERVAL < GetTime()) { + // Ping automatically sent as a latency probe & keepalive. + pingSend = true; + } + if (pingSend) { + uint64_t nonce = 0; + while (nonce == 0) { + GetRandBytes((unsigned char*)&nonce, sizeof(nonce)); + } + pto->fPingQueued = false; + pto->m_ping_start = GetTime(); + if (pto->nVersion > BIP0031_VERSION) { + pto->nPingNonceSent = nonce; + connman->PushMessage(pto, msgMaker.Make(NetMsgType::PING, nonce)); + } else { + // Peer is too old to support ping command with nonce, pong will never arrive. + pto->nPingNonceSent = 0; + connman->PushMessage(pto, msgMaker.Make(NetMsgType::PING)); } + } { LOCK(cs_main); From a49781e56d2bd6a61ec027a09c1db9ee1a4abf2e Mon Sep 17 00:00:00 2001 From: John Newbery Date: Mon, 15 Jun 2020 09:28:56 -0400 Subject: [PATCH 3/4] [net processing] Only call MaybeDiscourageAndDisconnect from SendMessages `nMisbehavior` is a tally in `CNodeState` that can be incremented from anywhere. That almost always happens inside a `ProcessMessages()` call (because we increment the misbehavior score when receiving a bad messages from a peer), but not always. See, for example, the call to `MaybePunishNodeForBlock()` inside `BlockChecked()`, which is an asynchronous callback from the validation interface, executed on the scheduler thread. As long as `MaybeDiscourageAndDisconnect()` is called regularly for the node, then the misbehavior score exceeding the 100 threshold will eventually result in the peer being punished. It doesn't really matter where that `MaybeDiscourageAndDisconnect()` happens, but it makes most sense in `SendMessages()` which is where we do general peer housekeeping/maintenance. Therefore, remove the `MaybeDiscourageAndDisconnect()` call in `ProcessMessages()` and move the `MaybeDiscourageAndDisconnect()` call in `SendMessages()` to the top of the function. This moves it out of the cs_main lock scope, so take that lock directly inside `MaybeDiscourageAndDisconnect()`. Historic note: `MaybeDiscourageAndDisconnect()` was previously `SendRejectsAndCheckIfBanned()`, and before that was just sending rejects. All of those things required cs_main, which is why `MaybeDiscourageAndDisconnect()` was called after the ping logic. --- src/net_processing.cpp | 11 +++++------ src/net_processing.h | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index f8b22cd7c8..570cca0c66 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -3559,7 +3559,7 @@ void ProcessMessage( bool PeerLogicValidation::MaybeDiscourageAndDisconnect(CNode& pnode) { - AssertLockHeld(cs_main); + LOCK(cs_main); CNodeState &state = *State(pnode.GetId()); if (state.m_should_discourage) { @@ -3675,9 +3675,6 @@ bool PeerLogicValidation::ProcessMessages(CNode* pfrom, std::atomic& inter LogPrint(BCLog::NET, "%s(%s, %u bytes): Unknown exception caught\n", __func__, SanitizeString(msg_type), nMessageSize); } - LOCK(cs_main); - MaybeDiscourageAndDisconnect(*pfrom); - return fMoreWork; } @@ -3839,6 +3836,10 @@ bool PeerLogicValidation::SendMessages(CNode* pto) { const Consensus::Params& consensusParams = Params().GetConsensus(); + // We must call MaybeDiscourageAndDisconnect first, to ensure that we'll + // disconnect misbehaving peers even before the version handshake is complete. + if (MaybeDiscourageAndDisconnect(*pto)) return true; + // Don't send anything until the version handshake is complete if (!pto->fSuccessfullyConnected || pto->fDisconnect) return true; @@ -3878,8 +3879,6 @@ bool PeerLogicValidation::SendMessages(CNode* pto) { LOCK(cs_main); - if (MaybeDiscourageAndDisconnect(*pto)) return true; - CNodeState &state = *State(pto->GetId()); // Address refresh broadcast diff --git a/src/net_processing.h b/src/net_processing.h index eadf29e59f..48bcbc1a6c 100644 --- a/src/net_processing.h +++ b/src/net_processing.h @@ -31,7 +31,7 @@ private: ChainstateManager& m_chainman; CTxMemPool& m_mempool; - bool MaybeDiscourageAndDisconnect(CNode& pnode) EXCLUSIVE_LOCKS_REQUIRED(cs_main); + bool MaybeDiscourageAndDisconnect(CNode& pnode); public: PeerLogicValidation(CConnman* connman, BanMan* banman, CScheduler& scheduler, ChainstateManager& chainman, CTxMemPool& pool); From 655b1957470c39bcab64917747c9f467444bd809 Mon Sep 17 00:00:00 2001 From: John Newbery Date: Mon, 15 Jun 2020 11:33:14 -0400 Subject: [PATCH 4/4] [net processing] Continue SendMessages processing if not disconnecting peer If we don't disconnect a peer in MaybeDiscourageAndDisconnect because it has NOBAN permissions or it's a manual connection, continue SendMessages processing rather than exiting early. The previous behaviour was that we'd miss the SendMessages processing on this iteration of the MessageHandler loop. That's not a problem since SendMessages() would just be called again on the next iteration, but it was slightly inefficient and confusing. --- src/net_processing.cpp | 57 +++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 570cca0c66..4321005212 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -3557,32 +3557,49 @@ void ProcessMessage( return; } +/** Maybe disconnect a peer and discourage future connections from its address. + * + * @param[in] pnode The node to check. + * @return True if the peer was marked for disconnection in this function + */ bool PeerLogicValidation::MaybeDiscourageAndDisconnect(CNode& pnode) { - LOCK(cs_main); - CNodeState &state = *State(pnode.GetId()); + NodeId peer_id{pnode.GetId()}; + { + LOCK(cs_main); + CNodeState &state = *State(peer_id); - if (state.m_should_discourage) { + // There's nothing to do if the m_should_discourage flag isn't set + if (!state.m_should_discourage) return false; + + // Reset m_should_discourage state.m_should_discourage = false; - if (pnode.HasPermission(PF_NOBAN)) { - LogPrintf("Warning: not punishing whitelisted peer %s!\n", pnode.addr.ToString()); - } else if (pnode.m_manual_connection) { - LogPrintf("Warning: not punishing manually-connected peer %s!\n", pnode.addr.ToString()); - } else if (pnode.addr.IsLocal()) { - // Disconnect but don't discourage this local node - LogPrintf("Warning: disconnecting but not discouraging local peer %s!\n", pnode.addr.ToString()); - pnode.fDisconnect = true; - } else { - // Disconnect and discourage all nodes sharing the address - LogPrintf("Disconnecting and discouraging peer %s!\n", pnode.addr.ToString()); - if (m_banman) { - m_banman->Discourage(pnode.addr); - } - connman->DisconnectNode(pnode.addr); - } + } // cs_main + + if (pnode.HasPermission(PF_NOBAN)) { + // Peer has the NOBAN permission flag - log but don't disconnect + LogPrintf("Warning: not punishing noban peer %d!\n", peer_id); + return false; + } + + if (pnode.m_manual_connection) { + // Peer is a manual connection - log but don't disconnect + LogPrintf("Warning: not punishing manually connected peer %d!\n", peer_id); + return false; + } + + if (pnode.addr.IsLocal()) { + // Peer is on a local address. Disconnect this peer, but don't discourage the local address + LogPrintf("Warning: disconnecting but not discouraging local peer %d!\n", peer_id); + pnode.fDisconnect = true; return true; } - return false; + + // Normal case: Disconnect the peer and discourage all nodes sharing the address + LogPrintf("Disconnecting and discouraging peer %d!\n", peer_id); + if (m_banman) m_banman->Discourage(pnode.addr); + connman->DisconnectNode(pnode.addr); + return true; } bool PeerLogicValidation::ProcessMessages(CNode* pfrom, std::atomic& interruptMsgProc)