From 218697b645b23249c16afa29b4ada20c1739c502 Mon Sep 17 00:00:00 2001 From: Suhas Daftuar Date: Mon, 22 Apr 2019 12:17:20 -0400 Subject: [PATCH 1/5] Improve NOTFOUND comment --- src/net_processing.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 74e33189dc..f76e5d64a0 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -1455,12 +1455,19 @@ void static ProcessGetData(CNode* pfrom, const CChainParams& chainparams, CConnm if (!vNotFound.empty()) { // Let the peer know that we didn't find what it asked for, so it doesn't - // have to wait around forever. Currently only SPV clients actually care - // about this message: it's needed when they are recursively walking the - // dependencies of relevant unconfirmed transactions. SPV clients want to - // do that because they want to know about (and store and rebroadcast and - // risk analyze) the dependencies of transactions relevant to them, without - // having to download the entire memory pool. + // have to wait around forever. + // SPV clients care about this message: it's needed when they are + // recursively walking the dependencies of relevant unconfirmed + // transactions. SPV clients want to do that because they want to know + // about (and store and rebroadcast and risk analyze) the dependencies + // of transactions relevant to them, without having to download the + // entire memory pool. + // Also, other nodes can use these messages to automatically request a + // transaction from some other peer that annnounced it, and stop + // waiting for us to respond. + // In normal operation, we often send NOTFOUND messages for parents of + // transactions that we relay; if a peer is missing a parent, they may + // assume we have them and request the parents from us. connman->PushMessage(pfrom, msgMaker.Make(NetMsgType::NOTFOUND, vNotFound)); } } From 23163b759354b84c5a076e3e2ae6ae6338106035 Mon Sep 17 00:00:00 2001 From: Suhas Daftuar Date: Mon, 22 Apr 2019 12:10:16 -0400 Subject: [PATCH 2/5] Add an explicit memory bound to m_tx_process_time Previously there was an implicit bound based on the handling of m_tx_announced, but that approach is error-prone (particularly if we start automatically removing things from that set). --- src/net_processing.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index f76e5d64a0..f114981cc7 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -699,7 +699,9 @@ void UpdateTxRequestTime(const uint256& txid, int64_t request_time) EXCLUSIVE_LO void RequestTx(CNodeState* state, const uint256& txid, int64_t nNow) EXCLUSIVE_LOCKS_REQUIRED(cs_main) { CNodeState::TxDownloadState& peer_download_state = state->m_tx_download; - if (peer_download_state.m_tx_announced.size() >= MAX_PEER_TX_ANNOUNCEMENTS || peer_download_state.m_tx_announced.count(txid)) { + if (peer_download_state.m_tx_announced.size() >= MAX_PEER_TX_ANNOUNCEMENTS || + peer_download_state.m_tx_process_time.size() >= MAX_PEER_TX_ANNOUNCEMENTS || + peer_download_state.m_tx_announced.count(txid)) { // Too many queued announcements from this peer, or we already have // this announcement return; From e32e08407e2781d881b9da92aa06494525ddd085 Mon Sep 17 00:00:00 2001 From: Suhas Daftuar Date: Tue, 16 Apr 2019 15:13:29 -0400 Subject: [PATCH 3/5] Remove NOTFOUND transactions from in-flight data structures This prevents a bug where the in-flight queue for our peers will not be drained, resulting in not downloading any new transactions from our peers. Thanks to ajtowns for reporting this bug. --- src/net_processing.cpp | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index f114981cc7..60e16bc413 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -3120,8 +3120,27 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr } if (strCommand == NetMsgType::NOTFOUND) { - // We do not care about the NOTFOUND message, but logging an Unknown Command - // message would be undesirable as we transmit it ourselves. + // Remove the NOTFOUND transactions from the peer + LOCK(cs_main); + CNodeState *state = State(pfrom->GetId()); + std::vector vInv; + vRecv >> vInv; + if (vInv.size() <= MAX_PEER_TX_IN_FLIGHT + MAX_BLOCKS_IN_TRANSIT_PER_PEER) { + for (CInv &inv : vInv) { + if (inv.type == MSG_TX || inv.type == MSG_WITNESS_TX) { + // If we receive a NOTFOUND message for a txid we requested, erase + // it from our data structures for this peer. + auto in_flight_it = state->m_tx_download.m_tx_in_flight.find(inv.hash); + if (in_flight_it == state->m_tx_download.m_tx_in_flight.end()) { + // Skip any further work if this is a spurious NOTFOUND + // message. + continue; + } + state->m_tx_download.m_tx_in_flight.erase(in_flight_it); + state->m_tx_download.m_tx_announced.erase(inv.hash); + } + } + } return true; } From f635a3ba118aeaf172534f9f802721a6ef07cafc Mon Sep 17 00:00:00 2001 From: Suhas Daftuar Date: Tue, 16 Apr 2019 15:48:55 -0400 Subject: [PATCH 4/5] Expire old entries from the in-flight tx map If a peer hasn't responded to a getdata request, eventually time out the request and remove it from the in-flight data structures. This is to prevent any bugs in our handling of those in-flight data structures from filling up the in-flight map and preventing us from requesting more transactions (such as the NOTFOUND bug, fixed in a previous commit). Co-authored-by: Anthony Towns --- src/net_processing.cpp | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 60e16bc413..979f3c146c 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -70,11 +70,13 @@ static constexpr int32_t MAX_PEER_TX_IN_FLIGHT = 100; /** Maximum number of announced transactions from a peer */ static constexpr int32_t MAX_PEER_TX_ANNOUNCEMENTS = 2 * MAX_INV_SZ; /** How many microseconds to delay requesting transactions from inbound peers */ -static constexpr int64_t INBOUND_PEER_TX_DELAY = 2 * 1000000; +static constexpr int64_t INBOUND_PEER_TX_DELAY = 2 * 1000000; // 2 seconds /** How long to wait (in microseconds) before downloading a transaction from an additional peer */ -static constexpr int64_t GETDATA_TX_INTERVAL = 60 * 1000000; +static constexpr int64_t GETDATA_TX_INTERVAL = 60 * 1000000; // 1 minute /** Maximum delay (in microseconds) for transaction requests to avoid biasing some peers over others. */ -static constexpr int64_t MAX_GETDATA_RANDOM_DELAY = 2 * 1000000; +static constexpr int64_t MAX_GETDATA_RANDOM_DELAY = 2 * 1000000; // 2 seconds +/** How long to wait (in microseconds) before expiring an in-flight getdata request to a peer */ +static constexpr int64_t TX_EXPIRY_INTERVAL = 10 * GETDATA_TX_INTERVAL; static_assert(INBOUND_PEER_TX_DELAY >= MAX_GETDATA_RANDOM_DELAY, "To preserve security, MAX_GETDATA_RANDOM_DELAY should not exceed INBOUND_PEER_DELAY"); /** Limit to avoid sending big packets. Not used in processing incoming GETDATA for compatibility */ @@ -345,8 +347,11 @@ struct CNodeState { //! Store all the transactions a peer has recently announced std::set m_tx_announced; - //! Store transactions which were requested by us - std::set m_tx_in_flight; + //! Store transactions which were requested by us, with timestamp + std::map m_tx_in_flight; + + //! Periodically check for stuck getdata requests + int64_t m_check_expiry_timer{0}; }; TxDownloadState m_tx_download; @@ -3939,6 +3944,27 @@ bool PeerLogicValidation::SendMessages(CNode* pto) // // Message: getdata (non-blocks) // + + // For robustness, expire old requests after a long timeout, so that + // we can resume downloading transactions from a peer even if they + // were unresponsive in the past. + // Eventually we should consider disconnecting peers, but this is + // conservative. + if (state.m_tx_download.m_check_expiry_timer <= nNow) { + for (auto it=state.m_tx_download.m_tx_in_flight.begin(); it != state.m_tx_download.m_tx_in_flight.end();) { + if (it->second <= nNow - TX_EXPIRY_INTERVAL) { + LogPrint(BCLog::NET, "timeout of inflight tx %s from peer=%d\n", it->first.ToString(), pto->GetId()); + state.m_tx_download.m_tx_announced.erase(it->first); + state.m_tx_download.m_tx_in_flight.erase(it++); + } else { + ++it; + } + } + // On average, we do this check every TX_EXPIRY_INTERVAL. Randomize + // so that we're not doing this for all peers at the same time. + state.m_tx_download.m_check_expiry_timer = nNow + TX_EXPIRY_INTERVAL/2 + GetRand(TX_EXPIRY_INTERVAL); + } + auto& tx_process_time = state.m_tx_download.m_tx_process_time; while (!tx_process_time.empty() && tx_process_time.begin()->first <= nNow && state.m_tx_download.m_tx_in_flight.size() < MAX_PEER_TX_IN_FLIGHT) { const uint256& txid = tx_process_time.begin()->second; @@ -3955,7 +3981,7 @@ bool PeerLogicValidation::SendMessages(CNode* pto) vGetData.clear(); } UpdateTxRequestTime(inv.hash, nNow); - state.m_tx_download.m_tx_in_flight.insert(inv.hash); + state.m_tx_download.m_tx_in_flight.emplace(inv.hash, nNow); } else { // This transaction is in flight from someone else; queue // up processing to happen after the download times out From 308b76732f97020c86977e29c854e8e27262cf7c Mon Sep 17 00:00:00 2001 From: Suhas Daftuar Date: Mon, 22 Apr 2019 14:13:30 -0400 Subject: [PATCH 5/5] Fix bug around transaction requests If a transaction is already in-flight when a peer announces a new tx to us, we schedule a time in the future to reconsider whether to download. At that future time, there was a bug that would prevent transactions from being rescheduled for potential download again (ie if the transaction was still in-flight at the time of reconsideration, such as from some other peer). Fix this. --- src/net_processing.cpp | 43 ++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 979f3c146c..1ed7058811 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -700,6 +700,24 @@ void UpdateTxRequestTime(const uint256& txid, int64_t request_time) EXCLUSIVE_LO } } +int64_t CalculateTxGetDataTime(const uint256& txid, int64_t current_time, bool use_inbound_delay) EXCLUSIVE_LOCKS_REQUIRED(cs_main) +{ + int64_t process_time; + int64_t last_request_time = GetTxRequestTime(txid); + // First time requesting this tx + if (last_request_time == 0) { + process_time = current_time; + } else { + // Randomize the delay to avoid biasing some peers over others (such as due to + // fixed ordering of peer processing in ThreadMessageHandler) + process_time = last_request_time + GETDATA_TX_INTERVAL + GetRand(MAX_GETDATA_RANDOM_DELAY); + } + + // We delay processing announcements from inbound peers + if (use_inbound_delay) process_time += INBOUND_PEER_TX_DELAY; + + return process_time; +} void RequestTx(CNodeState* state, const uint256& txid, int64_t nNow) EXCLUSIVE_LOCKS_REQUIRED(cs_main) { @@ -713,19 +731,9 @@ void RequestTx(CNodeState* state, const uint256& txid, int64_t nNow) EXCLUSIVE_L } peer_download_state.m_tx_announced.insert(txid); - int64_t process_time; - int64_t last_request_time = GetTxRequestTime(txid); - // First time requesting this tx - if (last_request_time == 0) { - process_time = nNow; - } else { - // Randomize the delay to avoid biasing some peers over others (such as due to - // fixed ordering of peer processing in ThreadMessageHandler) - process_time = last_request_time + GETDATA_TX_INTERVAL + GetRand(MAX_GETDATA_RANDOM_DELAY); - } - - // We delay processing announcements from non-preferred (eg inbound) peers - if (!state->fPreferredDownload) process_time += INBOUND_PEER_TX_DELAY; + // Calculate the time to try requesting this transaction. Use + // fPreferredDownload as a proxy for outbound peers. + int64_t process_time = CalculateTxGetDataTime(txid, nNow, !state->fPreferredDownload); peer_download_state.m_tx_process_time.emplace(process_time, txid); } @@ -3967,7 +3975,10 @@ bool PeerLogicValidation::SendMessages(CNode* pto) auto& tx_process_time = state.m_tx_download.m_tx_process_time; while (!tx_process_time.empty() && tx_process_time.begin()->first <= nNow && state.m_tx_download.m_tx_in_flight.size() < MAX_PEER_TX_IN_FLIGHT) { - const uint256& txid = tx_process_time.begin()->second; + const uint256 txid = tx_process_time.begin()->second; + // Erase this entry from tx_process_time (it may be added back for + // processing at a later time, see below) + tx_process_time.erase(tx_process_time.begin()); CInv inv(MSG_TX | GetFetchFlags(pto), txid); if (!AlreadyHave(inv)) { // If this transaction was last requested more than 1 minute ago, @@ -3987,14 +3998,14 @@ bool PeerLogicValidation::SendMessages(CNode* pto) // up processing to happen after the download times out // (with a slight delay for inbound peers, to prefer // requests to outbound peers). - RequestTx(&state, txid, nNow); + int64_t next_process_time = CalculateTxGetDataTime(txid, nNow, !state.fPreferredDownload); + tx_process_time.emplace(next_process_time, txid); } } else { // We have already seen this transaction, no need to download. state.m_tx_download.m_tx_announced.erase(inv.hash); state.m_tx_download.m_tx_in_flight.erase(inv.hash); } - tx_process_time.erase(tx_process_time.begin()); }