Merge bitcoin/bitcoin#22577: Close minor startup race between main and scheduler threads

703b1e612a Close minor startup race between main and scheduler threads (Larry Ruane)

Pull request description:

  This is a low-priority bug fix. The scheduler thread runs `CheckForStaleTipAndEvictPeers()` every 45 seconds (EXTRA_PEER_CHECK_INTERVAL). If its first run happens before the active chain is set up (`CChain::SetTip()`), `bitcoind` will assert:
  ```
  (...)
  2021-07-28T22:16:49Z init message: Loading block index…
  bitcoind: validation.cpp:4968: CChainState& ChainstateManager::ActiveChainstate() const: Assertion `m_active_chainstate' failed.
  Aborted (core dumped)
  ```
  I ran into this while using the debugger to investigate an unrelated problem. Single-stepping through threads with a debugger can cause the relative thread execution timing to be very different than usual. I don't think any automated tests are needed for this PR. I'll give reproduction steps in the next PR comment.

ACKs for top commit:
  MarcoFalke:
    cr ACK 703b1e612a
  tryphe:
    tested ACK 703b1e612a
  0xB10C:
    ACK 703b1e612a
  glozow:
    code review ACK 703b1e612a - it makes sense to me to start peerman's background tasks here, after `chainstate->LoadChainTip()` and `node.connman->Start()` have been called.

Tree-SHA512: 9316ad768cba3b171f62e2eb400e3790af66c47d1886d7965edb38d9710fc8c8f8e4fb38232811c9346732ce311d39f740c5c2aaf5f6ca390ddc48c51a8d633b
pull/22604/head
MarcoFalke 3 years ago
commit 5b2d8661c9
No known key found for this signature in database
GPG Key ID: CE2B75697E69A548

@ -1185,7 +1185,7 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
assert(!node.peerman);
node.peerman = PeerManager::make(chainparams, *node.connman, *node.addrman, node.banman.get(),
*node.scheduler, chainman, *node.mempool, ignores_incoming_txs);
chainman, *node.mempool, ignores_incoming_txs);
RegisterValidationInterface(node.peerman.get());
// sanitize comments per BIP-0014, format user agent and check total size
@ -1794,6 +1794,8 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
banman->DumpBanlist();
}, DUMP_BANS_INTERVAL);
if (node.peerman) node.peerman->StartScheduledTasks(*node.scheduler);
#if HAVE_SYSTEM
StartupNotify(args);
#endif

@ -292,7 +292,7 @@ class PeerManagerImpl final : public PeerManager
{
public:
PeerManagerImpl(const CChainParams& chainparams, CConnman& connman, CAddrMan& addrman,
BanMan* banman, CScheduler& scheduler, ChainstateManager& chainman,
BanMan* banman, ChainstateManager& chainman,
CTxMemPool& pool, bool ignore_incoming_txs);
/** Overridden from CValidationInterface. */
@ -309,6 +309,7 @@ public:
bool SendMessages(CNode* pto) override EXCLUSIVE_LOCKS_REQUIRED(pto->cs_sendProcessing);
/** Implement PeerManager */
void StartScheduledTasks(CScheduler& scheduler) override;
void CheckForStaleTipAndEvictPeers() override;
bool GetNodeStateStats(NodeId nodeid, CNodeStateStats& stats) const override;
bool IgnoresIncomingTxs() override { return m_ignore_incoming_txs; }
@ -1419,14 +1420,14 @@ bool PeerManagerImpl::BlockRequestAllowed(const CBlockIndex* pindex)
}
std::unique_ptr<PeerManager> PeerManager::make(const CChainParams& chainparams, CConnman& connman, CAddrMan& addrman,
BanMan* banman, CScheduler& scheduler, ChainstateManager& chainman,
BanMan* banman, ChainstateManager& chainman,
CTxMemPool& pool, bool ignore_incoming_txs)
{
return std::make_unique<PeerManagerImpl>(chainparams, connman, addrman, banman, scheduler, chainman, pool, ignore_incoming_txs);
return std::make_unique<PeerManagerImpl>(chainparams, connman, addrman, banman, chainman, pool, ignore_incoming_txs);
}
PeerManagerImpl::PeerManagerImpl(const CChainParams& chainparams, CConnman& connman, CAddrMan& addrman,
BanMan* banman, CScheduler& scheduler, ChainstateManager& chainman,
BanMan* banman, ChainstateManager& chainman,
CTxMemPool& pool, bool ignore_incoming_txs)
: m_chainparams(chainparams),
m_connman(connman),
@ -1435,6 +1436,10 @@ PeerManagerImpl::PeerManagerImpl(const CChainParams& chainparams, CConnman& conn
m_chainman(chainman),
m_mempool(pool),
m_ignore_incoming_txs(ignore_incoming_txs)
{
}
void PeerManagerImpl::StartScheduledTasks(CScheduler& scheduler)
{
// Stale tip checking and peer eviction are on two different timers, but we
// don't want them to get out of sync due to drift in the scheduler, so we

@ -38,10 +38,13 @@ class PeerManager : public CValidationInterface, public NetEventsInterface
{
public:
static std::unique_ptr<PeerManager> make(const CChainParams& chainparams, CConnman& connman, CAddrMan& addrman,
BanMan* banman, CScheduler& scheduler, ChainstateManager& chainman,
BanMan* banman, ChainstateManager& chainman,
CTxMemPool& pool, bool ignore_incoming_txs);
virtual ~PeerManager() { }
/** Begin running background tasks, should only be called once */
virtual void StartScheduledTasks(CScheduler& scheduler) = 0;
/** Get statistics from node state */
virtual bool GetNodeStateStats(NodeId nodeid, CNodeStateStats& stats) const = 0;

@ -53,7 +53,7 @@ BOOST_AUTO_TEST_CASE(outbound_slow_chain_eviction)
const CChainParams& chainparams = Params();
auto connman = std::make_unique<CConnman>(0x1337, 0x1337, *m_node.addrman);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, nullptr,
*m_node.scheduler, *m_node.chainman, *m_node.mempool, false);
*m_node.chainman, *m_node.mempool, false);
// Mock an outbound peer
CAddress addr1(ip(0xa0b0c001), NODE_NONE);
@ -121,7 +121,7 @@ BOOST_AUTO_TEST_CASE(stale_tip_peer_management)
const CChainParams& chainparams = Params();
auto connman = std::make_unique<ConnmanTestMsg>(0x1337, 0x1337, *m_node.addrman);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, nullptr,
*m_node.scheduler, *m_node.chainman, *m_node.mempool, false);
*m_node.chainman, *m_node.mempool, false);
constexpr int max_outbound_full_relay = MAX_OUTBOUND_FULL_RELAY_CONNECTIONS;
CConnman::Options options;
@ -194,7 +194,7 @@ BOOST_AUTO_TEST_CASE(peer_discouragement)
auto banman = std::make_unique<BanMan>(m_args.GetDataDirBase() / "banlist", nullptr, DEFAULT_MISBEHAVING_BANTIME);
auto connman = std::make_unique<ConnmanTestMsg>(0x1337, 0x1337, *m_node.addrman);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, banman.get(),
*m_node.scheduler, *m_node.chainman, *m_node.mempool, false);
*m_node.chainman, *m_node.mempool, false);
CNetAddr tor_netaddr;
BOOST_REQUIRE(
@ -288,7 +288,7 @@ BOOST_AUTO_TEST_CASE(DoS_bantime)
auto banman = std::make_unique<BanMan>(m_args.GetDataDirBase() / "banlist", nullptr, DEFAULT_MISBEHAVING_BANTIME);
auto connman = std::make_unique<CConnman>(0x1337, 0x1337, *m_node.addrman);
auto peerLogic = PeerManager::make(chainparams, *connman, *m_node.addrman, banman.get(),
*m_node.scheduler, *m_node.chainman, *m_node.mempool, false);
*m_node.chainman, *m_node.mempool, false);
banman->ClearBanned();
int64_t nStartTime = GetTime();

@ -197,7 +197,7 @@ TestingSetup::TestingSetup(const std::string& chainName, const std::vector<const
m_node.banman = std::make_unique<BanMan>(m_args.GetDataDirBase() / "banlist", nullptr, DEFAULT_MISBEHAVING_BANTIME);
m_node.connman = std::make_unique<CConnman>(0x1337, 0x1337, *m_node.addrman); // Deterministic randomness for tests.
m_node.peerman = PeerManager::make(chainparams, *m_node.connman, *m_node.addrman,
m_node.banman.get(), *m_node.scheduler, *m_node.chainman,
m_node.banman.get(), *m_node.chainman,
*m_node.mempool, false);
{
CConnman::Options options;

Loading…
Cancel
Save