Merge bitcoin/bitcoin#24832: index: Verify the block filter hash when reading the filter from disk.

e734228d85 Update GCSFilter benchmarks (Calvin Kim)
aee9a8140b Add GCSFilterDecodeSkipCheck benchmark (Patrick Strateman)
299023c1d9 Add GCSFilterDecode and GCSBlockFilterGetHash benchmarks. (Patrick Strateman)
b0a53d50d9 Make sanity check in GCSFilter constructor optional (Patrick Strateman)

Pull request description:

  This PR picks up the abandoned #19280

  BlockFilterIndex was depending on `GolombRiceDecode()` during the filter decode to sanity check that the filter wasn't corrupt. However, we can check for corruption by ensuring that the encoded blockfilter's hash matches up with the one stored in the index database.

  Benchmarks that were added in #19280 showed that checking the hash is much faster.

  The benchmarks were changed to nanobench and the relevant benchmarks were like below, showing a clear win for the hash check method.

  ```
  |             ns/elem |              elem/s |    err% |        ins/elem |       bra/elem |   miss% |     total | benchmark
  |--------------------:|--------------------:|--------:|----------------:|---------------:|--------:|----------:|:----------
  |              531.40 |        1,881,819.43 |    0.3% |        3,527.01 |         411.00 |    0.2% |      0.01 | `DecodeCheckedGCSFilter`
  |          258,220.50 |            3,872.66 |    0.1% |    2,990,092.00 |     586,706.00 |    1.7% |      0.01 | `DecodeGCSFilter`
  |           13,036.77 |           76,706.09 |    0.3% |       64,238.24 |         513.04 |    0.2% |      0.01 | `BlockFilterGetHash`
  ```

ACKs for top commit:
  mzumsande:
    Code Review ACK e734228d85
  theStack:
    Code-review ACK e734228d85
  stickies-v:
    ACK e734228d85
  ryanofsky:
    Code review ACK e734228d85, with caveat that I mostly paid attention to the main code, not the changes to the benchmark. Only changes since last review were changes to the benchmark code.

Tree-SHA512: 02b86eab7b554e1a57a15b17a4d6d71faa91b556c637b0da29f0c9ee76597a110be8e3b4d0c158d4cab04af0623de18b764837be0ec2a72afcfe1ad9c78a83c6
24.x
fanquake 2 years ago
commit 5abbc9afec
No known key found for this signature in database
GPG Key ID: 2EEB9F5CC09526C1

@ -5,39 +5,84 @@
#include <bench/bench.h>
#include <blockfilter.h>
static void ConstructGCSFilter(benchmark::Bench& bench)
static const GCSFilter::ElementSet GenerateGCSTestElements()
{
GCSFilter::ElementSet elements;
for (int i = 0; i < 10000; ++i) {
// Testing the benchmarks with different number of elements show that a filter
// with at least 100,000 elements results in benchmarks that have the same
// ns/op. This makes it easy to reason about how long (in nanoseconds) a single
// filter element takes to process.
for (int i = 0; i < 100000; ++i) {
GCSFilter::Element element(32);
element[0] = static_cast<unsigned char>(i);
element[1] = static_cast<unsigned char>(i >> 8);
elements.insert(std::move(element));
}
return elements;
}
static void GCSBlockFilterGetHash(benchmark::Bench& bench)
{
auto elements = GenerateGCSTestElements();
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
BlockFilter block_filter(BlockFilterType::BASIC, {}, filter.GetEncoded(), /*skip_decode_check=*/false);
bench.run([&] {
block_filter.GetHash();
});
}
static void GCSFilterConstruct(benchmark::Bench& bench)
{
auto elements = GenerateGCSTestElements();
uint64_t siphash_k0 = 0;
bench.batch(elements.size()).unit("elem").run([&] {
GCSFilter filter({siphash_k0, 0, 20, 1 << 20}, elements);
bench.run([&]{
GCSFilter filter({siphash_k0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
siphash_k0++;
});
}
static void MatchGCSFilter(benchmark::Bench& bench)
static void GCSFilterDecode(benchmark::Bench& bench)
{
GCSFilter::ElementSet elements;
for (int i = 0; i < 10000; ++i) {
GCSFilter::Element element(32);
element[0] = static_cast<unsigned char>(i);
element[1] = static_cast<unsigned char>(i >> 8);
elements.insert(std::move(element));
}
GCSFilter filter({0, 0, 20, 1 << 20}, elements);
auto elements = GenerateGCSTestElements();
bench.unit("elem").run([&] {
filter.Match(GCSFilter::Element());
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
auto encoded = filter.GetEncoded();
bench.run([&] {
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/false);
});
}
BENCHMARK(ConstructGCSFilter);
BENCHMARK(MatchGCSFilter);
static void GCSFilterDecodeSkipCheck(benchmark::Bench& bench)
{
auto elements = GenerateGCSTestElements();
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
auto encoded = filter.GetEncoded();
bench.run([&] {
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/true);
});
}
static void GCSFilterMatch(benchmark::Bench& bench)
{
auto elements = GenerateGCSTestElements();
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
bench.run([&] {
filter.Match(GCSFilter::Element());
});
}
BENCHMARK(GCSBlockFilterGetHash);
BENCHMARK(GCSFilterConstruct);
BENCHMARK(GCSFilterDecode);
BENCHMARK(GCSFilterDecodeSkipCheck);
BENCHMARK(GCSFilterMatch);

@ -47,7 +47,7 @@ GCSFilter::GCSFilter(const Params& params)
: m_params(params), m_N(0), m_F(0), m_encoded{0}
{}
GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter)
GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter, bool skip_decode_check)
: m_params(params), m_encoded(std::move(encoded_filter))
{
SpanReader stream{GCS_SER_TYPE, GCS_SER_VERSION, m_encoded};
@ -59,6 +59,8 @@ GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_fi
}
m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_params.m_M);
if (skip_decode_check) return;
// Verify that the encoded filter contains exactly N elements. If it has too much or too little
// data, a std::ios_base::failure exception will be raised.
BitStreamReader<SpanReader> bitreader{stream};
@ -219,14 +221,14 @@ static GCSFilter::ElementSet BasicFilterElements(const CBlock& block,
}
BlockFilter::BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
std::vector<unsigned char> filter)
std::vector<unsigned char> filter, bool skip_decode_check)
: m_filter_type(filter_type), m_block_hash(block_hash)
{
GCSFilter::Params params;
if (!BuildParams(params)) {
throw std::invalid_argument("unknown filter_type");
}
m_filter = GCSFilter(params, std::move(filter));
m_filter = GCSFilter(params, std::move(filter), skip_decode_check);
}
BlockFilter::BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo)

@ -59,7 +59,7 @@ public:
explicit GCSFilter(const Params& params = Params());
/** Reconstructs an already-created filter from an encoding. */
GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter);
GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter, bool skip_decode_check);
/** Builds a new filter from the params and set of elements. */
GCSFilter(const Params& params, const ElementSet& elements);
@ -122,7 +122,7 @@ public:
//! Reconstruct a BlockFilter from parts.
BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
std::vector<unsigned char> filter);
std::vector<unsigned char> filter, bool skip_decode_check);
//! Construct a new BlockFilter of the specified type from a block.
BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo);
@ -164,7 +164,7 @@ public:
if (!BuildParams(params)) {
throw std::ios_base::failure("unknown filter_type");
}
m_filter = GCSFilter(params, std::move(encoded_filter));
m_filter = GCSFilter(params, std::move(encoded_filter), /*skip_decode_check=*/false);
}
};

@ -5,6 +5,7 @@
#include <map>
#include <dbwrapper.h>
#include <hash.h>
#include <index/blockfilterindex.h>
#include <node/blockstorage.h>
#include <util/system.h>
@ -143,18 +144,22 @@ bool BlockFilterIndex::CommitInternal(CDBBatch& batch)
return BaseIndex::CommitInternal(batch);
}
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const
{
CAutoFile filein(m_filter_fileseq->Open(pos, true), SER_DISK, CLIENT_VERSION);
if (filein.IsNull()) {
return false;
}
// Check that the hash of the encoded_filter matches the one stored in the db.
uint256 block_hash;
std::vector<uint8_t> encoded_filter;
try {
filein >> block_hash >> encoded_filter;
filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter));
uint256 result;
CHash256().Write(encoded_filter).Finalize(result);
if (result != hash) return error("Checksum mismatch in filter decode.");
filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true);
}
catch (const std::exception& e) {
return error("%s: Failed to deserialize block filter from disk: %s", __func__, e.what());
@ -381,7 +386,7 @@ bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter&
return false;
}
return ReadFilterFromDisk(entry.pos, filter_out);
return ReadFilterFromDisk(entry.pos, entry.hash, filter_out);
}
bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out)
@ -425,7 +430,7 @@ bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* st
filters_out.resize(entries.size());
auto filter_pos_it = filters_out.begin();
for (const auto& entry : entries) {
if (!ReadFilterFromDisk(entry.pos, *filter_pos_it)) {
if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) {
return false;
}
++filter_pos_it;

@ -31,7 +31,7 @@ private:
FlatFilePos m_next_filter_pos;
std::unique_ptr<FlatFileSeq> m_filter_fileseq;
bool ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const;
bool ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const;
size_t WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter);
Mutex m_cs_headers_cache;

Loading…
Cancel
Save