diff --git a/test/functional/test_framework/bdb.py b/test/functional/test_framework/bdb.py new file mode 100644 index 00000000000..9de358aa0a6 --- /dev/null +++ b/test/functional/test_framework/bdb.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +# Copyright (c) 2020 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +""" +Utilities for working directly with the wallet's BDB database file + +This is specific to the configuration of BDB used in this project: + - pagesize: 4096 bytes + - Outer database contains single subdatabase named 'main' + - btree + - btree leaf pages + +Each key-value pair is two entries in a btree leaf. The first is the key, the one that follows +is the value. And so on. Note that the entry data is itself not in the correct order. Instead +entry offsets are stored in the correct order and those offsets are needed to then retrieve +the data itself. + +Page format can be found in BDB source code dbinc/db_page.h +This only implements the deserialization of btree metadata pages and normal btree pages. Overflow +pages are not implemented but may be needed in the future if dealing with wallets with large +transactions. + +`db_dump -da wallet.dat` is useful to see the data in a wallet.dat BDB file +""" + +import binascii +import struct + +# Important constants +PAGESIZE = 4096 +OUTER_META_PAGE = 0 +INNER_META_PAGE = 2 + +# Page type values +BTREE_INTERNAL = 3 +BTREE_LEAF = 5 +BTREE_META = 9 + +# Some magic numbers for sanity checking +BTREE_MAGIC = 0x053162 +DB_VERSION = 9 + +# Deserializes a leaf page into a dict. +# Btree internal pages have the same header, for those, return None. +# For the btree leaf pages, deserialize them and put all the data into a dict +def dump_leaf_page(data): + page_info = {} + page_header = data[0:26] + _, pgno, prev_pgno, next_pgno, entries, hf_offset, level, pg_type = struct.unpack('QIIIHHBB', page_header) + page_info['pgno'] = pgno + page_info['prev_pgno'] = prev_pgno + page_info['next_pgno'] = next_pgno + page_info['entries'] = entries + page_info['hf_offset'] = hf_offset + page_info['level'] = level + page_info['pg_type'] = pg_type + page_info['entry_offsets'] = struct.unpack('{}H'.format(entries), data[26:26 + entries * 2]) + page_info['entries'] = [] + + if pg_type == BTREE_INTERNAL: + # Skip internal pages. These are the internal nodes of the btree and don't contain anything relevant to us + return None + + assert pg_type == BTREE_LEAF, 'A non-btree leaf page has been encountered while dumping leaves' + + for i in range(0, entries): + offset = page_info['entry_offsets'][i] + entry = {'offset': offset} + page_data_header = data[offset:offset + 3] + e_len, pg_type = struct.unpack('HB', page_data_header) + entry['len'] = e_len + entry['pg_type'] = pg_type + entry['data'] = data[offset + 3:offset + 3 + e_len] + page_info['entries'].append(entry) + + return page_info + +# Deserializes a btree metadata page into a dict. +# Does a simple sanity check on the magic value, type, and version +def dump_meta_page(page): + # metadata page + # general metadata + metadata = {} + meta_page = page[0:72] + _, pgno, magic, version, pagesize, encrypt_alg, pg_type, metaflags, _, free, last_pgno, nparts, key_count, record_count, flags, uid = struct.unpack('QIIIIBBBBIIIIII20s', meta_page) + metadata['pgno'] = pgno + metadata['magic'] = magic + metadata['version'] = version + metadata['pagesize'] = pagesize + metadata['encrypt_alg'] = encrypt_alg + metadata['pg_type'] = pg_type + metadata['metaflags'] = metaflags + metadata['free'] = free + metadata['last_pgno'] = last_pgno + metadata['nparts'] = nparts + metadata['key_count'] = key_count + metadata['record_count'] = record_count + metadata['flags'] = flags + metadata['uid'] = binascii.hexlify(uid) + + assert magic == BTREE_MAGIC, 'bdb magic does not match bdb btree magic' + assert pg_type == BTREE_META, 'Metadata page is not a btree metadata page' + assert version == DB_VERSION, 'Database too new' + + # btree metadata + btree_meta_page = page[72:512] + _, minkey, re_len, re_pad, root, _, crypto_magic, _, iv, chksum = struct.unpack('IIIII368sI12s16s20s', btree_meta_page) + metadata['minkey'] = minkey + metadata['re_len'] = re_len + metadata['re_pad'] = re_pad + metadata['root'] = root + metadata['crypto_magic'] = crypto_magic + metadata['iv'] = binascii.hexlify(iv) + metadata['chksum'] = binascii.hexlify(chksum) + return metadata + +# Given the dict from dump_leaf_page, get the key-value pairs and put them into a dict +def extract_kv_pairs(page_data): + out = {} + last_key = None + for i, entry in enumerate(page_data['entries']): + # By virtue of these all being pairs, even number entries are keys, and odd are values + if i % 2 == 0: + out[entry['data']] = b'' + last_key = entry['data'] + else: + out[last_key] = entry['data'] + return out + +# Extract the key-value pairs of the BDB file given in filename +def dump_bdb_kv(filename): + # Read in the BDB file and start deserializing it + pages = [] + with open(filename, 'rb') as f: + data = f.read(PAGESIZE) + while len(data) > 0: + pages.append(data) + data = f.read(PAGESIZE) + + # Sanity check the meta pages + dump_meta_page(pages[OUTER_META_PAGE]) + dump_meta_page(pages[INNER_META_PAGE]) + + # Fetch the kv pairs from the leaf pages + kv = {} + for i in range(3, len(pages)): + info = dump_leaf_page(pages[i]) + if info is not None: + info_kv = extract_kv_pairs(info) + kv = {**kv, **info_kv} + return kv