From 4536148b15595229d0563fb60913b23cb78788ed Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Sun, 18 Sep 2016 09:55:14 +0200 Subject: [PATCH] support: Add LockedPool Add a pool for locked memory chunks, replacing LockedPageManager. This is something I've been wanting to do for a long time. The current approach of locking objects where they happen to be on the stack or heap in-place causes a lot of mlock/munlock system call overhead, slowing down any handling of keys. Also locked memory is a limited resource on many operating systems (and using a lot of it bogs down the system), so the previous approach of locking every page that may contain any key information (but also other information) is wasteful. --- src/Makefile.am | 4 +- src/support/allocators/secure.h | 12 +- src/support/lockedpool.cpp | 383 ++++++++++++++++++++++++++++++++ src/support/lockedpool.h | 251 +++++++++++++++++++++ src/support/pagelocker.cpp | 70 ------ src/support/pagelocker.h | 160 ------------- src/test/allocator_tests.cpp | 280 +++++++++++++++-------- 7 files changed, 832 insertions(+), 328 deletions(-) create mode 100644 src/support/lockedpool.cpp create mode 100644 src/support/lockedpool.h delete mode 100644 src/support/pagelocker.cpp delete mode 100644 src/support/pagelocker.h diff --git a/src/Makefile.am b/src/Makefile.am index e7f1d82b8b..54abd2ce46 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -133,7 +133,7 @@ BITCOIN_CORE_H = \ support/allocators/secure.h \ support/allocators/zeroafterfree.h \ support/cleanse.h \ - support/pagelocker.h \ + support/lockedpool.h \ sync.h \ threadsafety.h \ timedata.h \ @@ -310,7 +310,7 @@ libbitcoin_common_a_SOURCES = \ libbitcoin_util_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES) libbitcoin_util_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS) libbitcoin_util_a_SOURCES = \ - support/pagelocker.cpp \ + support/lockedpool.cpp \ chainparamsbase.cpp \ clientversion.cpp \ compat/glibc_sanity.cpp \ diff --git a/src/support/allocators/secure.h b/src/support/allocators/secure.h index 1ec40fe830..67064314ef 100644 --- a/src/support/allocators/secure.h +++ b/src/support/allocators/secure.h @@ -6,7 +6,8 @@ #ifndef BITCOIN_SUPPORT_ALLOCATORS_SECURE_H #define BITCOIN_SUPPORT_ALLOCATORS_SECURE_H -#include "support/pagelocker.h" +#include "support/lockedpool.h" +#include "support/cleanse.h" #include @@ -39,20 +40,15 @@ struct secure_allocator : public std::allocator { T* allocate(std::size_t n, const void* hint = 0) { - T* p; - p = std::allocator::allocate(n, hint); - if (p != NULL) - LockedPageManager::Instance().LockRange(p, sizeof(T) * n); - return p; + return static_cast(LockedPoolManager::Instance().alloc(sizeof(T) * n)); } void deallocate(T* p, std::size_t n) { if (p != NULL) { memory_cleanse(p, sizeof(T) * n); - LockedPageManager::Instance().UnlockRange(p, sizeof(T) * n); } - std::allocator::deallocate(p, n); + LockedPoolManager::Instance().free(p); } }; diff --git a/src/support/lockedpool.cpp b/src/support/lockedpool.cpp new file mode 100644 index 0000000000..63050f006b --- /dev/null +++ b/src/support/lockedpool.cpp @@ -0,0 +1,383 @@ +// Copyright (c) 2016 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include "support/lockedpool.h" +#include "support/cleanse.h" + +#if defined(HAVE_CONFIG_H) +#include "config/bitcoin-config.h" +#endif + +#ifdef WIN32 +#ifdef _WIN32_WINNT +#undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0501 +#define WIN32_LEAN_AND_MEAN 1 +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#else +#include // for mmap +#include // for getrlimit +#include // for PAGESIZE +#include // for sysconf +#endif + +LockedPoolManager* LockedPoolManager::_instance = NULL; +std::once_flag LockedPoolManager::init_flag; + +/*******************************************************************************/ +// Utilities +// +/** Align up to power of 2 */ +static inline size_t align_up(size_t x, size_t align) +{ + return (x + align - 1) & ~(align - 1); +} + +/*******************************************************************************/ +// Implementation: Arena + +Arena::Arena(void *base_in, size_t size_in, size_t alignment_in): + base(static_cast(base_in)), end(static_cast(base_in) + size_in), alignment(alignment_in) +{ + // Start with one free chunk that covers the entire arena + chunks.emplace(base, Chunk(size_in, false)); +} + +Arena::~Arena() +{ +} + +void* Arena::alloc(size_t size) +{ + // Round to next multiple of alignment + size = align_up(size, alignment); + + // Don't handle zero-sized chunks, or those bigger than MAX_SIZE + if (size == 0 || size >= Chunk::MAX_SIZE) { + return nullptr; + } + + for (auto& chunk: chunks) { + if (!chunk.second.isInUse() && size <= chunk.second.getSize()) { + char* base = chunk.first; + size_t leftover = chunk.second.getSize() - size; + if (leftover > 0) { // Split chunk + chunks.emplace(base + size, Chunk(leftover, false)); + chunk.second.setSize(size); + } + chunk.second.setInUse(true); + return reinterpret_cast(base); + } + } + return nullptr; +} + +void Arena::free(void *ptr) +{ + // Freeing the NULL pointer is OK. + if (ptr == nullptr) { + return; + } + auto i = chunks.find(static_cast(ptr)); + if (i == chunks.end() || !i->second.isInUse()) { + throw std::runtime_error("Arena: invalid or double free"); + } + + i->second.setInUse(false); + + if (i != chunks.begin()) { // Absorb into previous chunk if exists and free + auto prev = i; + --prev; + if (!prev->second.isInUse()) { + // Absorb current chunk size into previous chunk. + prev->second.setSize(prev->second.getSize() + i->second.getSize()); + // Erase current chunk. Erasing does not invalidate current + // iterators for a map, except for that pointing to the object + // itself, which will be overwritten in the next statement. + chunks.erase(i); + // From here on, the previous chunk is our current chunk. + i = prev; + } + } + auto next = i; + ++next; + if (next != chunks.end()) { // Absorb next chunk if exists and free + if (!next->second.isInUse()) { + // Absurb next chunk size into current chunk + i->second.setSize(i->second.getSize() + next->second.getSize()); + // Erase next chunk. + chunks.erase(next); + } + } +} + +Arena::Stats Arena::stats() const +{ + Arena::Stats r; + r.used = r.free = r.total = r.chunks_used = r.chunks_free = 0; + for (const auto& chunk: chunks) { + if (chunk.second.isInUse()) { + r.used += chunk.second.getSize(); + r.chunks_used += 1; + } else { + r.free += chunk.second.getSize(); + r.chunks_free += 1; + } + r.total += chunk.second.getSize(); + } + return r; +} + +#ifdef ARENA_DEBUG +void Arena::walk() const +{ + for (const auto& chunk: chunks) { + std::cout << + "0x" << std::hex << std::setw(16) << std::setfill('0') << chunk.first << + " 0x" << std::hex << std::setw(16) << std::setfill('0') << chunk.second.getSize() << + " 0x" << chunk.second.isInUse() << std::endl; + } + std::cout << std::endl; +} +#endif + +/*******************************************************************************/ +// Implementation: Win32LockedPageAllocator + +#ifdef WIN32 +/** LockedPageAllocator specialized for Windows. + */ +class Win32LockedPageAllocator: public LockedPageAllocator +{ +public: + Win32LockedPageAllocator(); + void* AllocateLocked(size_t len, bool *lockingSuccess); + void FreeLocked(void* addr, size_t len); + size_t GetLimit(); +private: + size_t page_size; +}; + +Win32LockedPageAllocator::Win32LockedPageAllocator() +{ + // Determine system page size in bytes + SYSTEM_INFO sSysInfo; + GetSystemInfo(&sSysInfo); + page_size = sSysInfo.dwPageSize; +} +void *Win32LockedPageAllocator::AllocateLocked(size_t len, bool *lockingSuccess) +{ + len = align_up(len, page_size); + void *addr = VirtualAlloc(nullptr, len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (addr) { + // VirtualLock is used to attempt to keep keying material out of swap. Note + // that it does not provide this as a guarantee, but, in practice, memory + // that has been VirtualLock'd almost never gets written to the pagefile + // except in rare circumstances where memory is extremely low. + *lockingSuccess = VirtualLock(const_cast(addr), len) != 0; + } + return addr; +} +void Win32LockedPageAllocator::FreeLocked(void* addr, size_t len) +{ + len = align_up(len, page_size); + memory_cleanse(addr, len); + VirtualUnlock(const_cast(addr), len); +} + +size_t Win32LockedPageAllocator::GetLimit() +{ + // TODO is there a limit on windows, how to get it? + return std::numeric_limits::max(); +} +#endif + +/*******************************************************************************/ +// Implementation: PosixLockedPageAllocator + +#ifndef WIN32 +/** LockedPageAllocator specialized for OSes that don't try to be + * special snowflakes. + */ +class PosixLockedPageAllocator: public LockedPageAllocator +{ +public: + PosixLockedPageAllocator(); + void* AllocateLocked(size_t len, bool *lockingSuccess); + void FreeLocked(void* addr, size_t len); + size_t GetLimit(); +private: + size_t page_size; +}; + +PosixLockedPageAllocator::PosixLockedPageAllocator() +{ + // Determine system page size in bytes +#if defined(PAGESIZE) // defined in limits.h + page_size = PAGESIZE; +#else // assume some POSIX OS + page_size = sysconf(_SC_PAGESIZE); +#endif +} +void *PosixLockedPageAllocator::AllocateLocked(size_t len, bool *lockingSuccess) +{ + void *addr; + len = align_up(len, page_size); + addr = mmap(nullptr, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (addr) { + *lockingSuccess = mlock(addr, len) == 0; + } + return addr; +} +void PosixLockedPageAllocator::FreeLocked(void* addr, size_t len) +{ + len = align_up(len, page_size); + memory_cleanse(addr, len); + munlock(addr, len); + munmap(addr, len); +} +size_t PosixLockedPageAllocator::GetLimit() +{ +#ifdef RLIMIT_MEMLOCK + struct rlimit rlim; + if (getrlimit(RLIMIT_MEMLOCK, &rlim) == 0) { + if (rlim.rlim_cur != RLIM_INFINITY) { + return rlim.rlim_cur; + } + } +#endif + return std::numeric_limits::max(); +} +#endif + +/*******************************************************************************/ +// Implementation: LockedPool + +LockedPool::LockedPool(std::unique_ptr allocator_in, LockingFailed_Callback lf_cb_in): + allocator(std::move(allocator_in)), lf_cb(lf_cb_in), cumulative_bytes_locked(0) +{ +} + +LockedPool::~LockedPool() +{ +} +void* LockedPool::alloc(size_t size) +{ + std::lock_guard lock(mutex); + // Try allocating from each current arena + for (auto &arena: arenas) { + void *addr = arena.alloc(size); + if (addr) { + return addr; + } + } + // If that fails, create a new one + if (new_arena(ARENA_SIZE, ARENA_ALIGN)) { + return arenas.back().alloc(size); + } + return nullptr; +} + +void LockedPool::free(void *ptr) +{ + std::lock_guard lock(mutex); + // TODO we can do better than this linear search by keeping a map of arena + // extents to arena, and looking up the address. + for (auto &arena: arenas) { + if (arena.addressInArena(ptr)) { + arena.free(ptr); + return; + } + } + throw std::runtime_error("LockedPool: invalid address not pointing to any arena"); +} + +LockedPool::Stats LockedPool::stats() const +{ + std::lock_guard lock(mutex); + LockedPool::Stats r; + r.used = r.free = r.total = r.chunks_used = r.chunks_free = 0; + r.locked = cumulative_bytes_locked; + for (const auto &arena: arenas) { + Arena::Stats i = arena.stats(); + r.used += i.used; + r.free += i.free; + r.total += i.total; + r.chunks_used += i.chunks_used; + r.chunks_free += i.chunks_free; + } + return r; +} + +bool LockedPool::new_arena(size_t size, size_t align) +{ + bool locked; + // If this is the first arena, handle this specially: Cap the upper size + // by the process limit. This makes sure that the first arena will at least + // be locked. An exception to this is if the process limit is 0: + // in this case no memory can be locked at all so we'll skip past this logic. + if (arenas.empty()) { + size_t limit = allocator->GetLimit(); + if (limit > 0) { + size = std::min(size, limit); + } + } + void *addr = allocator->AllocateLocked(size, &locked); + if (!addr) { + return false; + } + if (locked) { + cumulative_bytes_locked += size; + } else if (lf_cb) { // Call the locking-failed callback if locking failed + if (!lf_cb()) { // If the callback returns false, free the memory and fail, otherwise consider the user warned and proceed. + allocator->FreeLocked(addr, size); + return false; + } + } + arenas.emplace_back(allocator.get(), addr, size, align); + return true; +} + +LockedPool::LockedPageArena::LockedPageArena(LockedPageAllocator *allocator_in, void *base_in, size_t size_in, size_t align_in): + Arena(base_in, size_in, align_in), base(base_in), size(size_in), allocator(allocator_in) +{ +} +LockedPool::LockedPageArena::~LockedPageArena() +{ + allocator->FreeLocked(base, size); +} + +/*******************************************************************************/ +// Implementation: LockedPoolManager +// +LockedPoolManager::LockedPoolManager(std::unique_ptr allocator): + LockedPool(std::move(allocator), &LockedPoolManager::LockingFailed) +{ +} + +bool LockedPoolManager::LockingFailed() +{ + // TODO: log something but how? without including util.h + return true; +} + +void LockedPoolManager::CreateInstance() +{ + // Using a local static instance guarantees that the object is initialized + // when it's first needed and also deinitialized after all objects that use + // it are done with it. I can think of one unlikely scenario where we may + // have a static deinitialization order/problem, but the check in + // LockedPoolManagerBase's destructor helps us detect if that ever happens. +#ifdef WIN32 + std::unique_ptr allocator(new Win32LockedPageAllocator()); +#else + std::unique_ptr allocator(new PosixLockedPageAllocator()); +#endif + static LockedPoolManager instance(std::move(allocator)); + LockedPoolManager::_instance = &instance; +} diff --git a/src/support/lockedpool.h b/src/support/lockedpool.h new file mode 100644 index 0000000000..526c17a73f --- /dev/null +++ b/src/support/lockedpool.h @@ -0,0 +1,251 @@ +// Copyright (c) 2016 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_SUPPORT_LOCKEDPOOL_H +#define BITCOIN_SUPPORT_LOCKEDPOOL_H + +#include +#include +#include +#include +#include + +/** + * OS-dependent allocation and deallocation of locked/pinned memory pages. + * Abstract base class. + */ +class LockedPageAllocator +{ +public: + virtual ~LockedPageAllocator() {} + /** Allocate and lock memory pages. + * If len is not a multiple of the system page size, it is rounded up. + * Returns 0 in case of allocation failure. + * + * If locking the memory pages could not be accomplished it will still + * return the memory, however the lockingSuccess flag will be false. + * lockingSuccess is undefined if the allocation fails. + */ + virtual void* AllocateLocked(size_t len, bool *lockingSuccess) = 0; + + /** Unlock and free memory pages. + * Clear the memory before unlocking. + */ + virtual void FreeLocked(void* addr, size_t len) = 0; + + /** Get the total limit on the amount of memory that may be locked by this + * process, in bytes. Return size_t max if there is no limit or the limit + * is unknown. Return 0 if no memory can be locked at all. + */ + virtual size_t GetLimit() = 0; +}; + +/* An arena manages a contiguous region of memory by dividing it into + * chunks. + */ +class Arena +{ +public: + Arena(void *base, size_t size, size_t alignment); + virtual ~Arena(); + + /** A chunk of memory. + */ + struct Chunk + { + /** Most significant bit of size_t. This is used to mark + * in-usedness of chunk. + */ + const static size_t SIZE_MSB = 1LLU << ((sizeof(size_t)*8)-1); + /** Maximum size of a chunk */ + const static size_t MAX_SIZE = SIZE_MSB - 1; + + Chunk(size_t size_in, bool used_in): + size(size_in | (used_in ? SIZE_MSB : 0)) {} + + bool isInUse() const { return size & SIZE_MSB; } + void setInUse(bool used_in) { size = (size & ~SIZE_MSB) | (used_in ? SIZE_MSB : 0); } + size_t getSize() const { return size & ~SIZE_MSB; } + void setSize(size_t size_in) { size = (size & SIZE_MSB) | size_in; } + private: + size_t size; + }; + /** Memory statistics. */ + struct Stats + { + size_t used; + size_t free; + size_t total; + size_t chunks_used; + size_t chunks_free; + }; + + /** Allocate size bytes from this arena. + * Returns pointer on success, or 0 if memory is full or + * the application tried to allocate 0 bytes. + */ + void* alloc(size_t size); + + /** Free a previously allocated chunk of memory. + * Freeing the zero pointer has no effect. + * Raises std::runtime_error in case of error. + */ + void free(void *ptr); + + /** Get arena usage statistics */ + Stats stats() const; + +#ifdef ARENA_DEBUG + void walk() const; +#endif + + /** Return whether a pointer points inside this arena. + * This returns base <= ptr < (base+size) so only use it for (inclusive) + * chunk starting addresses. + */ + bool addressInArena(void *ptr) const { return ptr >= base && ptr < end; } +private: + Arena(const Arena& other) = delete; // non construction-copyable + Arena& operator=(const Arena&) = delete; // non copyable + + /** Map of chunk address to chunk information. This class makes use of the + * sorted order to merge previous and next chunks during deallocation. + */ + std::map chunks; + /** Base address of arena */ + char* base; + /** End address of arena */ + char* end; + /** Minimum chunk alignment */ + size_t alignment; +}; + +/** Pool for locked memory chunks. + * + * To avoid sensitive key data from being swapped to disk, the memory in this pool + * is locked/pinned. + * + * An arena manages a contiguous region of memory. The pool starts out with one arena + * but can grow to multiple arenas if the need arises. + * + * Unlike a normal C heap, the administrative structures are seperate from the managed + * memory. This has been done as the sizes and bases of objects are not in themselves sensitive + * information, as to conserve precious locked memory. In some operating systems + * the amount of memory that can be locked is small. + */ +class LockedPool +{ +public: + /** Size of one arena of locked memory. This is a compromise. + * Do not set this too low, as managing many arenas will increase + * allocation and deallocation overhead. Setting it too high allocates + * more locked memory from the OS than strictly necessary. + */ + static const size_t ARENA_SIZE = 256*1024; + /** Chunk alignment. Another compromise. Setting this too high will waste + * memory, setting it too low will facilitate fragmentation. + */ + static const size_t ARENA_ALIGN = 16; + + /** Callback when allocation succeeds but locking fails. + */ + typedef bool (*LockingFailed_Callback)(); + + /** Memory statistics. */ + struct Stats + { + size_t used; + size_t free; + size_t total; + size_t locked; + size_t chunks_used; + size_t chunks_free; + }; + + /** Create a new LockedPool. This takes ownership of the MemoryPageLocker, + * you can only instantiate this with LockedPool(std::move(...)). + * + * The second argument is an optional callback when locking a newly allocated arena failed. + * If this callback is provided and returns false, the allocation fails (hard fail), if + * it returns true the allocation proceeds, but it could warn. + */ + LockedPool(std::unique_ptr allocator, LockingFailed_Callback lf_cb_in = 0); + ~LockedPool(); + + /** Allocate size bytes from this arena. + * Returns pointer on success, or 0 if memory is full or + * the application tried to allocate 0 bytes. + */ + void* alloc(size_t size); + + /** Free a previously allocated chunk of memory. + * Freeing the zero pointer has no effect. + * Raises std::runtime_error in case of error. + */ + void free(void *ptr); + + /** Get pool usage statistics */ + Stats stats() const; +private: + LockedPool(const LockedPool& other) = delete; // non construction-copyable + LockedPool& operator=(const LockedPool&) = delete; // non copyable + + std::unique_ptr allocator; + + /** Create an arena from locked pages */ + class LockedPageArena: public Arena + { + public: + LockedPageArena(LockedPageAllocator *alloc_in, void *base_in, size_t size, size_t align); + ~LockedPageArena(); + private: + void *base; + size_t size; + LockedPageAllocator *allocator; + }; + + bool new_arena(size_t size, size_t align); + + std::list arenas; + LockingFailed_Callback lf_cb; + size_t cumulative_bytes_locked; + /** Mutex protects access to this pool's data structures, including arenas. + */ + mutable std::mutex mutex; +}; + +/** + * Singleton class to keep track of locked (ie, non-swappable) memory, for use in + * std::allocator templates. + * + * Some implementations of the STL allocate memory in some constructors (i.e., see + * MSVC's vector implementation where it allocates 1 byte of memory in the allocator.) + * Due to the unpredictable order of static initializers, we have to make sure the + * LockedPoolManager instance exists before any other STL-based objects that use + * secure_allocator are created. So instead of having LockedPoolManager also be + * static-initialized, it is created on demand. + */ +class LockedPoolManager : public LockedPool +{ +public: + /** Return the current instance, or create it once */ + static LockedPoolManager& Instance() + { + std::call_once(LockedPoolManager::init_flag, LockedPoolManager::CreateInstance); + return *LockedPoolManager::_instance; + } + +private: + LockedPoolManager(std::unique_ptr allocator); + + /** Create a new LockedPoolManager specialized to the OS */ + static void CreateInstance(); + /** Called when locking fails, warn the user here */ + static bool LockingFailed(); + + static LockedPoolManager* _instance; + static std::once_flag init_flag; +}; + +#endif // BITCOIN_SUPPORT_LOCKEDPOOL_H diff --git a/src/support/pagelocker.cpp b/src/support/pagelocker.cpp deleted file mode 100644 index 7cea2d88c5..0000000000 --- a/src/support/pagelocker.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2009-2015 The Bitcoin Core developers -// Distributed under the MIT software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#include "support/pagelocker.h" - -#if defined(HAVE_CONFIG_H) -#include "config/bitcoin-config.h" -#endif - -#ifdef WIN32 -#ifdef _WIN32_WINNT -#undef _WIN32_WINNT -#endif -#define _WIN32_WINNT 0x0501 -#define WIN32_LEAN_AND_MEAN 1 -#ifndef NOMINMAX -#define NOMINMAX -#endif -#include -// This is used to attempt to keep keying material out of swap -// Note that VirtualLock does not provide this as a guarantee on Windows, -// but, in practice, memory that has been VirtualLock'd almost never gets written to -// the pagefile except in rare circumstances where memory is extremely low. -#else -#include -#include // for PAGESIZE -#include // for sysconf -#endif - -LockedPageManager* LockedPageManager::_instance = NULL; -boost::once_flag LockedPageManager::init_flag = BOOST_ONCE_INIT; - -/** Determine system page size in bytes */ -static inline size_t GetSystemPageSize() -{ - size_t page_size; -#if defined(WIN32) - SYSTEM_INFO sSysInfo; - GetSystemInfo(&sSysInfo); - page_size = sSysInfo.dwPageSize; -#elif defined(PAGESIZE) // defined in limits.h - page_size = PAGESIZE; -#else // assume some POSIX OS - page_size = sysconf(_SC_PAGESIZE); -#endif - return page_size; -} - -bool MemoryPageLocker::Lock(const void* addr, size_t len) -{ -#ifdef WIN32 - return VirtualLock(const_cast(addr), len) != 0; -#else - return mlock(addr, len) == 0; -#endif -} - -bool MemoryPageLocker::Unlock(const void* addr, size_t len) -{ -#ifdef WIN32 - return VirtualUnlock(const_cast(addr), len) != 0; -#else - return munlock(addr, len) == 0; -#endif -} - -LockedPageManager::LockedPageManager() : LockedPageManagerBase(GetSystemPageSize()) -{ -} diff --git a/src/support/pagelocker.h b/src/support/pagelocker.h deleted file mode 100644 index 042144fad5..0000000000 --- a/src/support/pagelocker.h +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright (c) 2009-2010 Satoshi Nakamoto -// Copyright (c) 2009-2015 The Bitcoin Core developers -// Distributed under the MIT software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef BITCOIN_SUPPORT_PAGELOCKER_H -#define BITCOIN_SUPPORT_PAGELOCKER_H - -#include "support/cleanse.h" - -#include - -#include -#include - -/** - * Thread-safe class to keep track of locked (ie, non-swappable) memory pages. - * - * Memory locks do not stack, that is, pages which have been locked several times by calls to mlock() - * will be unlocked by a single call to munlock(). This can result in keying material ending up in swap when - * those functions are used naively. This class simulates stacking memory locks by keeping a counter per page. - * - * @note By using a map from each page base address to lock count, this class is optimized for - * small objects that span up to a few pages, mostly smaller than a page. To support large allocations, - * something like an interval tree would be the preferred data structure. - */ -template -class LockedPageManagerBase -{ -public: - LockedPageManagerBase(size_t _page_size) : page_size(_page_size) - { - // Determine bitmask for extracting page from address - assert(!(_page_size & (_page_size - 1))); // size must be power of two - page_mask = ~(_page_size - 1); - } - - ~LockedPageManagerBase() - { - } - - - // For all pages in affected range, increase lock count - void LockRange(void* p, size_t size) - { - boost::mutex::scoped_lock lock(mutex); - if (!size) - return; - const size_t base_addr = reinterpret_cast(p); - const size_t start_page = base_addr & page_mask; - const size_t end_page = (base_addr + size - 1) & page_mask; - for (size_t page = start_page; page <= end_page; page += page_size) { - Histogram::iterator it = histogram.find(page); - if (it == histogram.end()) // Newly locked page - { - locker.Lock(reinterpret_cast(page), page_size); - histogram.insert(std::make_pair(page, 1)); - } else // Page was already locked; increase counter - { - it->second += 1; - } - } - } - - // For all pages in affected range, decrease lock count - void UnlockRange(void* p, size_t size) - { - boost::mutex::scoped_lock lock(mutex); - if (!size) - return; - const size_t base_addr = reinterpret_cast(p); - const size_t start_page = base_addr & page_mask; - const size_t end_page = (base_addr + size - 1) & page_mask; - for (size_t page = start_page; page <= end_page; page += page_size) { - Histogram::iterator it = histogram.find(page); - assert(it != histogram.end()); // Cannot unlock an area that was not locked - // Decrease counter for page, when it is zero, the page will be unlocked - it->second -= 1; - if (it->second == 0) // Nothing on the page anymore that keeps it locked - { - // Unlock page and remove the count from histogram - locker.Unlock(reinterpret_cast(page), page_size); - histogram.erase(it); - } - } - } - - // Get number of locked pages for diagnostics - int GetLockedPageCount() - { - boost::mutex::scoped_lock lock(mutex); - return histogram.size(); - } - -private: - Locker locker; - boost::mutex mutex; - size_t page_size, page_mask; - // map of page base address to lock count - typedef std::map Histogram; - Histogram histogram; -}; - - -/** - * OS-dependent memory page locking/unlocking. - * Defined as policy class to make stubbing for test possible. - */ -class MemoryPageLocker -{ -public: - /** Lock memory pages. - * addr and len must be a multiple of the system page size - */ - bool Lock(const void* addr, size_t len); - /** Unlock memory pages. - * addr and len must be a multiple of the system page size - */ - bool Unlock(const void* addr, size_t len); -}; - -/** - * Singleton class to keep track of locked (ie, non-swappable) memory pages, for use in - * std::allocator templates. - * - * Some implementations of the STL allocate memory in some constructors (i.e., see - * MSVC's vector implementation where it allocates 1 byte of memory in the allocator.) - * Due to the unpredictable order of static initializers, we have to make sure the - * LockedPageManager instance exists before any other STL-based objects that use - * secure_allocator are created. So instead of having LockedPageManager also be - * static-initialized, it is created on demand. - */ -class LockedPageManager : public LockedPageManagerBase -{ -public: - static LockedPageManager& Instance() - { - boost::call_once(LockedPageManager::CreateInstance, LockedPageManager::init_flag); - return *LockedPageManager::_instance; - } - -private: - LockedPageManager(); - - static void CreateInstance() - { - // Using a local static instance guarantees that the object is initialized - // when it's first needed and also deinitialized after all objects that use - // it are done with it. I can think of one unlikely scenario where we may - // have a static deinitialization order/problem, but the check in - // LockedPageManagerBase's destructor helps us detect if that ever happens. - static LockedPageManager instance; - LockedPageManager::_instance = &instance; - } - - static LockedPageManager* _instance; - static boost::once_flag init_flag; -}; - -#endif // BITCOIN_SUPPORT_PAGELOCKER_H diff --git a/src/test/allocator_tests.cpp b/src/test/allocator_tests.cpp index 613f6c12d7..f0e848655f 100644 --- a/src/test/allocator_tests.cpp +++ b/src/test/allocator_tests.cpp @@ -11,110 +11,214 @@ BOOST_FIXTURE_TEST_SUITE(allocator_tests, BasicTestingSetup) -// Dummy memory page locker for platform independent tests -static const void *last_lock_addr, *last_unlock_addr; -static size_t last_lock_len, last_unlock_len; -class TestLocker +BOOST_AUTO_TEST_CASE(arena_tests) { -public: - bool Lock(const void *addr, size_t len) + // Fake memory base address for testing + // without actually using memory. + void *synth_base = reinterpret_cast(0x08000000); + const size_t synth_size = 1024*1024; + Arena b(synth_base, synth_size, 16); + void *chunk = b.alloc(1000); +#ifdef ARENA_DEBUG + b.walk(); +#endif + BOOST_CHECK(chunk != nullptr); + BOOST_CHECK(b.stats().used == 1008); // Aligned to 16 + BOOST_CHECK(b.stats().total == synth_size); // Nothing has disappeared? + b.free(chunk); +#ifdef ARENA_DEBUG + b.walk(); +#endif + BOOST_CHECK(b.stats().used == 0); + BOOST_CHECK(b.stats().free == synth_size); + try { // Test exception on double-free + b.free(chunk); + BOOST_CHECK(0); + } catch(std::runtime_error &) { - last_lock_addr = addr; - last_lock_len = len; - return true; } - bool Unlock(const void *addr, size_t len) - { - last_unlock_addr = addr; - last_unlock_len = len; - return true; + + void *a0 = b.alloc(128); + BOOST_CHECK(a0 == synth_base); // first allocation must start at beginning + void *a1 = b.alloc(256); + void *a2 = b.alloc(512); + BOOST_CHECK(b.stats().used == 896); + BOOST_CHECK(b.stats().total == synth_size); +#ifdef ARENA_DEBUG + b.walk(); +#endif + b.free(a0); +#ifdef ARENA_DEBUG + b.walk(); +#endif + BOOST_CHECK(b.stats().used == 768); + b.free(a1); + BOOST_CHECK(b.stats().used == 512); + void *a3 = b.alloc(128); +#ifdef ARENA_DEBUG + b.walk(); +#endif + BOOST_CHECK(b.stats().used == 640); + b.free(a2); + BOOST_CHECK(b.stats().used == 128); + b.free(a3); + BOOST_CHECK(b.stats().used == 0); + BOOST_CHECK(b.stats().total == synth_size); + BOOST_CHECK(b.stats().free == synth_size); + + std::vector addr; + BOOST_CHECK(b.alloc(0) == nullptr); // allocating 0 always returns nullptr +#ifdef ARENA_DEBUG + b.walk(); +#endif + // Sweeping allocate all memory + for (int x=0; x<1024; ++x) + addr.push_back(b.alloc(1024)); + BOOST_CHECK(addr[0] == synth_base); // first allocation must start at beginning + BOOST_CHECK(b.stats().free == 0); + BOOST_CHECK(b.alloc(1024) == nullptr); // memory is full, this must return nullptr + BOOST_CHECK(b.alloc(0) == nullptr); + for (int x=0; x<1024; ++x) + b.free(addr[x]); + addr.clear(); + BOOST_CHECK(b.stats().total == synth_size); + BOOST_CHECK(b.stats().free == synth_size); + + // Now in the other direction... + for (int x=0; x<1024; ++x) + addr.push_back(b.alloc(1024)); + for (int x=0; x<1024; ++x) + b.free(addr[1023-x]); + addr.clear(); + + // Now allocate in smaller unequal chunks, then deallocate haphazardly + // Not all the chunks will succeed allocating, but freeing nullptr is + // allowed so that is no problem. + for (int x=0; x<2048; ++x) + addr.push_back(b.alloc(x+1)); + for (int x=0; x<2048; ++x) + b.free(addr[((x*23)%2048)^242]); + addr.clear(); + + // Go entirely wild: free and alloc interleaved, + // generate targets and sizes using pseudo-randomness. + for (int x=0; x<2048; ++x) + addr.push_back(0); + uint32_t s = 0x12345678; + for (int x=0; x<5000; ++x) { + int idx = s & (addr.size()-1); + if (s & 0x80000000) { + b.free(addr[idx]); + addr[idx] = 0; + } else if(!addr[idx]) { + addr[idx] = b.alloc((s >> 16) & 2047); + } + bool lsb = s & 1; + s >>= 1; + if (lsb) + s ^= 0xf00f00f0; // LFSR period 0xf7ffffe0 } -}; + for (void *ptr: addr) + b.free(ptr); + addr.clear(); -BOOST_AUTO_TEST_CASE(test_LockedPageManagerBase) + BOOST_CHECK(b.stats().total == synth_size); + BOOST_CHECK(b.stats().free == synth_size); +} + +/** Mock LockedPageAllocator for testing */ +class TestLockedPageAllocator: public LockedPageAllocator { - const size_t test_page_size = 4096; - LockedPageManagerBase lpm(test_page_size); - size_t addr; - last_lock_addr = last_unlock_addr = 0; - last_lock_len = last_unlock_len = 0; - - /* Try large number of small objects */ - addr = 0; - for(int i=0; i<1000; ++i) - { - lpm.LockRange(reinterpret_cast(addr), 33); - addr += 33; - } - /* Try small number of page-sized objects, straddling two pages */ - addr = test_page_size*100 + 53; - for(int i=0; i<100; ++i) - { - lpm.LockRange(reinterpret_cast(addr), test_page_size); - addr += test_page_size; - } - /* Try small number of page-sized objects aligned to exactly one page */ - addr = test_page_size*300; - for(int i=0; i<100; ++i) - { - lpm.LockRange(reinterpret_cast(addr), test_page_size); - addr += test_page_size; - } - /* one very large object, straddling pages */ - lpm.LockRange(reinterpret_cast(test_page_size*600+1), test_page_size*500); - BOOST_CHECK(last_lock_addr == reinterpret_cast(test_page_size*(600+500))); - /* one very large object, page aligned */ - lpm.LockRange(reinterpret_cast(test_page_size*1200), test_page_size*500-1); - BOOST_CHECK(last_lock_addr == reinterpret_cast(test_page_size*(1200+500-1))); - - BOOST_CHECK(lpm.GetLockedPageCount() == ( - (1000*33+test_page_size-1)/test_page_size + // small objects - 101 + 100 + // page-sized objects - 501 + 500)); // large objects - BOOST_CHECK((last_lock_len & (test_page_size-1)) == 0); // always lock entire pages - BOOST_CHECK(last_unlock_len == 0); // nothing unlocked yet - - /* And unlock again */ - addr = 0; - for(int i=0; i<1000; ++i) +public: + TestLockedPageAllocator(int count_in, int lockedcount_in): count(count_in), lockedcount(lockedcount_in) {} + void* AllocateLocked(size_t len, bool *lockingSuccess) { - lpm.UnlockRange(reinterpret_cast(addr), 33); - addr += 33; + *lockingSuccess = false; + if (count > 0) { + --count; + + if (lockedcount > 0) { + --lockedcount; + *lockingSuccess = true; + } + + return reinterpret_cast(0x08000000 + (count<<24)); // Fake address, do not actually use this memory + } + return 0; } - addr = test_page_size*100 + 53; - for(int i=0; i<100; ++i) + void FreeLocked(void* addr, size_t len) { - lpm.UnlockRange(reinterpret_cast(addr), test_page_size); - addr += test_page_size; } - addr = test_page_size*300; - for(int i=0; i<100; ++i) + size_t GetLimit() { - lpm.UnlockRange(reinterpret_cast(addr), test_page_size); - addr += test_page_size; + return std::numeric_limits::max(); } - lpm.UnlockRange(reinterpret_cast(test_page_size*600+1), test_page_size*500); - lpm.UnlockRange(reinterpret_cast(test_page_size*1200), test_page_size*500-1); +private: + int count; + int lockedcount; +}; - /* Check that everything is released */ - BOOST_CHECK(lpm.GetLockedPageCount() == 0); +BOOST_AUTO_TEST_CASE(lockedpool_tests_mock) +{ + // Test over three virtual arenas, of which one will succeed being locked + std::unique_ptr x(new TestLockedPageAllocator(3, 1)); + LockedPool pool(std::move(x)); + BOOST_CHECK(pool.stats().total == 0); + BOOST_CHECK(pool.stats().locked == 0); - /* A few and unlocks of size zero (should have no effect) */ - addr = 0; - for(int i=0; i<1000; ++i) - { - lpm.LockRange(reinterpret_cast(addr), 0); - addr += 1; - } - BOOST_CHECK(lpm.GetLockedPageCount() == 0); - addr = 0; - for(int i=0; i<1000; ++i) + void *a0 = pool.alloc(LockedPool::ARENA_SIZE / 2); + BOOST_CHECK(a0); + BOOST_CHECK(pool.stats().locked == LockedPool::ARENA_SIZE); + void *a1 = pool.alloc(LockedPool::ARENA_SIZE / 2); + BOOST_CHECK(a1); + void *a2 = pool.alloc(LockedPool::ARENA_SIZE / 2); + BOOST_CHECK(a2); + void *a3 = pool.alloc(LockedPool::ARENA_SIZE / 2); + BOOST_CHECK(a3); + void *a4 = pool.alloc(LockedPool::ARENA_SIZE / 2); + BOOST_CHECK(a4); + void *a5 = pool.alloc(LockedPool::ARENA_SIZE / 2); + BOOST_CHECK(a5); + // We've passed a count of three arenas, so this allocation should fail + void *a6 = pool.alloc(16); + BOOST_CHECK(!a6); + + pool.free(a0); + pool.free(a2); + pool.free(a4); + pool.free(a1); + pool.free(a3); + pool.free(a5); + BOOST_CHECK(pool.stats().total == 3*LockedPool::ARENA_SIZE); + BOOST_CHECK(pool.stats().locked == LockedPool::ARENA_SIZE); + BOOST_CHECK(pool.stats().used == 0); +} + +// These tests used the live LockedPoolManager object, this is also used +// by other tests so the conditions are somewhat less controllable and thus the +// tests are somewhat more error-prone. +BOOST_AUTO_TEST_CASE(lockedpool_tests_live) +{ + LockedPoolManager &pool = LockedPoolManager::Instance(); + LockedPool::Stats initial = pool.stats(); + + void *a0 = pool.alloc(16); + BOOST_CHECK(a0); + // Test reading and writing the allocated memory + *((uint32_t*)a0) = 0x1234; + BOOST_CHECK(*((uint32_t*)a0) == 0x1234); + + pool.free(a0); + try { // Test exception on double-free + pool.free(a0); + BOOST_CHECK(0); + } catch(std::runtime_error &) { - lpm.UnlockRange(reinterpret_cast(addr), 0); - addr += 1; } - BOOST_CHECK(lpm.GetLockedPageCount() == 0); - BOOST_CHECK((last_unlock_len & (test_page_size-1)) == 0); // always unlock entire pages + // If more than one new arena was allocated for the above tests, something is wrong + BOOST_CHECK(pool.stats().total <= (initial.total + LockedPool::ARENA_SIZE)); + // Usage must be back to where it started + BOOST_CHECK(pool.stats().used == initial.used); } BOOST_AUTO_TEST_SUITE_END()