clusterlin: add Linearize function

This adds a first version of the overall linearization interface, which given
a DepGraph constructs a good linearization, by incrementally including good
candidate sets (found using AncestorCandidateFinder and SearchCandidateFinder).
pull/30126/head
Pieter Wuille 6 months ago
parent ee0ddfe4f6
commit 46aad9b099

@ -167,6 +167,22 @@ public:
for (auto pos : elems) ret += entries[pos].feerate;
return ret;
}
/** Append the entries of select to list in a topologically valid order.
*
* Complexity: O(select.Count() * log(select.Count())).
*/
void AppendTopo(std::vector<ClusterIndex>& list, const SetType& select) const noexcept
{
ClusterIndex old_len = list.size();
for (auto i : select) list.push_back(i);
std::sort(list.begin() + old_len, list.end(), [&](ClusterIndex a, ClusterIndex b) noexcept {
const auto a_anc_count = entries[a].ancestors.Count();
const auto b_anc_count = entries[b].ancestors.Count();
if (a_anc_count != b_anc_count) return a_anc_count < b_anc_count;
return a < b;
});
}
};
/** A set of transactions together with their aggregate feerate. */
@ -486,6 +502,57 @@ public:
}
};
/** Find a linearization for a cluster.
*
* @param[in] depgraph Dependency graph of the cluster to be linearized.
* @param[in] max_iterations Upper bound on the number of optimization steps that will be done.
* @return A pair of:
* - The resulting linearization.
* - A boolean indicating whether the result is guaranteed to be
* optimal.
*
* Complexity: O(N * min(max_iterations + N, 2^N)) where N=depgraph.TxCount().
*/
template<typename SetType>
std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations) noexcept
{
if (depgraph.TxCount() == 0) return {{}, true};
uint64_t iterations_left = max_iterations;
std::vector<ClusterIndex> linearization;
AncestorCandidateFinder anc_finder(depgraph);
SearchCandidateFinder src_finder(depgraph);
linearization.reserve(depgraph.TxCount());
bool optimal = true;
while (true) {
// Initialize best as the best remaining ancestor set.
auto best = anc_finder.FindCandidateSet();
// Invoke bounded search to update best, with up to half of our remaining iterations as
// limit.
uint64_t max_iterations_now = (iterations_left + 1) / 2;
uint64_t iterations_done_now = 0;
std::tie(best, iterations_done_now) = src_finder.FindCandidateSet(max_iterations_now, best);
iterations_left -= iterations_done_now;
if (iterations_done_now == max_iterations_now) {
optimal = false;
}
// Add to output in topological order.
depgraph.AppendTopo(linearization, best.transactions);
// Update state to reflect best is no longer to be linearized.
anc_finder.MarkDone(best.transactions);
if (anc_finder.AllDone()) break;
src_finder.MarkDone(best.transactions);
}
return {std::move(linearization), optimal};
}
} // namespace cluster_linearize
#endif // BITCOIN_CLUSTER_LINEARIZE_H

@ -11,6 +11,7 @@
#include <util/bitset.h>
#include <util/feefrac.h>
#include <algorithm>
#include <stdint.h>
#include <vector>
#include <utility>
@ -140,6 +141,29 @@ public:
}
};
/** A simple linearization algorithm.
*
* This matches Linearize() in interface and behavior, though with fewer optimizations, and using
* just SimpleCandidateFinder rather than AncestorCandidateFinder and SearchCandidateFinder.
*/
template<typename SetType>
std::pair<std::vector<ClusterIndex>, bool> SimpleLinearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations)
{
std::vector<ClusterIndex> linearization;
SimpleCandidateFinder finder(depgraph);
SetType todo = SetType::Fill(depgraph.TxCount());
bool optimal = true;
while (todo.Any()) {
auto [candidate, iterations_done] = finder.FindCandidateSet(max_iterations);
if (iterations_done == max_iterations) optimal = false;
depgraph.AppendTopo(linearization, candidate.transactions);
todo -= candidate.transactions;
finder.MarkDone(candidate.transactions);
max_iterations -= iterations_done;
}
return {std::move(linearization), optimal};
}
/** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */
template<typename SetType>
SetType ReadTopologicalSet(const DepGraph<SetType>& depgraph, const SetType& todo, SpanReader& reader)
@ -458,3 +482,68 @@ FUZZ_TARGET(clusterlin_search_finder)
assert(exh_finder.AllDone());
assert(anc_finder.AllDone());
}
FUZZ_TARGET(clusterlin_linearize)
{
// Verify the behavior of Linearize().
// Retrieve an iteration count, and a depgraph from the fuzz input.
SpanReader reader(buffer);
DepGraph<TestBitSet> depgraph;
uint64_t iter_count{0};
try {
reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph);
} catch (const std::ios_base::failure&) {}
// Invoke Linearize().
iter_count &= 0x7ffff;
auto [linearization, optimal] = Linearize(depgraph, iter_count);
SanityCheck(depgraph, linearization);
auto chunking = ChunkLinearization(depgraph, linearization);
// If the iteration count is sufficiently high, an optimal linearization must be found.
// Each linearization step can use up to 2^k iterations, with steps k=1..n. That sum is
// 2 * (2^n - 1)
const uint64_t n = depgraph.TxCount();
if (n <= 18 && iter_count > 2U * ((uint64_t{1} << n) - 1U)) {
assert(optimal);
}
// If Linearize claims optimal result, run quality tests.
if (optimal) {
// It must be as good as SimpleLinearize.
auto [simple_linearization, simple_optimal] = SimpleLinearize(depgraph, MAX_SIMPLE_ITERATIONS);
SanityCheck(depgraph, simple_linearization);
auto simple_chunking = ChunkLinearization(depgraph, simple_linearization);
auto cmp = CompareChunks(chunking, simple_chunking);
assert(cmp >= 0);
// If SimpleLinearize finds the optimal result too, they must be equal (if not,
// SimpleLinearize is broken).
if (simple_optimal) assert(cmp == 0);
// Only for very small clusters, test every topologically-valid permutation.
if (depgraph.TxCount() <= 7) {
std::vector<ClusterIndex> perm_linearization(depgraph.TxCount());
for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) perm_linearization[i] = i;
// Iterate over all valid permutations.
do {
// Determine whether perm_linearization is topological.
TestBitSet perm_done;
bool perm_is_topo{true};
for (auto i : perm_linearization) {
perm_done.Set(i);
if (!depgraph.Ancestors(i).IsSubsetOf(perm_done)) {
perm_is_topo = false;
break;
}
}
// If so, verify that the obtained linearization is as good as the permutation.
if (perm_is_topo) {
auto perm_chunking = ChunkLinearization(depgraph, perm_linearization);
auto cmp = CompareChunks(chunking, perm_chunking);
assert(cmp >= 0);
}
} while(std::next_permutation(perm_linearization.begin(), perm_linearization.end()));
}
}
}

@ -7,6 +7,7 @@
#include <cluster_linearize.h>
#include <serialize.h>
#include <span.h>
#include <streams.h>
#include <util/bitset.h>
#include <util/feefrac.h>
@ -331,6 +332,22 @@ void VerifyDepGraphFromCluster(const Cluster<SetType>& cluster, const DepGraph<S
}
}
/** Perform a sanity check on a linearization. */
template<typename SetType>
void SanityCheck(const DepGraph<SetType>& depgraph, Span<const ClusterIndex> linearization)
{
// Check completeness.
assert(linearization.size() == depgraph.TxCount());
TestBitSet done;
for (auto i : linearization) {
// Check transaction position is in range.
assert(i < depgraph.TxCount());
// Check topology and lack of duplicates.
assert((depgraph.Ancestors(i) - done) == TestBitSet::Singleton(i));
done.Set(i);
}
}
} // namespace
#endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H

Loading…
Cancel
Save