Add Binary heap structure (#5084)

Co-authored-by: Ernesto García <ernestognw@gmail.com>
Co-authored-by: cairo <cairoeth@protonmail.com>
This commit is contained in:
Hadrien Croubois
2024-07-23 19:31:26 +02:00
committed by GitHub
parent 9e73c4b581
commit 231fae33f0
16 changed files with 1406 additions and 65 deletions

View File

@ -34,9 +34,10 @@ function generateFromTemplate(file, template, outputPrefix = '') {
for (const [file, template] of Object.entries({
'utils/cryptography/MerkleProof.sol': './templates/MerkleProof.js',
'utils/math/SafeCast.sol': './templates/SafeCast.js',
'utils/structs/Checkpoints.sol': './templates/Checkpoints.js',
'utils/structs/EnumerableSet.sol': './templates/EnumerableSet.js',
'utils/structs/EnumerableMap.sol': './templates/EnumerableMap.js',
'utils/structs/Checkpoints.sol': './templates/Checkpoints.js',
'utils/structs/Heap.sol': './templates/Heap.js',
'utils/SlotDerivation.sol': './templates/SlotDerivation.js',
'utils/StorageSlot.sol': './templates/StorageSlot.js',
'utils/Arrays.sol': './templates/Arrays.js',
@ -49,6 +50,7 @@ for (const [file, template] of Object.entries({
// Tests
for (const [file, template] of Object.entries({
'utils/structs/Checkpoints.t.sol': './templates/Checkpoints.t.js',
'utils/structs/Heap.t.sol': './templates/Heap.t.js',
'utils/Packing.t.sol': './templates/Packing.t.js',
'utils/SlotDerivation.t.sol': './templates/SlotDerivation.t.js',
})) {

View File

@ -5,6 +5,7 @@ const { TYPES } = require('./Arrays.opts');
const header = `\
pragma solidity ^0.8.20;
import {Comparators} from "./Comparators.sol";
import {SlotDerivation} from "./SlotDerivation.sol";
import {StorageSlot} from "./StorageSlot.sol";
import {Math} from "./math/Math.sol";
@ -31,9 +32,9 @@ function sort(
function(${type}, ${type}) pure returns (bool) comp
) internal pure returns (${type}[] memory) {
${
type === 'bytes32'
type === 'uint256'
? '_quickSort(_begin(array), _end(array), comp);'
: 'sort(_castToBytes32Array(array), _castToBytes32Comp(comp));'
: 'sort(_castToUint256Array(array), _castToUint256Comp(comp));'
}
return array;
}
@ -42,7 +43,7 @@ function sort(
* @dev Variant of {sort} that sorts an array of ${type} in increasing order.
*/
function sort(${type}[] memory array) internal pure returns (${type}[] memory) {
${type === 'bytes32' ? 'sort(array, _defaultComp);' : 'sort(_castToBytes32Array(array), _defaultComp);'}
${type === 'uint256' ? 'sort(array, Comparators.lt);' : 'sort(_castToUint256Array(array), Comparators.lt);'}
return array;
}
`;
@ -57,12 +58,12 @@ const quickSort = `\
* IMPORTANT: Memory locations between \`begin\` and \`end\` are not validated/zeroed. This function should
* be used only if the limits are within a memory array.
*/
function _quickSort(uint256 begin, uint256 end, function(bytes32, bytes32) pure returns (bool) comp) private pure {
function _quickSort(uint256 begin, uint256 end, function(uint256, uint256) pure returns (bool) comp) private pure {
unchecked {
if (end - begin < 0x40) return;
// Use first element as pivot
bytes32 pivot = _mload(begin);
uint256 pivot = _mload(begin);
// Position where the pivot should be at the end of the loop
uint256 pos = begin;
@ -84,7 +85,7 @@ function _quickSort(uint256 begin, uint256 end, function(bytes32, bytes32) pure
/**
* @dev Pointer to the memory location of the first element of \`array\`.
*/
function _begin(bytes32[] memory array) private pure returns (uint256 ptr) {
function _begin(uint256[] memory array) private pure returns (uint256 ptr) {
/// @solidity memory-safe-assembly
assembly {
ptr := add(array, 0x20)
@ -95,16 +96,16 @@ function _begin(bytes32[] memory array) private pure returns (uint256 ptr) {
* @dev Pointer to the memory location of the first memory word (32bytes) after \`array\`. This is the memory word
* that comes just after the last element of the array.
*/
function _end(bytes32[] memory array) private pure returns (uint256 ptr) {
function _end(uint256[] memory array) private pure returns (uint256 ptr) {
unchecked {
return _begin(array) + array.length * 0x20;
}
}
/**
* @dev Load memory word (as a bytes32) at location \`ptr\`.
* @dev Load memory word (as a uint256) at location \`ptr\`.
*/
function _mload(uint256 ptr) private pure returns (bytes32 value) {
function _mload(uint256 ptr) private pure returns (uint256 value) {
assembly {
value := mload(ptr)
}
@ -123,16 +124,9 @@ function _swap(uint256 ptr1, uint256 ptr2) private pure {
}
`;
const defaultComparator = `\
/// @dev Comparator for sorting arrays in increasing order.
function _defaultComp(bytes32 a, bytes32 b) private pure returns (bool) {
return a < b;
}
`;
const castArray = type => `\
/// @dev Helper: low level cast ${type} memory array to uint256 memory array
function _castToBytes32Array(${type}[] memory input) private pure returns (bytes32[] memory output) {
function _castToUint256Array(${type}[] memory input) private pure returns (uint256[] memory output) {
assembly {
output := input
}
@ -140,10 +134,10 @@ function _castToBytes32Array(${type}[] memory input) private pure returns (bytes
`;
const castComparator = type => `\
/// @dev Helper: low level cast ${type} comp function to bytes32 comp function
function _castToBytes32Comp(
/// @dev Helper: low level cast ${type} comp function to uint256 comp function
function _castToUint256Comp(
function(${type}, ${type}) pure returns (bool) input
) private pure returns (function(bytes32, bytes32) pure returns (bool) output) {
) private pure returns (function(uint256, uint256) pure returns (bool) output) {
assembly {
output := input
}
@ -374,12 +368,11 @@ module.exports = format(
'using StorageSlot for bytes32;',
'',
// sorting, comparator, helpers and internal
sort('bytes32'),
TYPES.filter(type => type !== 'bytes32').map(sort),
sort('uint256'),
TYPES.filter(type => type !== 'uint256').map(sort),
quickSort,
defaultComparator,
TYPES.filter(type => type !== 'bytes32').map(castArray),
TYPES.filter(type => type !== 'bytes32').map(castComparator),
TYPES.filter(type => type !== 'uint256').map(castArray),
TYPES.filter(type => type !== 'uint256').map(castComparator),
// lookup
search,
// unsafe (direct) storage and memory access

View File

@ -0,0 +1,328 @@
const format = require('../format-lines');
const { TYPES } = require('./Heap.opts');
const { capitalize } = require('../../helpers');
/* eslint-disable max-len */
// Preamble of the generated Heap.sol: pragma, imports and the library-level NatSpec documentation.
const header = `\
pragma solidity ^0.8.20;
import {Math} from "../math/Math.sol";
import {SafeCast} from "../math/SafeCast.sol";
import {Comparators} from "../Comparators.sol";
import {Panic} from "../Panic.sol";
/**
* @dev Library for managing https://en.wikipedia.org/wiki/Binary_heap[binary heap] that can be used as
* https://en.wikipedia.org/wiki/Priority_queue[priority queue].
*
* Heaps are represented as an array of Node objects. This array stores two overlapping structures:
* * A tree structure where the first element (index 0) is the root, and where the node at index i is the child of the
* node at index (i-1)/2 and the father of nodes at index 2*i+1 and 2*i+2. Each node stores the index (in the array)
* where the corresponding value is stored.
* * A list of payloads values where each index contains a value and a lookup index. The type of the value depends on
* the variant being used. The lookup is the index of the node (in the tree) that points to this value.
*
* Some invariants:
* \`\`\`
* i == heap.data[heap.data[i].index].lookup // for all indices i
* i == heap.data[heap.data[i].lookup].index // for all indices i
* \`\`\`
*
* The structure is ordered so that each node is bigger than its parent. An immediate consequence is that the
* highest priority value is the one at the root. This value can be looked up in constant time (O(1)) at
* \`heap.data[heap.data[0].index].value\`
*
* The structure is designed to perform the following operations with the corresponding complexities:
*
* * peek (get the highest priority in set): O(1)
* * insert (insert a value in the set): O(log(n))
* * pop (remove the highest priority value in set): O(log(n))
* * replace (replace the highest priority value in set with a new value): O(log(n))
* * length (get the number of elements in the set): O(1)
* * clear (remove all elements in the set): O(1)
*/
`;
/**
 * Render the Solidity source (structs and functions) for one heap variant.
 *
 * @param {object} opts
 * @param {string} opts.struct - Name of the generated heap struct.
 * @param {string} opts.node - Name of the generated node struct stored in the heap's `data` array.
 * @param {string} opts.valueType - Solidity type of the values held by the heap.
 * @param {string} opts.indexType - Solidity type used for the `index` and `lookup` fields and for positions.
 * @param {number} opts.blockSize - Number of storage slots used by one node; it is the stride applied to `pos`
 * in the generated `_unsafeNodeAccess`.
 * @returns {string} Solidity code for this variant.
 */
const generate = ({ struct, node, valueType, indexType, blockSize }) => `\
/**
* @dev Binary heap that supports values of type ${valueType}.
*
* Each element of that structure uses ${blockSize} storage slots.
*/
struct ${struct} {
${node}[] data;
}
/**
* @dev Internal node type for ${struct}. Stores a value of type ${valueType}.
*/
struct ${node} {
${valueType} value;
${indexType} index; // position -> value
${indexType} lookup; // value -> position
}
/**
* @dev Lookup the root element of the heap.
*/
function peek(${struct} storage self) internal view returns (${valueType}) {
// self.data[0] will \`ARRAY_ACCESS_OUT_OF_BOUNDS\` panic if heap is empty.
return _unsafeNodeAccess(self, self.data[0].index).value;
}
/**
* @dev Remove (and return) the root element for the heap using the default comparator.
*
* NOTE: All inserting and removal from a heap should always be done using the same comparator. Mixing comparator
* during the lifecycle of a heap will result in undefined behavior.
*/
function pop(${struct} storage self) internal returns (${valueType}) {
return pop(self, Comparators.lt);
}
/**
* @dev Remove (and return) the root element for the heap using the provided comparator.
*
* NOTE: All inserting and removal from a heap should always be done using the same comparator. Mixing comparator
* during the lifecycle of a heap will result in undefined behavior.
*/
function pop(
${struct} storage self,
function(uint256, uint256) view returns (bool) comp
) internal returns (${valueType}) {
unchecked {
${indexType} size = length(self);
if (size == 0) Panic.panic(Panic.EMPTY_ARRAY_POP);
${indexType} last = size - 1;
// get root location (in the data array) and value
${node} storage rootNode = _unsafeNodeAccess(self, 0);
${indexType} rootIdx = rootNode.index;
${node} storage rootData = _unsafeNodeAccess(self, rootIdx);
${node} storage lastNode = _unsafeNodeAccess(self, last);
${valueType} rootDataValue = rootData.value;
// if root is not the last element of the data array (that will get pop-ed), reorder the data array.
if (rootIdx != last) {
// get details about the value stored in the last element of the array (that will get pop-ed)
${indexType} lastDataIdx = lastNode.lookup;
${valueType} lastDataValue = lastNode.value;
// copy these values to the location of the root (that is safe, and that we no longer use)
rootData.value = lastDataValue;
rootData.lookup = lastDataIdx;
// update the tree node that used to point to that last element (value now located where the root was)
_unsafeNodeAccess(self, lastDataIdx).index = rootIdx;
}
// get last leaf location (in the data array) and value
${indexType} lastIdx = lastNode.index;
${valueType} lastValue = _unsafeNodeAccess(self, lastIdx).value;
// move the last leaf to the root, pop last leaf ...
rootNode.index = lastIdx;
_unsafeNodeAccess(self, lastIdx).lookup = 0;
self.data.pop();
// ... and heapify
_siftDown(self, last, 0, lastValue, comp);
// return root value
return rootDataValue;
}
}
/**
* @dev Insert a new element in the heap using the default comparator.
*
* NOTE: All inserting and removal from a heap should always be done using the same comparator. Mixing comparator
* during the lifecycle of a heap will result in undefined behavior.
*/
function insert(${struct} storage self, ${valueType} value) internal {
insert(self, value, Comparators.lt);
}
/**
* @dev Insert a new element in the heap using the provided comparator.
*
* NOTE: All inserting and removal from a heap should always be done using the same comparator. Mixing comparator
* during the lifecycle of a heap will result in undefined behavior.
*/
function insert(
${struct} storage self,
${valueType} value,
function(uint256, uint256) view returns (bool) comp
) internal {
${indexType} size = length(self);
if (size == type(${indexType}).max) Panic.panic(Panic.RESOURCE_ERROR);
self.data.push(${node}({index: size, lookup: size, value: value}));
_siftUp(self, size, value, comp);
}
/**
* @dev Return the root element for the heap, and replace it with a new value, using the default comparator.
* This is equivalent to using {pop} and {insert}, but requires only one rebalancing operation.
*
* NOTE: All inserting and removal from a heap should always be done using the same comparator. Mixing comparator
* during the lifecycle of a heap will result in undefined behavior.
*/
function replace(${struct} storage self, ${valueType} newValue) internal returns (${valueType}) {
return replace(self, newValue, Comparators.lt);
}
/**
* @dev Return the root element for the heap, and replace it with a new value, using the provided comparator.
* This is equivalent to using {pop} and {insert}, but requires only one rebalancing operation.
*
* NOTE: All inserting and removal from a heap should always be done using the same comparator. Mixing comparator
* during the lifecycle of a heap will result in undefined behavior.
*/
function replace(
${struct} storage self,
${valueType} newValue,
function(uint256, uint256) view returns (bool) comp
) internal returns (${valueType}) {
${indexType} size = length(self);
if (size == 0) Panic.panic(Panic.EMPTY_ARRAY_POP);
// position of the node that holds the data for the root
${indexType} rootIdx = _unsafeNodeAccess(self, 0).index;
// storage pointer to the node that holds the data for the root
${node} storage rootData = _unsafeNodeAccess(self, rootIdx);
// cache old value and replace it
${valueType} oldValue = rootData.value;
rootData.value = newValue;
// re-heapify
_siftDown(self, size, 0, newValue, comp);
// return old root value
return oldValue;
}
/**
* @dev Returns the number of elements in the heap.
*/
function length(${struct} storage self) internal view returns (${indexType}) {
return self.data.length.to${capitalize(indexType)}();
}
/**
* @dev Removes all elements in the heap.
*/
function clear(${struct} storage self) internal {
${node}[] storage data = self.data;
/// @solidity memory-safe-assembly
assembly {
sstore(data.slot, 0)
}
}
/**
* @dev Swap node \`i\` and \`j\` in the tree.
*/
function _swap(${struct} storage self, ${indexType} i, ${indexType} j) private {
${node} storage ni = _unsafeNodeAccess(self, i);
${node} storage nj = _unsafeNodeAccess(self, j);
${indexType} ii = ni.index;
${indexType} jj = nj.index;
// update pointers to the data (swap the value)
ni.index = jj;
nj.index = ii;
// update lookup pointers for consistency
_unsafeNodeAccess(self, ii).lookup = j;
_unsafeNodeAccess(self, jj).lookup = i;
}
/**
* @dev Perform heap maintenance on \`self\`, starting at position \`pos\` (with the \`value\`), using \`comp\` as a
* comparator, and moving toward the leafs of the underlying tree.
*
* NOTE: This is a private function that is called in a trusted context with already cached parameters. \`length\`
* and \`value\` could be extracted from \`self\` and \`pos\`, but that would require redundant storage read. These
* parameters are not verified. It is the caller role to make sure the parameters are correct.
*/
function _siftDown(
${struct} storage self,
${indexType} size,
${indexType} pos,
${valueType} value,
function(uint256, uint256) view returns (bool) comp
) private {
uint256 left = 2 * uint256(pos) + 1; // computed in uint256, this could overflow ${indexType}
uint256 right = 2 * uint256(pos) + 2; // computed in uint256, this could overflow ${indexType}
if (right < size) {
// the check guarantees that \`left\` and \`right\` are both valid ${indexType}
${indexType} lIndex = ${indexType}(left);
${indexType} rIndex = ${indexType}(right);
${valueType} lValue = _unsafeNodeAccess(self, _unsafeNodeAccess(self, lIndex).index).value;
${valueType} rValue = _unsafeNodeAccess(self, _unsafeNodeAccess(self, rIndex).index).value;
if (comp(lValue, value) || comp(rValue, value)) {
${indexType} index = ${indexType}(comp(lValue, rValue).ternary(lIndex, rIndex));
_swap(self, pos, index);
_siftDown(self, size, index, value, comp);
}
} else if (left < size) {
// the check guarantees that \`left\` is a valid ${indexType}
${indexType} lIndex = ${indexType}(left);
${valueType} lValue = _unsafeNodeAccess(self, _unsafeNodeAccess(self, lIndex).index).value;
if (comp(lValue, value)) {
_swap(self, pos, lIndex);
_siftDown(self, size, lIndex, value, comp);
}
}
}
/**
* @dev Perform heap maintenance on \`self\`, starting at position \`pos\` (with the \`value\`), using \`comp\` as a
* comparator, and moving toward the root of the underlying tree.
*
* NOTE: This is a private function that is called in a trusted context with already cached parameters. \`value\`
* could be extracted from \`self\` and \`pos\`, but that would require redundant storage read. This parameter is not
* verified. It is the caller role to make sure the parameters are correct.
*/
function _siftUp(
${struct} storage self,
${indexType} pos,
${valueType} value,
function(uint256, uint256) view returns (bool) comp
) private {
unchecked {
while (pos > 0) {
${indexType} parent = (pos - 1) / 2;
${valueType} parentValue = _unsafeNodeAccess(self, _unsafeNodeAccess(self, parent).index).value;
if (comp(parentValue, value)) break;
_swap(self, pos, parent);
pos = parent;
}
}
}
function _unsafeNodeAccess(
${struct} storage self,
${indexType} pos
) private pure returns (${node} storage result) {
assembly ("memory-safe") {
mstore(0x00, self.slot)
result.slot := add(keccak256(0x00, 0x20), ${blockSize === 1 ? 'pos' : `mul(pos, ${blockSize})`})
}
}
`;
// GENERATE
// Library body: `using` directives followed by the code rendered for every heap variant.
const libraryBody = format([
  'using Math for *;',
  'using SafeCast for *;',
  '',
  ...TYPES.map(type => generate(type)),
]).trimEnd();

module.exports = format(header.trimEnd(), 'library Heap {', libraryBody, '}');

View File

@ -0,0 +1,13 @@
// Width of one EVM storage slot, in bits.
const SLOT_BITS = 256;

/**
 * Count the storage slots used by a struct whose members have the given bit widths, in declaration order.
 * Members are packed into the current slot while they fit; a member that does not fit in the remaining
 * space starts a new slot (a member never straddles two slots).
 *
 * @param {number[]} memberBits - Bit width of each member, in declaration order.
 * @returns {number} Number of slots used.
 */
const countStorageSlots = memberBits => {
  let slots = 0;
  let usedBits = SLOT_BITS; // a "full" slot forces the first member to open slot #1
  for (const bits of memberBits) {
    if (usedBits + bits > SLOT_BITS) {
      slots += 1;
      usedBits = 0;
    }
    usedBits += bits;
  }
  return slots;
};

/**
 * Describe one heap variant for the code generator.
 *
 * @param {number} valueSize - Bit width of the stored values (`uint<valueSize>`).
 * @param {number} indexSize - Bit width of the `index` and `lookup` fields (`uint<indexSize>`).
 * @returns {object} Names, types and per-node storage footprint (`blockSize`, in slots) of the variant.
 */
const makeType = (valueSize, indexSize) => ({
  struct: `Uint${valueSize}Heap`,
  node: `Uint${valueSize}HeapNode`,
  valueSize,
  valueType: `uint${valueSize}`,
  indexSize,
  indexType: `uint${indexSize}`,
  // Node layout is { value, index, lookup }. Dividing the total width by 256 would under-count whenever a
  // member has to skip to the next slot, so the packing is simulated instead.
  blockSize: countStorageSlots([valueSize, indexSize, indexSize]),
});
// Heap variants to generate, listed as [value bits, index bits].
const TYPES = [
  [256, 64],
  [208, 24],
].map(([valueSize, indexSize]) => makeType(valueSize, indexSize));

module.exports = { TYPES };

View File

@ -0,0 +1,89 @@
const format = require('../format-lines');
const { TYPES } = require('./Heap.opts');
/* eslint-disable max-len */
// Preamble of the generated Heap.t.sol: pragma followed by the imports used by the test contracts.
const header = [
  'pragma solidity ^0.8.20;',
  'import {Test} from "forge-std/Test.sol";',
  'import {Math} from "@openzeppelin/contracts/utils/math/Math.sol";',
  'import {Heap} from "@openzeppelin/contracts/utils/structs/Heap.sol";',
  'import {Comparators} from "@openzeppelin/contracts/utils/Comparators.sol";',
  '', // keeps the trailing newline
].join('\n');
/**
 * Render the Foundry fuzz-test contract for one heap variant.
 *
 * @param {object} opts
 * @param {string} opts.struct - Name of the heap struct under test; the contract is named `<struct>Test`.
 * @param {string} opts.valueType - Solidity type of the values inserted in the heap.
 * @returns {string} Solidity code of the test contract.
 */
const generate = ({ struct, valueType }) => `\
contract ${struct}Test is Test {
using Heap for Heap.${struct};
Heap.${struct} internal heap;
function _validateHeap(function(uint256, uint256) view returns (bool) comp) internal {
for (uint32 i = 0; i < heap.length(); ++i) {
// lookups
assertEq(i, heap.data[heap.data[i].index].lookup);
assertEq(i, heap.data[heap.data[i].lookup].index);
// ordering: each node has a value bigger than its parent
if (i > 0)
assertFalse(comp(heap.data[heap.data[i].index].value, heap.data[heap.data[(i - 1) / 2].index].value));
}
}
function testFuzz(${valueType}[] calldata input) public {
vm.assume(input.length < 0x20);
assertEq(heap.length(), 0);
uint256 min = type(uint256).max;
for (uint256 i = 0; i < input.length; ++i) {
heap.insert(input[i]);
assertEq(heap.length(), i + 1);
_validateHeap(Comparators.lt);
min = Math.min(min, input[i]);
assertEq(heap.peek(), min);
}
uint256 max = 0;
for (uint256 i = 0; i < input.length; ++i) {
${valueType} top = heap.peek();
${valueType} pop = heap.pop();
assertEq(heap.length(), input.length - i - 1);
_validateHeap(Comparators.lt);
assertEq(pop, top);
assertGe(pop, max);
max = pop;
}
}
function testFuzzGt(${valueType}[] calldata input) public {
vm.assume(input.length < 0x20);
assertEq(heap.length(), 0);
uint256 max = 0;
for (uint256 i = 0; i < input.length; ++i) {
heap.insert(input[i], Comparators.gt);
assertEq(heap.length(), i + 1);
_validateHeap(Comparators.gt);
max = Math.max(max, input[i]);
assertEq(heap.peek(), max);
}
uint256 min = type(uint256).max;
for (uint256 i = 0; i < input.length; ++i) {
${valueType} top = heap.peek();
${valueType} pop = heap.pop(Comparators.gt);
assertEq(heap.length(), input.length - i - 1);
_validateHeap(Comparators.gt);
assertEq(pop, top);
assertLe(pop, min);
min = pop;
}
}
}
`;
// GENERATE
// One test contract per heap variant, emitted after the shared preamble.
const testContracts = TYPES.map(type => generate(type));
module.exports = format(header, ...testContracts);