|
| node () |
| Constructor.
|
|
template<class qds > |
void | generate_query_structure (qds *qs) |
|
void | has_leaves (bool leaves) |
| Set whether the children of the node are leaves or internal nodes. More...
|
|
bool | has_leaves () const |
| Get the type of children for the node. More...
|
|
bool | at (dtype index) const |
| Access the value of the indexth element of the logical structure. More...
|
|
int | set (dtype index, bool v) |
| Set the value of the logical indexth element to v. More...
|
|
dtype | rank (dtype index) const |
| Counts number of one bits up to the indexth logical element. More...
|
|
dtype | select (dtype count) const |
| Calculates the index of the counttu 1-bit. More...
|
|
template<class allocator > |
void | deallocate (allocator *alloc) |
| Recursively deallocates all children. More...
|
|
uint8_t | child_count () const |
| Get the number of children of this node. More...
|
|
void ** | children () |
| Get pointer to the children of this node. More...
|
|
branching * | child_sizes () |
| Get pointer to the cumulative child sizes. More...
|
|
branching * | child_sums () |
| Get pointer to the cumulative child sums. More...
|
|
dtype | size () const |
| Logical number of elements stored in the subtree. More...
|
|
dtype | p_sum () const |
| Logical number of 1-bits stored in the subtree. More...
|
|
template<class child > |
void | append_child (child *new_child) |
| Add child to the subtree. More...
|
|
void * | child (uint8_t i) |
| Get ith child of the node. More...
|
|
template<class allocator > |
void | insert (dtype index, bool value, allocator *alloc) |
| Insert "value" at "index". More...
|
|
template<class allocator > |
bool | remove (dtype index, allocator *alloc) |
| Remove the indexth element. More...
|
|
void | clear_first (uint8_t elems) |
| Remove the first "elems" elements form this node. More...
|
|
void | transfer_append (node *other, uint8_t elems) |
| Moves the fist "elems" elements from "other" to the end of "this". More...
|
|
void | clear_last (uint8_t elems) |
| Remove the last "elems" elemets from the node. More...
|
|
void | transfer_prepend (node *other, uint8_t elems) |
| Moves the last "elems" elements from "other" to the start of "this". More...
|
|
void | append_all (node *other) |
| Copies all children from "other" to the end of this node. More...
|
|
uint64_t | bits_size () const |
| Size of the subtree in "allocated" bits. More...
|
|
void | flush () |
|
uint64_t | validate () const |
| Check that the subtree structure is internally consistent. More...
|
|
void | print (bool internal_only) const |
| Outputs this subtree as json. More...
|
|
|
template<class allocator > |
void | rebalance_leaf (uint8_t index, leaf_type *leaf, allocator *alloc) |
| Ensure that there is space for insertion in the child leaves. More...
|
|
template<class allocator > |
void | leaf_insert (dtype index, bool value, allocator *alloc) |
| Insertion operation if the children are leaves. More...
|
|
template<class allocator > |
void | rebalance_node (uint8_t index, allocator *alloc) |
| Ensure that there is space for insertion in the child nodes. More...
|
|
template<class allocator > |
void | node_insert (dtype index, bool value, allocator *alloc) |
| Insertion operation if the children are internal nodes. More...
|
|
template<class allocator > |
void | rebalance_leaves_right (leaf_type *a, leaf_type *b, allocator *alloc) |
| Transfer elements from the "right" leaf to the "left" leaf. More...
|
|
template<class allocator > |
void | rebalance_leaves_left (leaf_type *a, leaf_type *b, uint8_t idx, allocator *alloc) |
| Transfer elements from the "left" leaf to the "right" leaf. More...
|
|
template<class allocator > |
void | merge_leaves (leaf_type *a, leaf_type *b, uint8_t idx, allocator *alloc) |
| Merge the right leaf into the left leaf. More...
|
|
template<class allocator > |
bool | leaf_remove (dtype index, allocator *alloc) |
| Removal used when the children are leaves. More...
|
|
void | rebalance_nodes_right (node *a, node *b, uint8_t idx) |
| Transfer elements from the "right" node to the "left" node. More...
|
|
void | rebalance_nodes_left (node *a, node *b, uint8_t idx) |
| Transfer elements from the "lef" node to the "right" node. More...
|
|
template<class allocator > |
void | merge_nodes (node *a, node *b, uint8_t idx, allocator *alloc) |
| Transfer all from the "right" node to the "left" node. More...
|
|
template<class allocator > |
bool | node_remove (dtype index, allocator *alloc) |
| Removal operation when children are internal nodes. More...
|
|
template<class leaf_type, class dtype, uint64_t leaf_size, uint8_t branches>
class bv::node< leaf_type, dtype, leaf_size, branches >
Internal node for use with bit vector b-tree structures.
Intended for use with bv::bit_vector and bv::leaf, to support the dynamic b-tree bit vector structure.
Practical limitations
The maximum logical size of the bit vector is \(2^{b - 1} - 1\) where b is the number of bits available in dtype. This is due to the "sign bit" being used to speed up branching. So for 64-bit words the maximum data structure size is 9223372036854775807 and for 32-bot words the limit is 2147483647.
The internal nodes need to keep track of leaf sizes, since splitting, merging and balancing is done based on a top-down approach, where an insertion or removal already submitted to a leaf cannot cause any rebalancing.
The maximum leaf size practically needs to be divisible by 128 due to the practical allocation scheme allocating an even amount of 64-bit integers for leaf storage. In addition a minimum size of 128 causes edge cases in balancing that can result in undefined behaviour. Thus a minimum leaf size of 256 and divisibility of maximum leaf size by 128 is enforced by static assertions.
Buffered bv::leaf instances can not be bigger than \(2^{24} - 1\).
The branching factor for the internal nodes need to be 8, 16, 32, 64 or 128 due to how the branchless binary search is written. Further limited by the 7 bits available in the child counter.
Cache line size
Branch selection uses binary search, which significantly benefits from agressive cache prefetching. It is suggested that a definition of CACHLE_LINE
is given with the value of the cache line size for the (running) architecture. Make attempts to retrieve this information and passing the correct value to the compiler with the -DCACHE_LINE
flag.
If the prefetching fails significantly due to prefetching being unavailable or the wrong cache line size, the current binary search based branch selection will likely be noticeably slower than a naive linear search.
- Template Parameters
-
leaf_type | Type of leaf to use in the tree structure (E.g. bv::leaf). |
dtype | Integer type to use for indexing (uint32_t or uint64_t). |
leaf_size | Maximum size of a leaf node. |
branches | Maximum branching factor of internal nodes. |
template<class leaf_type , class dtype , uint64_t leaf_size, uint8_t branches>
template<class allocator >
void bv::node< leaf_type, dtype, leaf_size, branches >::rebalance_leaf |
( |
uint8_t |
index, |
|
|
leaf_type * |
leaf, |
|
|
allocator * |
alloc |
|
) |
| |
|
inlineprotected |
Ensure that there is space for insertion in the child leaves.
If there is sufficient space in either sibling, elements will be transferred to the sibling with more room. If there is insufficient space in the siblings, a new leaf is allocated and elements are transferred from the target leaf to one of the siblings.
Generally delays allocation of a new leaf as long as posisble, and when reallocating, generates a new leaf that is \(\approx \frac{2}{3}\) full.
- Template Parameters
-
- Parameters
-
index | Location of the full leaf. |
leaf | Pointer to the full leaf. |
alloc | Instance of allocator to use for reallocation. |
template<class leaf_type , class dtype , uint64_t leaf_size, uint8_t branches>
template<class allocator >
void bv::node< leaf_type, dtype, leaf_size, branches >::rebalance_node |
( |
uint8_t |
index, |
|
|
allocator * |
alloc |
|
) |
| |
|
inlineprotected |
Ensure that there is space for insertion in the child nodes.
If there is sufficient space in either sibling, elements will be transferred to the sibling with more room. If there is insufficient space in the siblings, a new node is allocated and elements are transferred from the target node to one of the siblings.
Generally delays allocation of a new node as long as possible, and when reallocating, generates a new node that is \(\approx \frac{2}{3}\) full.
- Template Parameters
-
- Parameters
-
index | Location of the full node. |
alloc | Allocator instance to use for allocation. |
template<class leaf_type , class dtype , uint64_t leaf_size, uint8_t branches>
uint64_t bv::node< leaf_type, dtype, leaf_size, branches >::validate |
( |
| ) |
const |
|
inline |
Check that the subtree structure is internally consistent.
Checks that cumulative sizes and sums agree with the reported sizes and sums of the children.
If the children are leaves, also checks the validity of capacity for the child.
If compiled with NDEBUG
defined, this function will do essentially nothing since all checks are based on C assertions.
Also does not say anything about the correctness of the subtree given the sequence of operations up to this point. Only guarantees that the structure is internally consistent and "could" be the result of some valid sequence of operations.
Number of nodes are reported for verification of allocation counts.
- Returns
- Number of nodes in this subtree.