Bit vector  0.9
Fast and space efficient dynamic bit vector library.
bit_vector.hpp
1 #ifndef BV_BIT_VECTOR_HPP
2 #define BV_BIT_VECTOR_HPP
3 
4 #ifndef WORD_BITS
5 #define WORD_BITS 64
6 #endif
7 
8 #include <cassert>
9 #include <cstdint>
10 
11 #include "query_support.hpp"
12 
13 namespace bv {
14 
35 template <class leaf, class node, class allocator, uint64_t leaf_size,
36  uint8_t branches, class dtype>
37 class bit_vector {
38  protected:
39  bool root_is_leaf_ = true;
40  bool owned_allocator_ = false;
42  node* n_root_;
45  leaf* l_root_;
47  allocator* allocator_;
48 
59  void split_root() {
60  node* new_root = allocator_->template allocate_node<node>();
61  node* sibling = allocator_->template allocate_node<node>();
62  if (n_root_->has_leaves()) sibling->has_leaves(true);
63  sibling->transfer_prepend(n_root_, branches / 2);
64  new_root->append_child(n_root_);
65  new_root->append_child(sibling);
66  n_root_ = new_root;
67  }
68 
69  public:
80  bit_vector(allocator* alloc) {
81  allocator_ = alloc;
82  l_root_ = allocator_->template allocate_leaf<leaf>(leaf_size /
83  (2 * WORD_BITS));
84  }
85 
93  allocator_ = new allocator();
94  owned_allocator_ = true;
95  l_root_ = allocator_->template allocate_leaf<leaf>(leaf_size /
96  (2 * WORD_BITS));
97  }
98 
108  if (root_is_leaf_) {
109  allocator_->deallocate_leaf(l_root_);
110  } else {
112  allocator_->deallocate_node(n_root_);
113  }
114  if (owned_allocator_) {
115  delete (allocator_);
116  }
117  }
118 
128  template <dtype block_size = 2048>
131  static_assert(block_size * 3 <= leaf_size);
132  static_assert(block_size >= 2 * 64);
133  if (root_is_leaf_) {
134  [[unlikely]] qs->append(l_root_);
135  } else {
136  n_root_->generate_query_structure(qs);
137  }
138  qs->finalize();
139  }
140 
149  template <dtype block_size = leaf_size / 3>
151  static_assert(block_size * 3 <= leaf_size);
155  return qs;
156  }
157 
174  void insert(uint64_t index, bool value) {
175 #ifdef DEBUG
176  if (index > size()) {
177  std::cerr << "Invalid insertion to index " << index << " for "
178  << size() << " element bit vector." << std::endl;
179  assert(index <= size());
180  }
181 #endif
182  if (root_is_leaf_) {
183  if (l_root_->need_realloc()) {
184  if (l_root_->size() >= leaf_size) {
185  leaf* sibling = allocator_->template allocate_leaf<leaf>(
186  leaf_size / (2 * WORD_BITS));
187  sibling->transfer_append(l_root_, leaf_size / 2);
188  n_root_ = allocator_->template allocate_node<node>();
189  n_root_->has_leaves(true);
190  n_root_->append_child(sibling);
192  root_is_leaf_ = false;
193  n_root_->insert(index, value, allocator_);
194  } else {
195  uint64_t cap = l_root_->capacity();
196  l_root_ =
197  allocator_->reallocate_leaf(l_root_, cap, cap + 2);
198  [[likely]] l_root_->insert(index, value);
199  }
200  } else {
201  [[likely]] l_root_->insert(index, value);
202  }
203  } else {
204  if (n_root_->child_count() == branches) {
205  [[unlikely]] split_root();
206  }
207  [[likely]] n_root_->insert(index, value, allocator_);
208  }
209  }
210 
233  bool remove(uint64_t index) {
234  if (root_is_leaf_) {
235  [[unlikely]] return l_root_->remove(index);
236  } else {
237  bool v = n_root_->remove(index, allocator_);
238  if (n_root_->child_count() == 1) {
239  if (n_root_->has_leaves()) {
240  l_root_ = reinterpret_cast<leaf*>(n_root_->child(0));
241  root_is_leaf_ = true;
242  allocator_->deallocate_node(n_root_);
243  } else {
244  node* new_root = reinterpret_cast<node*>(n_root_->child(0));
245  allocator_->deallocate_node(n_root_);
246  n_root_ = new_root;
247  }
248  [[unlikely]] (void(0));
249  }
250  return v;
251  }
252  }
253 
263  uint64_t sum() const {
264  return !root_is_leaf_ ? n_root_->p_sum() : l_root_->p_sum();
265  }
266 
275  uint64_t size() const {
276  return !root_is_leaf_ ? n_root_->size() : l_root_->size();
277  }
278 
297  bool at(uint64_t index) const {
298  return !root_is_leaf_ ? n_root_->at(index) : l_root_->at(index);
299  }
300 
315  uint64_t rank(uint64_t index) const {
316  return !root_is_leaf_ ? n_root_->rank(index) : l_root_->rank(index);
317  }
318 
333  uint64_t select(uint64_t count) const {
334  return !root_is_leaf_ ? n_root_->select(count) : l_root_->select(count);
335  }
336 
349  void set(uint64_t index, bool value) {
350  !root_is_leaf_ ? n_root_->set(index, value)
351  : l_root_->set(index, value);
352  }
353 
361  void flush() { root_is_leaf_ ? l_root_->commit() : n_root_->flush(); }
362 
374  uint64_t bit_size() const {
375  uint64_t tree_size =
377  return 8 * sizeof(bit_vector) + tree_size;
378  }
379 
394  void validate() const {
395  if (owned_allocator_) {
396  uint64_t allocs = allocator_->live_allocations();
397  if (root_is_leaf_) {
398  assert(allocs == l_root_->validate());
399  } else {
400  assert(allocs == n_root_->validate());
401  }
402  } else {
403  if (root_is_leaf_)
404  l_root_->validate();
405  else
406  n_root_->validate();
407  }
408  }
409 
416  void print(bool internal_only) const {
417  root_is_leaf_ ? l_root_->print(internal_only)
418  : n_root_->print(internal_only);
419  std::cout << std::endl;
420  }
421 };
422 } // namespace bv
423 #endif
bv::bit_vector::set
void set(uint64_t index, bool value)
Sets the bit at "index" to "value".
Definition: bit_vector.hpp:349
bv::leaf::set
int set(const uint64_t i, const bool x)
Set the ith bit to x.
Definition: leaf.hpp:274
bv::bit_vector::generate_query_structure
void generate_query_structure(query_support< dtype, leaf, block_size > *qs) const
Populate a given query support stucture using this
Definition: bit_vector.hpp:129
bv::node::set
int set(dtype index, bool v)
Set the value of the logical indexth element to v.
Definition: node.hpp:174
bv::node::deallocate
void deallocate(allocator *alloc)
Recursively deallocates all children.
Definition: node.hpp:258
bv::leaf::validate
uint64_t validate() const
Ensure that the leaf is in a valid state.
Definition: leaf.hpp:1069
bv::bit_vector::sum
uint64_t sum() const
Number of 1-bits in the data structure.
Definition: bit_vector.hpp:263
bv::bit_vector::generate_query_structure
query_support< dtype, leaf, block_size > * generate_query_structure() const
Create and Populate a query support structure using this
Definition: bit_vector.hpp:150
bv::bit_vector::bit_size
uint64_t bit_size() const
Total size of data structure allocations in bits.
Definition: bit_vector.hpp:374
bv::bit_vector
Container class for dynamic b-tree bit vector stuctures.
Definition: bit_vector.hpp:37
bv::bit_vector::size
uint64_t size() const
Number of elements stored in the data structure.
Definition: bit_vector.hpp:275
bv::query_support::append
void append(leaf_type *leaf)
Add leaf reference to the support structure.
Definition: query_support.hpp:66
bv::query_support
Support structure for bv::bit_vector to enable fast queries.
Definition: query_support.hpp:49
bv::leaf::transfer_append
void transfer_append(leaf *other, uint64_t elems)
Move "elems" elements from the start of "other" to the end of "this".
Definition: leaf.hpp:738
bv::bit_vector::owned_allocator_
bool owned_allocator_
True if deallocating the allocator is the responsibilty of the data structure.
Definition: bit_vector.hpp:41
bv::bit_vector::flush
void flush()
Recursively flushes all buffers in the data structure.
Definition: bit_vector.hpp:361
bv::leaf::at
bool at(const uint32_t i) const
Get the value of the ith element in the leaf.
Definition: leaf.hpp:101
bv::leaf::p_sum
uint32_t p_sum() const
Getter for p_sum_.
Definition: leaf.hpp:122
bv::bit_vector::root_is_leaf_
bool root_is_leaf_
Value indicating whether the root is in n_root_ or l_root_.
Definition: bit_vector.hpp:39
bv::node::rank
dtype rank(dtype index) const
Counts number of one bits up to the indexth logical element.
Definition: node.hpp:202
bv::bit_vector::rank
uint64_t rank(uint64_t index) const
Number of 1-bits up to position index.
Definition: bit_vector.hpp:315
bv::bit_vector::~bit_vector
~bit_vector()
Deconstructor that deallocates the entire data structure.
Definition: bit_vector.hpp:107
bv::node::p_sum
dtype p_sum() const
Logical number of 1-bits stored in the subtree.
Definition: node.hpp:323
bv::leaf::print
void print(bool internal_only) const
Output data stored in the leaf as json.
Definition: leaf.hpp:1096
bv::bit_vector::bit_vector
bit_vector(allocator *alloc)
Bit vector constructor with existing allocator.
Definition: bit_vector.hpp:80
bv::bv
simple_bv< 8, 16384, 64 > bv
Default dynamic bit vector type.
Definition: bv.hpp:94
bv::node::insert
void insert(dtype index, bool value, allocator *alloc)
Insert "value" at "index".
Definition: node.hpp:369
bv::node::append_child
void append_child(child *new_child)
Add child to the subtree.
Definition: node.hpp:337
bv::leaf::capacity
uint32_t capacity() const
Size of the leaf-associated data storage in 64-bit words.
Definition: leaf.hpp:637
bv::bit_vector::split_root
void split_root()
Increases the height of the tree by one level.
Definition: bit_vector.hpp:59
bv::query_support::finalize
void finalize()
Prepare the structure for querying.
Definition: query_support.hpp:88
bv::leaf
Simple flat dynamic bit vector for use as a leaf for a dynamic b-tree bit vector.
Definition: leaf.hpp:47
bv::node::child
void * child(uint8_t i)
Get ith child of the node.
Definition: node.hpp:353
bv::node::print
void print(bool internal_only) const
Outputs this subtree as json.
Definition: node.hpp:606
bv::node::has_leaves
void has_leaves(bool leaves)
Set whether the children of the node are leaves or internal nodes.
Definition: node.hpp:131
bv::leaf::insert
void insert(const uint64_t i, const bool x)
Insert x into position i.
Definition: leaf.hpp:140
bv::leaf::remove
bool remove(const uint64_t i)
Remove the ith bit from the leaf.
Definition: leaf.hpp:207
bv::bit_vector::remove
bool remove(uint64_t index)
Remove element at "index".
Definition: bit_vector.hpp:233
bv::leaf::size
uint32_t size() const
Getter for size_.
Definition: leaf.hpp:124
bv::bit_vector::bit_vector
bit_vector()
Default constructor that creates an owned allocator.
Definition: bit_vector.hpp:92
bv::node::size
dtype size() const
Logical number of elements stored in the subtree.
Definition: node.hpp:314
bv::bit_vector::l_root_
leaf * l_root_
Root if a single leaf is sufficient.
Definition: bit_vector.hpp:46
bv::node::remove
bool remove(dtype index, allocator *alloc)
Remove the indexth element.
Definition: node.hpp:389
bv::bit_vector::allocator_
allocator * allocator_
Pointer to allocator used for allocating internal nodes and leaves.
Definition: bit_vector.hpp:47
bv::node::select
dtype select(dtype count) const
Calculates the index of the counttu 1-bit.
Definition: node.hpp:230
bv::leaf::need_realloc
bool need_realloc() const
Determine if reallocation / splitting is required prior to additional insertion of new elements.
Definition: leaf.hpp:628
bv::bit_vector::at
bool at(uint64_t index) const
Retrieves the value of the indexth element in the data structure.
Definition: bit_vector.hpp:297
bv::leaf::bits_size
uint64_t bits_size() const
Size of the leaf and associated data in bits.
Definition: leaf.hpp:617
bv::node::validate
uint64_t validate() const
Check that the subtree structure is internally consistent.
Definition: node.hpp:563
bv::node
Internal node for use with bit vector b-tree structures.
Definition: node.hpp:65
bv::node::at
bool at(dtype index) const
Access the value of the indexth element of the logical structure.
Definition: node.hpp:150
bv::leaf::commit
void commit()
Commit and clear the Insert/Remove buffer for the leaf.
Definition: leaf.hpp:955
bv::bit_vector::select
uint64_t select(uint64_t count) const
Index of the countth 1-bit in the data structure.
Definition: bit_vector.hpp:333
bv::leaf::rank
uint32_t rank(const uint64_t n) const
Number of 1-bits up to position n.
Definition: leaf.hpp:323
bv::bit_vector::n_root_
node * n_root_
Root of the b-tree if a multi-level structure is required.
Definition: bit_vector.hpp:44
bv::node::bits_size
uint64_t bits_size() const
Size of the subtree in "allocated" bits.
Definition: node.hpp:511
bv::bit_vector::insert
void insert(uint64_t index, bool value)
Insert "value" into position "index".
Definition: bit_vector.hpp:174
bv::bit_vector::validate
void validate() const
Asserts that the data structure is internally consistent.
Definition: bit_vector.hpp:394
bv::node::transfer_prepend
void transfer_prepend(node *other, uint8_t elems)
Moves the last "elems" elements from "other" to the start of "this".
Definition: node.hpp:466
bv::leaf::select
uint32_t select(const uint32_t x) const
Index of the xth 1-bit in the data structure.
Definition: leaf.hpp:444
bv::node::child_count
uint8_t child_count() const
Get the number of children of this node.
Definition: node.hpp:280
bv::bit_vector::print
void print(bool internal_only) const
Output data structure to standard out as json.
Definition: bit_vector.hpp:416