12 #include "libpopcnt.h"
46 template <u
int8_t buffer_size,
bool avx = true>
53 #pragma GCC diagnostic ignored "-Wpedantic"
55 #pragma GCC diagnostic pop
58 static const constexpr uint64_t
MASK = 1;
64 static const constexpr uint32_t
INDEX_MASK = ((uint32_t(1) << 8) - 1);
71 static_assert(buffer_size < 64);
101 bool at(
const uint32_t i)
const {
102 if constexpr (buffer_size != 0) {
116 return MASK & (
data_[index / WORD_BITS] >> (index % WORD_BITS));
118 return MASK & (
data_[i / WORD_BITS] >> (i % WORD_BITS));
140 void insert(
const uint64_t i,
const bool x) {
143 std::cerr <<
"Overflow. Reallocate before adding elements"
145 std::cerr <<
"Attempted to insert to " << i << std::endl;
155 if constexpr (buffer_size != 0) {
180 auto target_word = i / WORD_BITS;
181 auto target_offset = i % WORD_BITS;
182 for (
size_t j =
capacity_ - 1; j > target_word; j--) {
187 (
data_[target_word] & ((
MASK << target_offset) - 1)) |
188 ((
data_[target_word] & ~((
MASK << target_offset) - 1)) << 1);
189 data_[target_word] |= x ? (
MASK << target_offset) : uint64_t(0);
208 if constexpr (buffer_size != 0) {
209 bool x = this->
at(i);
241 uint64_t target_word = i / WORD_BITS;
242 uint64_t target_offset = i % WORD_BITS;
243 bool x =
MASK & (
data_[target_word] >> target_offset);
246 (
data_[target_word] & ((
MASK << target_offset) - 1)) |
247 ((
data_[target_word] >> 1) & (~((
MASK << target_offset) - 1)));
249 ? (
data_[target_word + 1] << 63)
251 for (
size_t j = target_word + 1; j < uint32_t(
capacity_ - 1); j++) {
256 (uint64_t(
capacity_ - 1) > target_word) ? 1 : 0;
274 int set(
const uint64_t i,
const bool x) {
276 if constexpr (buffer_size != 0) {
287 int change = x ? 1 : -1;
290 [[likely]]
return change;
300 uint64_t word_nr = idx / WORD_BITS;
301 uint64_t pos = idx % WORD_BITS;
303 if ((
data_[word_nr] & (
MASK << pos)) != (uint64_t(x) << pos)) {
304 int change = x ? 1 : -1;
307 [[likely]]
return change;
323 uint32_t
rank(
const uint64_t n)
const {
327 if constexpr (buffer_size != 0) {
343 uint64_t target_word = idx / WORD_BITS;
344 uint64_t target_offset = idx % WORD_BITS;
347 count += pop::popcnt(
data_, target_word * 8);
350 for (
size_t i = 0; i < target_word; i++) {
351 count += __builtin_popcountll(
data_[i]);
354 if (target_offset != 0) {
355 count += __builtin_popcountll(
data_[target_word] &
356 ((
MASK << target_offset) - 1));
373 uint32_t
rank(
const uint64_t n,
const uint64_t offset)
const {
376 uint64_t o_idx = offset;
377 if constexpr (buffer_size != 0) {
402 uint64_t target_word = idx / WORD_BITS;
403 uint64_t offset_word = o_idx / WORD_BITS;
404 uint64_t target_offset = idx % WORD_BITS;
405 uint64_t offset_offset = o_idx % WORD_BITS;
406 if (target_word == offset_word) {
407 count += __builtin_popcountll(
data_[offset_word] &
408 ~((
MASK << offset_offset) - 1) &
409 ((
MASK << target_offset) - 1));
412 if (offset_offset != 0) {
413 count += __builtin_popcountll(
data_[offset_word] &
414 ~((
MASK << offset_offset) - 1));
418 if (target_word - offset_word > 0) {
419 count += pop::popcnt(
data_ + offset_word,
420 (target_word - offset_word) * 8);
423 for (
size_t i = offset_word; i < target_word; i++) {
424 count += __builtin_popcountll(
data_[i]);
427 if (target_offset != 0) {
428 count += __builtin_popcountll(
data_[target_word] &
429 ((
MASK << target_offset) - 1));
444 uint32_t
select(
const uint32_t x)
const {
447 uint8_t current_buffer = 0;
448 int8_t a_pos_offset = 0;
451 for (uint64_t j = 0; j <
capacity_; j++) {
452 pop += __builtin_popcountll(
data_[j]);
454 if constexpr (buffer_size != 0) {
464 (
data_[(b_index + a_pos_offset) / WORD_BITS] >>
465 ((b_index + a_pos_offset) % WORD_BITS)) &
470 [[unlikely]] current_buffer++;
474 [[unlikely]] (void(0));
477 if (pop >= x) [[unlikely]]
487 while (pop >= x && pos <
capacity_ * 64) {
505 uint32_t
select(
const uint32_t x, uint32_t pos, uint32_t pop)
const {
508 uint8_t current_buffer = 0;
509 int8_t a_pos_offset = 0;
511 if constexpr (buffer_size != 0) {
520 [[unlikely]] current_buffer++;
528 uint64_t pop_idx = (pos + a_pos_offset) / WORD_BITS;
529 uint64_t offset = (pos + a_pos_offset) % WORD_BITS;
532 std::cerr <<
"Invalid select query apparently\n"
533 <<
"x = " << x <<
", pos = " << pos
535 <<
"\npop_idx = " << pop_idx
542 pop += __builtin_popcountll(
data_[pop_idx++] >> offset);
544 if constexpr (buffer_size != 0) {
554 (
data_[(b_index + a_pos_offset) / WORD_BITS] >>
555 ((b_index + a_pos_offset) % WORD_BITS)) &
560 [[unlikely]] current_buffer++;
564 [[unlikely]] (void(0));
570 for (uint64_t j = pop_idx; j <
capacity_; j++) {
571 pop += __builtin_popcountll(
data_[j]);
573 if constexpr (buffer_size != 0) {
583 (
data_[(b_index + a_pos_offset) / WORD_BITS] >>
584 ((b_index + a_pos_offset) % WORD_BITS)) &
589 [[unlikely]] current_buffer++;
593 [[unlikely]] (void(0));
596 if (pop >= x) [[unlikely]]
606 while (pop >= x && pos <
capacity_ * 64) {
618 return 8 * (
sizeof(*this) +
capacity_ *
sizeof(uint64_t));
686 uint64_t ones =
rank(elems);
687 uint64_t words = elems / WORD_BITS;
689 if (elems % WORD_BITS == 0) {
694 for (uint64_t i = 0; i <
capacity_ - words; i++) {
702 for (uint64_t i = 0; i < words; i++) {
705 uint64_t tail = elems % WORD_BITS;
706 uint64_t tail_mask = (
MASK << tail) - 1;
707 data_[words] &= ~tail_mask;
710 uint64_t words_to_shuffle =
capacity_ - words - 1;
711 for (uint64_t i = 0; i < words_to_shuffle; i++) {
713 data_[i] |= (
data_[words + i + 1]) << (WORD_BITS - tail);
719 [[unlikely]] (void(0));
742 uint64_t* o_data = other->
data();
743 uint64_t split_point =
size_ % WORD_BITS;
744 uint64_t target_word =
size_ / WORD_BITS;
745 uint64_t copy_words = elems / WORD_BITS;
746 uint64_t overhang = elems % WORD_BITS;
748 if (split_point == 0) {
750 for (
size_t i = 0; i < copy_words; i++) {
751 data_[target_word++] = o_data[i];
752 p_sum_ += __builtin_popcountll(o_data[i]);
757 o_data[copy_words] & ((
MASK << overhang) - 1);
758 [[unlikely]]
p_sum_ += __builtin_popcountll(
data_[target_word]);
762 for (
size_t i = 0; i < copy_words; i++) {
763 data_[target_word++] |= o_data[i] << split_point;
764 data_[target_word] |= o_data[i] >> (WORD_BITS - split_point);
765 p_sum_ += __builtin_popcountll(o_data[i]);
767 if (elems % WORD_BITS != 0) {
770 o_data[copy_words] & ((
MASK << overhang) - 1);
771 p_sum_ += __builtin_popcountll(to_write);
772 data_[target_word++] |= to_write << split_point;
774 data_[target_word] |= to_write >> (WORD_BITS - split_point);
776 [[likely]] ((void)0);
796 uint64_t offset =
size_ % 64;
797 uint64_t words =
size_ / 64;
801 [[likely]]
data_[words++] &= (
MASK << offset) - 1;
803 for (uint64_t i = words; i <
capacity_; i++) {
824 uint64_t* o_data = other->
data();
825 uint64_t words = elems / 64;
827 for (uint64_t i =
capacity_ - 1; i >= words; i--) {
831 for (uint64_t i = 0; i < words; i++) {
834 uint64_t overflow = elems % 64;
836 for (uint64_t i =
capacity_ - 1; i > words; i--) {
837 data_[i] <<= overflow;
840 data_[words] <<= overflow;
841 [[likely]] (void(0));
844 uint64_t source_word = other->
size();
845 uint64_t source_offset = source_word % 64;
851 if (source_offset == 0) {
855 for (uint64_t i = words - 1; i < words; i--) {
856 data_[i] = o_data[--source_word];
862 for (uint64_t i = words; i > 0; i--) {
863 p_sum_ += __builtin_popcountll(o_data[source_word]);
864 data_[i] |= o_data[source_word] >> (64 - overflow);
865 data_[i - 1] |= o_data[source_word--] << overflow;
867 uint64_t w = o_data[source_word] >> (64 - overflow);
868 p_sum_ += __builtin_popcountll(w);
869 [[likely]]
data_[0] |= w;
871 [[unlikely]] (void(0));
876 for (uint64_t i = words - 1; i < words; i--) {
877 data_[i] = o_data[source_word] << (64 - source_offset);
878 data_[i] |= o_data[--source_word] >> source_offset;
884 for (uint64_t i = words; i > 0; i--) {
885 w = o_data[source_word] << (64 - source_offset);
886 w |= o_data[--source_word] >> source_offset;
887 p_sum_ += __builtin_popcountll(w);
888 data_[i] |= w >> (64 - overflow);
889 data_[i - 1] |= w << overflow;
891 w = o_data[source_word] << (64 - source_offset);
892 w |= o_data[--source_word] >> source_offset;
894 p_sum_ += __builtin_popcountll(w);
895 [[likely]]
data_[0] |= w;
920 uint64_t* o_data = other->
data();
921 uint64_t offset =
size_ % 64;
922 uint64_t word =
size_ / 64;
923 uint64_t o_size = other->
size();
924 uint64_t o_p_sum = other->
p_sum();
925 uint64_t o_words = o_size / 64;
926 o_words += o_size % 64 == 0 ? 0 : 1;
931 for (uint64_t i = 0; i < o_words; i++) {
932 data_[word++] = o_data[i];
934 [[unlikely]] (void(0));
937 for (uint64_t i = 0; i < o_words; i++) {
938 data_[word++] |= o_data[i] << offset;
939 data_[word] |= o_data[i] >> (64 - offset);
958 if constexpr (buffer_size == 0)
return;
962 uint64_t overflow = 0;
963 uint8_t overflow_length = 0;
964 uint8_t underflow_length = 0;
965 uint8_t current_index = 0;
966 uint32_t buf =
buffer_[current_index];
970 uint64_t words =
size_ / WORD_BITS;
971 words +=
size_ % WORD_BITS > 0 ? 1 : 0;
972 for (uint64_t current_word = 0; current_word < words; current_word++) {
975 if (overflow_length) {
976 [[likely]] underflow =
977 (underflow << overflow_length) |
978 (
data_[current_word] >> (64 - overflow_length));
981 uint64_t new_overflow = 0;
983 if (current_word == target_word && current_index <
buffer_count_) {
986 ? (
data_[current_word] >> underflow_length) |
987 (underflow << (64 - underflow_length))
988 : (
data_[current_word] << overflow_length) | overflow;
989 underflow >>= underflow_length;
990 uint64_t new_word = 0;
991 uint8_t start_offset = 0;
993 while (current_word == target_word) {
995 (word << start_offset) & ((
MASK << target_offset) - 1);
996 word = (word >> (target_offset - start_offset)) |
997 (target_offset == 0 ? 0
998 : target_offset - start_offset == 0
1001 << (64 - (target_offset - start_offset))));
1002 underflow >>= target_offset - start_offset;
1005 new_word |=
MASK << target_offset;
1007 start_offset = target_offset + 1;
1008 if (underflow_length) [[unlikely]]
1014 word |= underflow << 63;
1016 if (overflow_length)
1020 start_offset = target_offset;
1027 [[unlikely]] target_offset =
buffer_index(buf) % WORD_BITS;
1030 start_offset < 64 ? (word << start_offset) : uint64_t(0);
1031 new_overflow = overflow_length ?
data_[current_word] >>
1032 (64 - overflow_length)
1034 [[unlikely]]
data_[current_word] = new_word;
1036 if (underflow_length) {
1037 data_[current_word] =
1038 (
data_[current_word] >> underflow_length) |
1039 (underflow << (64 - underflow_length));
1040 }
else if (overflow_length) {
1042 data_[current_word] >> (64 - overflow_length);
1043 [[likely]]
data_[current_word] =
1044 (
data_[current_word] << overflow_length) | overflow;
1049 overflow = new_overflow;
1073 uint64_t last_word =
size_ / WORD_BITS;
1074 uint64_t overflow =
size_ % WORD_BITS;
1075 if (overflow != 0) {
1076 for (uint64_t i = overflow; i < WORD_BITS; i++) {
1077 uint64_t val = (
MASK << i) &
data_[last_word];
1082 for (uint64_t i = last_word; i <
capacity_; i++) {
1083 assert(
data_[i] == 0);
1097 std::cout <<
"{\n\"type\": \"leaf\",\n"
1098 <<
"\"size\": " <<
size_ <<
",\n"
1099 <<
"\"capacity\": " <<
capacity_ <<
",\n"
1100 <<
"\"p_sum\": " <<
p_sum_ <<
",\n"
1101 <<
"\"buffer_size\": " << int(buffer_size) <<
",\n"
1102 <<
"\"buffer\": [\n";
1104 #pragma GCC diagnostic ignored "-Warray-bounds"
1105 std::cout <<
"{\"is_insertion\": "
1111 #pragma GCC diagnostic pop
1116 if (!internal_only) {
1117 std::cout <<
"]\n\"data\": [\n";
1118 for (uint64_t i = 0; i <
capacity_; i++) {
1119 std::bitset<64> b(
data_[i]);
1120 std::cout <<
"\"" << b <<
"\"";
1198 return ((idx << 8) | (t ?
TYPE_MASK : uint32_t(0))) |
1245 auto pb_size =
size_;
1246 if constexpr (buffer_size != 0) {
1259 [[unlikely]]
data_[
size_ / WORD_BITS] |= uint64_t(x)
1260 << (
size_ % WORD_BITS);
1262 data_[pb_size / WORD_BITS] |= uint64_t(x) << (pb_size % WORD_BITS);