EWAHBoolArray 0.8.0
A compressed bitmap class in C++
Loading...
Searching...
No Matches
ewah-inl.h
1// See LICENSE file for license information.
2#ifndef EWAH_INL_H
3#define EWAH_INL_H
4
5#include "ewah.h"
6
7namespace ewah {
8
15template <class uword>
16void fast_logicalor_tocontainer(size_t n, const EWAHBoolArray<uword> **inputs,
17 EWAHBoolArray<uword> &container);
18
25template <class uword>
26EWAHBoolArray<uword> fast_logicalor(size_t n,
27 const EWAHBoolArray<uword> **inputs) {
28 EWAHBoolArray<uword> answer;
29 fast_logicalor_tocontainer(n, inputs, answer);
30 return answer;
31}
32
36template <class uword> class EWAHBoolArrayIterator {
37public:
41 bool hasNext() const { return pointer < myparent.size(); }
42
46 uword next() {
47 uword returnvalue;
48 if (compressedwords < rl) {
49 ++compressedwords;
50 if (b)
51 returnvalue = notzero;
52 else
53 returnvalue = zero;
54 } else {
55 ++literalwords;
56 ++pointer;
57 returnvalue = myparent[pointer];
58 }
59 if ((compressedwords == rl) && (literalwords == lw)) {
60 ++pointer;
61 if (pointer < myparent.size())
62 readNewRunningLengthWord();
63 }
64 return returnvalue;
65 }
66
68 : pointer(other.pointer), myparent(other.myparent),
69 compressedwords(other.compressedwords),
70 literalwords(other.literalwords), rl(other.rl), lw(other.lw),
71 b(other.b) {}
72
73 static const uword zero = 0;
74 static const uword notzero = static_cast<uword>(~zero);
75
76private:
77 EWAHBoolArrayIterator(const std::vector<uword> &parent);
78 void readNewRunningLengthWord();
79 friend class EWAHBoolArray<uword>;
80 size_t pointer;
81 const std::vector<uword> &myparent;
82 uword compressedwords;
83 uword literalwords;
84 uword rl, lw;
85 bool b;
86};
87
91template <class uword> class EWAHBoolArraySetBitForwardIterator {
92public:
93 typedef std::forward_iterator_tag iterator_category;
94 typedef size_t *pointer;
95 typedef size_t &reference_type;
96 typedef size_t value_type;
97 typedef ptrdiff_t difference_type;
98 typedef EWAHBoolArraySetBitForwardIterator<uword> type_of_iterator;
102 inline size_t operator*() const { return answer; }
103
104 bool operator<(const type_of_iterator &o) const {
105 if (!o.hasValue)
106 return true;
107 if (!hasValue)
108 return false;
109 return answer < o.answer;
110 }
111
112 bool operator<=(const type_of_iterator &o) const {
113 if (!o.hasValue)
114 return true;
115 if (!hasValue)
116 return false;
117 return answer <= o.answer;
118 }
119
120 bool operator>(const type_of_iterator &o) const { return !((*this) <= o); }
121
122 bool operator>=(const type_of_iterator &o) const { return !((*this) < o); }
123
124 EWAHBoolArraySetBitForwardIterator &operator++() { //++i
125 if (hasNext)
126 next();
127 else
128 hasValue = false;
129 return *this;
130 }
131
132 EWAHBoolArraySetBitForwardIterator operator++(int) { // i++
133 EWAHBoolArraySetBitForwardIterator old(*this);
134 if (hasNext)
135 next();
136 else
137 hasValue = false;
138 return old;
139 }
140
141 bool operator==(const EWAHBoolArraySetBitForwardIterator<uword> &o) const {
142 if ((!hasValue) && (!o.hasValue))
143 return true;
144 return (hasValue == o.hasValue) && (answer == o.answer);
145 }
146
147 bool operator!=(const EWAHBoolArraySetBitForwardIterator<uword> &o) const {
148 return !(*this == o);
149 }
150
151 static EWAHBoolArraySetBitForwardIterator<uword> &end() {
152 static EWAHBoolArraySetBitForwardIterator<uword> e;
153 return e;
154 }
155
156 EWAHBoolArraySetBitForwardIterator(const std::vector<uword> *parent,
157 size_t startpointer = 0)
158 : word(0), position(0), runningLength(0), literalPosition(0),
159 wordPosition(startpointer), wordLength(0), buffer(parent),
160 hasNext(false), hasValue(false), answer(0) {
161 if (wordPosition < buffer->size()) {
162 setRunningLengthWord();
163 hasNext = moveToNext();
164 if (hasNext) {
165 next();
166 hasValue = true;
167 }
168 }
169 }
170
171 EWAHBoolArraySetBitForwardIterator()
172 : word(0), position(0), runningLength(0), literalPosition(0),
173 wordPosition(0), wordLength(0), buffer(NULL), hasNext(false),
174 hasValue(false), answer(0) {}
175
176 inline bool runningHasNext() const { return position < runningLength; }
177
178 inline bool literalHasNext() {
179 while (word == 0 && wordPosition < wordLength) {
180 word = (*buffer)[wordPosition++];
181 literalPosition = position;
182 position += WORD_IN_BITS;
183 }
184 return word != 0;
185 }
186
187 inline void setRunningLengthWord() {
188 uword rlw = (*buffer)[wordPosition];
189 runningLength =
190 (size_t)WORD_IN_BITS * RunningLengthWord<uword>::getRunningLength(rlw) +
191 position;
193 position = runningLength;
194 }
195 wordPosition++; // point to first literal word
196 wordLength = static_cast<uword>(
198 }
199
200 inline bool moveToNext() {
201 while (!runningHasNext() && !literalHasNext()) {
202 if (wordPosition >= buffer->size()) {
203 return false;
204 }
205 setRunningLengthWord();
206 }
207 return true;
208 }
209
210 void next() { // update answer
211 if (runningHasNext()) {
212 answer = position++;
213 if (runningHasNext())
214 return;
215 } else {
216 uword t = static_cast<uword>(word & (~word + 1));
217 answer = literalPosition + countOnes((uword)(t - 1));
218 word ^= t;
219 }
220 hasNext = moveToNext();
221 }
222
223 enum { WORD_IN_BITS = sizeof(uword) * 8 };
224 uword word; // lit word
225 size_t position;
226 size_t runningLength;
227 size_t literalPosition;
228 size_t wordPosition; // points to word in buffer
229 uword wordLength;
230 const std::vector<uword> *buffer;
231 bool hasNext;
232 bool hasValue;
233 size_t answer;
234};
235
241public:
243 : totalliteral(0), totalcompressed(0), runningwordmarker(0),
244 maximumofrunningcounterreached(0) {}
245 size_t getCompressedSize() const { return totalliteral + runningwordmarker; }
246 size_t getUncompressedSize() const { return totalliteral + totalcompressed; }
247 size_t getNumberOfDirtyWords() const { return totalliteral; }
248 size_t getNumberOfCleanWords() const { return totalcompressed; }
249 size_t getNumberOfMarkers() const { return runningwordmarker; }
250 size_t getOverRuns() const { return maximumofrunningcounterreached; }
251 size_t totalliteral;
252 size_t totalcompressed;
253 size_t runningwordmarker;
254 size_t maximumofrunningcounterreached;
255};
256
257template <class uword> bool EWAHBoolArray<uword>::set(size_t i) {
258 if (i < sizeinbits)
259 return false;
260 const size_t dist = (i + wordinbits) / wordinbits -
261 (sizeinbits + wordinbits - 1) / wordinbits;
262 sizeinbits = i + 1;
263 if (dist > 0) { // easy
264 if (dist > 1) {
265 fastaddStreamOfEmptyWords(false, dist - 1);
266 }
267 addLiteralWord(
268 static_cast<uword>(static_cast<uword>(1) << (i % wordinbits)));
269 return true;
270 }
271 RunningLengthWord<uword> lastRunningLengthWord(buffer[lastRLW]);
272 if (lastRunningLengthWord.getNumberOfLiteralWords() == 0) {
273 lastRunningLengthWord.setRunningLength(
274 static_cast<uword>(lastRunningLengthWord.getRunningLength() - 1));
275 addLiteralWord(
276 static_cast<uword>(static_cast<uword>(1) << (i % wordinbits)));
277 return true;
278 }
279 buffer[buffer.size() - 1] |=
280 static_cast<uword>(static_cast<uword>(1) << (i % wordinbits));
281 // check if we just completed a stream of 1s
282 if (buffer[buffer.size() - 1] == static_cast<uword>(~0)) {
283 // we remove the last dirty word
284 buffer[buffer.size() - 1] = 0;
285 buffer.resize(buffer.size() - 1);
286 lastRunningLengthWord.setNumberOfLiteralWords(static_cast<uword>(
287 lastRunningLengthWord.getNumberOfLiteralWords() - 1));
288 // next we add one clean word
289 addEmptyWord(true);
290 }
291 return true;
292}
293
294template <class uword> void EWAHBoolArray<uword>::inplace_logicalnot() {
295 size_t pointer(0), lastrlw(0);
296 while (pointer < buffer.size()) {
297 RunningLengthWord<uword> rlw(buffer[pointer]);
298 lastrlw = pointer; // we save this up
299 if (rlw.getRunningBit())
300 rlw.setRunningBit(false);
301 else
302 rlw.setRunningBit(true);
303 ++pointer;
304 for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) {
305 buffer[pointer] = static_cast<uword>(~buffer[pointer]);
306 ++pointer;
307 }
308 }
309 if (sizeinbits % wordinbits != 0) {
310 RunningLengthWord<uword> rlw(buffer[lastrlw]);
311 const uword maskbogus = static_cast<uword>(
312 (static_cast<uword>(1) << (sizeinbits % wordinbits)) - 1);
313 if (rlw.getNumberOfLiteralWords() > 0) { // easy case
314 buffer[lastrlw + 1 + rlw.getNumberOfLiteralWords() - 1] &= maskbogus;
315 } else {
316 rlw.setRunningLength(rlw.getRunningLength() - 1);
317 addLiteralWord(maskbogus);
318 }
319 }
320}
321
322template <class uword> size_t EWAHBoolArray<uword>::numberOfWords() const {
323 size_t tot(0);
324 size_t pointer(0);
325 while (pointer < buffer.size()) {
326 ConstRunningLengthWord<uword> rlw(buffer[pointer]);
327 tot += rlw.size();
328 pointer += 1 + rlw.getNumberOfLiteralWords();
329 }
330 return tot;
331}
332
333template <class uword>
334void EWAHBoolArray<uword>::assertWordCount(std::string message) const {
335#ifdef EWAHASSERT
336 size_t tot = numberOfWords();
337 size_t expected = (sizeinbits + wordinbits - 1) / wordinbits;
338 if (expected != tot) {
339 std::cerr << "[assertWordCount] wordinbits " << wordinbits << std::endl;
340 std::cerr << "[assertWordCount] sizeinbits " << sizeinbits << std::endl;
341 std::cerr << "[assertWordCount] " << message << std::endl;
342 std::cerr << "[assertWordCount] number of words " << tot << std::endl;
343 std::cerr << "[assertWordCount] expected number of words " << expected
344 << std::endl;
345 debugprintout();
346 throw std::runtime_error("bug");
347 }
348#endif
349}
350
351template <class uword> void EWAHBoolArray<uword>::correctWordCount() {
352 size_t tot = numberOfWords();
353 size_t expected = (sizeinbits + wordinbits - 1) / wordinbits;
354 if (expected != tot) {
355 if (tot < expected) {
356 fastaddStreamOfEmptyWords(false, expected - tot);
357 } else {
358 RunningLengthWord<uword> lastRunningLengthWord(buffer[lastRLW]);
359 lastRunningLengthWord.setRunningLength(static_cast<uword>(
360 lastRunningLengthWord.getRunningLength() + expected - tot));
361 }
362 }
363}
364
365template <class uword> size_t EWAHBoolArray<uword>::numberOfOnes() const {
366 size_t tot(0);
367 size_t pointer(0);
368 while (pointer < buffer.size()) {
369 ConstRunningLengthWord<uword> rlw(buffer[pointer]);
370 if (rlw.getRunningBit()) {
371 tot += static_cast<size_t>(rlw.getRunningLength() * wordinbits);
372 }
373 ++pointer;
374 for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) {
375 tot += countOnes((uword)buffer[pointer]);
376 ++pointer;
377 }
378 }
379 return tot;
380}
381
382template <class uword>
383std::vector<size_t> EWAHBoolArray<uword>::toArray() const {
384 std::vector<size_t> ans;
385 size_t pos(0);
386 size_t pointer(0);
387 const size_t buffersize = buffer.size();
388 while (pointer < buffersize) {
389 ConstRunningLengthWord<uword> rlw(buffer[pointer]);
390 const size_t productofrl =
391 static_cast<size_t>(rlw.getRunningLength() * wordinbits);
392 if (rlw.getRunningBit()) {
393 size_t upper_limit = pos + productofrl;
394 for (; pos < upper_limit; ++pos) {
395 ans.push_back(pos);
396 }
397 } else {
398 pos += productofrl;
399 }
400 ++pointer;
401 const size_t rlwlw = rlw.getNumberOfLiteralWords();
402 for (size_t k = 0; k < rlwlw; ++k) {
403 uword myword = buffer[pointer];
404 while (myword != 0) {
405 uint64_t t = myword & (~myword + 1);
406 uint32_t r = numberOfTrailingZeros(t);
407 ans.push_back(pos + r);
408 myword ^= t;
409 }
410 pos += wordinbits;
411 ++pointer;
412 }
413 }
414 return ans;
415}
416
417template <class uword>
419 x.reset();
420 x.buffer.reserve(buffer.size());
421 EWAHBoolArrayRawIterator<uword> i = this->raw_iterator();
422 if (!i.hasNext())
423 return; // nothing to do
424 while (true) {
425 BufferedRunningLengthWord<uword> &rlw = i.next();
426 if (i.hasNext()) {
427 if (rlw.getRunningLength() > 0)
429 rlw.getRunningLength());
430 if (rlw.getNumberOfLiteralWords() > 0) {
431 const uword *dw = i.dirtyWords();
432 for (size_t k = 0; k < rlw.getNumberOfLiteralWords(); ++k) {
433 x.addLiteralWord(~dw[k]);
434 }
435 }
436 } else {
437 if (rlw.getNumberOfLiteralWords() == 0) {
438 if ((this->sizeinbits % wordinbits != 0) && !rlw.getRunningBit()) {
439 if (rlw.getRunningLength() > 1)
441 rlw.getRunningLength() - 1);
442 const uword maskbogus = static_cast<uword>(
443 (static_cast<uword>(1) << (this->sizeinbits % wordinbits)) - 1);
444 x.addLiteralWord(maskbogus);
445 break;
446 } else {
447 if (rlw.getRunningLength() > 0)
449 rlw.getRunningLength());
450 break;
451 }
452 }
453 if (rlw.getRunningLength() > 0)
455 rlw.getRunningLength());
456 const uword *dw = i.dirtyWords();
457 for (size_t k = 0; k + 1 < rlw.getNumberOfLiteralWords(); ++k) {
458 x.addLiteralWord(~dw[k]);
459 }
460 const uword maskbogus =
461 (this->sizeinbits % wordinbits != 0)
462 ? static_cast<uword>(
463 (static_cast<uword>(1) << (this->sizeinbits % wordinbits)) -
464 1)
465 : ~static_cast<uword>(0);
466 x.addLiteralWord(static_cast<uword>(
467 (~dw[rlw.getNumberOfLiteralWords() - 1]) & maskbogus));
468 break;
469 }
470 }
471 x.sizeinbits = this->sizeinbits;
472}
473
474template <class uword>
475size_t EWAHBoolArray<uword>::addWord(const uword newdata,
476 const uint32_t bitsthatmatter) {
477 sizeinbits += bitsthatmatter;
478 if (newdata == 0) {
479 return addEmptyWord(0);
480 } else if (newdata == static_cast<uword>(~0)) {
481 return addEmptyWord(1);
482 } else {
483 return addLiteralWord(newdata);
484 }
485}
486
487template <class uword>
488inline void EWAHBoolArray<uword>::writeBuffer(std::ostream &out) const {
489 if (!buffer.empty())
490 out.write(reinterpret_cast<const char *>(&buffer[0]),
491 sizeof(uword) * buffer.size());
492}
493
494template <class uword>
495inline void EWAHBoolArray<uword>::readBuffer(std::istream &in,
496 const size_t buffersize) {
497 buffer.resize(buffersize);
498 if (buffersize > 0)
499 in.read(reinterpret_cast<char *>(&buffer[0]), sizeof(uword) * buffersize);
500}
501
502template <class uword>
503size_t EWAHBoolArray<uword>::write(std::ostream &out,
504 const bool savesizeinbits) const {
505 size_t written = 0;
506 if (savesizeinbits) {
507 uint64_t sb = static_cast<uint64_t>(sizeinbits);
508 out.write(reinterpret_cast<const char *>(&sb), sizeof(sb));
509 written += sizeof(uint64_t);
510 }
511 const size_t buffersize = buffer.size();
512 uint64_t bs = static_cast<uint64_t>(buffersize);
513 out.write(reinterpret_cast<const char *>(&bs), sizeof(bs));
514 written += sizeof(uint64_t);
515
516 if (buffersize > 0) {
517 out.write(reinterpret_cast<const char *>(&buffer[0]),
518 static_cast<std::streamsize>(sizeof(uword) * buffersize));
519 written += sizeof(uword) * buffersize;
520 }
521 return written;
522}
523
524template <class uword>
525size_t EWAHBoolArray<uword>::write(char *out, size_t capacity,
526 const bool savesizeinbits) const {
527 size_t written = 0;
528 if (savesizeinbits) {
529 uint64_t sb = static_cast<uint64_t>(sizeinbits);
530 if (capacity < sizeof(sb))
531 return 0;
532 capacity -= sizeof(sb);
533 memcpy(out, &sb, sizeof(sb));
534 out += sizeof(sb);
535 written += sizeof(uint64_t);
536 }
537 const size_t buffersize = buffer.size();
538 uint64_t bs = static_cast<uint64_t>(buffersize);
539 if (capacity < sizeof(bs))
540 return 0;
541 capacity -= sizeof(bs);
542 memcpy(out, &buffersize, sizeof(bs));
543 out += sizeof(bs);
544 written += sizeof(uint64_t);
545
546 if (buffersize > 0) {
547 if (capacity < sizeof(uword) * buffersize)
548 return 0;
549 memcpy(out, &buffer[0], sizeof(uword) * buffersize);
550 written += sizeof(uword) * buffersize;
551 }
552 return written;
553}
554
555template <class uword>
556size_t EWAHBoolArray<uword>::read(std::istream &in, const bool savesizeinbits) {
557 size_t read = 0;
558 if (savesizeinbits) {
559 uint64_t tmp;
560 in.read(reinterpret_cast<char *>(&tmp), sizeof(tmp));
561 read += sizeof(tmp);
562 sizeinbits = static_cast<size_t>(tmp);
563 } else {
564 sizeinbits = 0;
565 }
566 size_t buffersize(0);
567 uint64_t tmp;
568 in.read(reinterpret_cast<char *>(&tmp), sizeof(tmp));
569 read += sizeof(tmp);
570 buffersize = static_cast<size_t>(tmp);
571 buffer.resize(buffersize);
572 if (buffersize > 0) {
573 in.read(reinterpret_cast<char *>(&buffer[0]),
574 static_cast<std::streamsize>(sizeof(uword) * buffersize));
575 read += sizeof(uword) * buffersize;
576 }
577 return read;
578}
579
580template <class uword>
581size_t EWAHBoolArray<uword>::read(const char *in, size_t capacity,
582 const bool savesizeinbits) {
583 size_t read = 0;
584 if (savesizeinbits) {
585 uint64_t tmp;
586 if (capacity < sizeof(tmp))
587 return 0;
588 capacity -= sizeof(tmp);
589 memcpy(reinterpret_cast<char *>(&tmp), in, sizeof(tmp));
590 read += sizeof(tmp);
591 in += sizeof(tmp);
592 sizeinbits = static_cast<size_t>(tmp);
593 } else {
594 sizeinbits = 0;
595 }
596 size_t buffersize(0);
597 uint64_t tmp;
598 if (capacity < sizeof(uint64_t))
599 return 0;
600 capacity -= sizeof(uint64_t);
601 memcpy(reinterpret_cast<char *>(&tmp), in, sizeof(uint64_t));
602 in += sizeof(uint64_t);
603 read += sizeof(uint64_t);
604 buffersize = static_cast<size_t>(tmp);
605 buffer.resize(buffersize);
606 if (buffersize > 0) {
607 if (capacity < sizeof(uword) * buffersize)
608 return 0;
609 memcpy(&buffer[0], in, sizeof(uword) * buffersize);
610 read += sizeof(uword) * buffersize;
611 }
612 return read;
613}
614
615template <class uword>
616size_t EWAHBoolArray<uword>::addLiteralWord(const uword newdata) {
617 RunningLengthWord<uword> lastRunningLengthWord(buffer[lastRLW]);
618 uword numbersofar = lastRunningLengthWord.getNumberOfLiteralWords();
619 if (numbersofar >=
621 buffer.push_back(0);
622 lastRLW = buffer.size() - 1;
623 RunningLengthWord<uword> lastRunningLengthWord2(buffer[lastRLW]);
624 lastRunningLengthWord2.setNumberOfLiteralWords(1);
625 buffer.push_back(newdata);
626 return 2;
627 }
628 lastRunningLengthWord.setNumberOfLiteralWords(
629 static_cast<uword>(numbersofar + 1));
630 buffer.push_back(newdata);
631 return 1;
632}
633
634template <class uword>
635size_t EWAHBoolArray<uword>::padWithZeroes(const size_t totalbits) {
636 size_t wordsadded = 0;
637 if (totalbits <= sizeinbits)
638 return wordsadded;
639
640 size_t missingbits = totalbits - sizeinbits;
641
642 RunningLengthWord<uword> rlw(buffer[lastRLW]);
643 if (rlw.getNumberOfLiteralWords() > 0) {
644 // Consume trailing zeroes of trailing literal word (past sizeinbits)
645 size_t remain = sizeinbits % wordinbits;
646 if (remain > 0) // Is last word partial?
647 {
648 size_t avail = wordinbits - remain;
649 if (avail > 0) {
650 if (missingbits > avail) {
651 missingbits -= avail;
652 } else {
653 missingbits = 0;
654 }
655 sizeinbits += avail;
656 }
657 }
658 }
659
660 if (missingbits > 0) {
661 size_t wordstoadd = missingbits / wordinbits;
662 if ((missingbits % wordinbits) != 0)
663 ++wordstoadd;
664
665 wordsadded = addStreamOfEmptyWords(false, wordstoadd);
666 }
667 sizeinbits = totalbits;
668 return wordsadded;
669}
670
675template <class uword = uint32_t> class EWAHBoolArrayRawIterator {
676public:
678 : pointer(0), myparent(&p.getBuffer()), rlw((*myparent)[pointer], this) {}
679 EWAHBoolArrayRawIterator(const EWAHBoolArrayRawIterator &o)
680 : pointer(o.pointer), myparent(o.myparent), rlw(o.rlw) {}
681
682 bool hasNext() const { return pointer < myparent->size(); }
683
684 BufferedRunningLengthWord<uword> &next() {
685 rlw.read((*myparent)[pointer]);
686 pointer = static_cast<size_t>(pointer + rlw.getNumberOfLiteralWords() + 1);
687 return rlw;
688 }
689
690 const uword *dirtyWords() const {
691 return myparent->data() +
692 static_cast<size_t>(pointer - rlw.getNumberOfLiteralWords());
693 }
694
695 EWAHBoolArrayRawIterator &operator=(const EWAHBoolArrayRawIterator &other) {
696 pointer = other.pointer;
697 myparent = other.myparent;
698 rlw = other.rlw;
699 return *this;
700 }
701
702 size_t pointer;
703 const std::vector<uword> *myparent;
704 BufferedRunningLengthWord<uword> rlw;
705
706 EWAHBoolArrayRawIterator();
707};
708
709template <class uword>
713
714template <class uword>
718
719template <class uword>
722 EWAHBoolArrayRawIterator<uword> j = raw_iterator();
723 if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
724 return (i.hasNext() == false) && (j.hasNext() == false);
725 }
726 // at this point, this should be safe:
727 BufferedRunningLengthWord<uword> &rlwi = i.next();
728 BufferedRunningLengthWord<uword> &rlwj = j.next();
729
730 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
731 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
732 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
733 BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
734 BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
735 size_t index = 0;
736 const bool nonzero =
737 ((!predator.getRunningBit())
738 ? prey.nonzero_discharge(predator.getRunningLength(), index)
739 : prey.nonzero_dischargeNegated(predator.getRunningLength(),
740 index));
741 if (nonzero) {
742 return false;
743 }
744 if (predator.getRunningLength() - index > 0) {
745 if (predator.getRunningBit()) {
746 return false;
747 }
748 }
749 predator.discardRunningWordsWithReload();
750 }
751 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
753 if (nbre_literal > 0) {
754 for (size_t k = 0; k < nbre_literal; ++k)
755 if ((rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)) != 0)
756 return false;
757 rlwi.discardLiteralWordsWithReload(nbre_literal);
758 rlwj.discardLiteralWordsWithReload(nbre_literal);
759 }
760 }
761 const bool i_remains = rlwi.size() > 0;
762 BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
763 return !remaining.nonzero_discharge();
764}
765
766template <class uword> void EWAHBoolArray<uword>::swap(EWAHBoolArray &x) {
767 buffer.swap(x.buffer);
768 size_t tmp = x.sizeinbits;
769 x.sizeinbits = sizeinbits;
770 sizeinbits = tmp;
771 tmp = x.lastRLW;
772 x.lastRLW = lastRLW;
773 lastRLW = tmp;
774}
775
776template <class uword>
778 if (sizeinbits % wordinbits == 0) {
779 // hoping for the best?
780 sizeinbits += x.sizeinbits;
781 ConstRunningLengthWord<uword> lRLW(buffer[lastRLW]);
782 if ((lRLW.getRunningLength() == 0) &&
783 (lRLW.getNumberOfLiteralWords() == 0)) {
784 // it could be that the running length word is empty, in such a case,
785 // we want to get rid of it!
786 lastRLW = x.lastRLW + buffer.size() - 1;
787 buffer.resize(buffer.size() - 1);
788 buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end());
789 } else {
790 lastRLW = x.lastRLW + buffer.size();
791 buffer.insert(buffer.end(), x.buffer.begin(), x.buffer.end());
792 }
793 } else {
794 std::stringstream ss;
795 ss << "This should really not happen! You are trying to append to a bitmap "
796 "having a fractional number of words, that is, "
797 << static_cast<int>(sizeinbits) << " bits with a word size in bits of "
798 << static_cast<int>(wordinbits) << ". ";
799 ss << "Size of the bitmap being appended: " << x.sizeinbits << " bits."
800 << std::endl;
801 throw std::invalid_argument(ss.str());
802 }
803}
804
805template <class uword>
807 const std::vector<uword> &parent)
808 : pointer(0), myparent(parent), compressedwords(0), literalwords(0), rl(0),
809 lw(0), b(0) {
810 if (pointer < myparent.size())
811 readNewRunningLengthWord();
812}
813
814template <class uword>
815void EWAHBoolArrayIterator<uword>::readNewRunningLengthWord() {
816 literalwords = 0;
817 compressedwords = 0;
818 ConstRunningLengthWord<uword> rlw(myparent[pointer]);
819 rl = rlw.getRunningLength();
820 lw = rlw.getNumberOfLiteralWords();
821 b = rlw.getRunningBit();
822 if ((rl == 0) && (lw == 0)) {
823 if (pointer < myparent.size() - 1) {
824 ++pointer;
825 readNewRunningLengthWord();
826 } else {
827 pointer = myparent.size();
828 }
829 }
830}
831
832template <class uword>
834 BoolArray<uword> ans(sizeinbits);
835 EWAHBoolArrayIterator<uword> i = uncompress();
836 size_t counter = 0;
837 while (i.hasNext()) {
838 ans.setWord(counter++, i.next());
839 }
840 return ans;
841}
842
843template <class uword>
844template <class container>
846 const size_t offset) const {
847 size_t pointer(0);
848 size_t currentoffset(offset);
849 if (RESERVEMEMORY)
850 out.reserve(buffer.size() + 64); // trading memory for speed.
851 const size_t buffersize = buffer.size();
852 while (pointer < buffersize) {
853 ConstRunningLengthWord<uword> rlw(buffer[pointer]);
854 const size_t productofrl =
855 static_cast<size_t>(rlw.getRunningLength() * wordinbits);
856 if (rlw.getRunningBit()) {
857 const size_t upper_limit = currentoffset + productofrl;
858 for (; currentoffset < upper_limit; ++currentoffset) {
859 out.push_back(currentoffset);
860 }
861 } else {
862 currentoffset += productofrl;
863 }
864 ++pointer;
865 const size_t rlwlw = rlw.getNumberOfLiteralWords();
866 for (uword k = 0; k < rlwlw; ++k) {
867 uword currentword = buffer[pointer];
868 while (currentword != 0) {
869 uword t = static_cast<uword>(currentword & (~currentword + 1));
870 uint32_t r = numberOfTrailingZeros(t);
871 out.push_back(currentoffset + r);
872 currentword ^= t;
873 }
874 currentoffset += wordinbits;
875 ++pointer;
876 }
877 }
878}
879
880template <class uword>
882 return !(*this == x);
883}
884
885template <class uword>
887 // could be more efficient
888 return (this->toBoolArray() == x);
889}
890
891template <class uword>
892bool EWAHBoolArray<uword>::operator!=(const BoolArray<uword> &x) const {
893 // could be more efficient
894 return (this->toBoolArray() != x);
895}
896
897template <class uword>
899 size_t number) {
900 if (number == 0)
901 return 0;
902 sizeinbits += number * wordinbits;
903 size_t wordsadded = 0;
904 if ((RunningLengthWord<uword>::getRunningBit(buffer[lastRLW]) != v) &&
905 (RunningLengthWord<uword>::size(buffer[lastRLW]) == 0)) {
906 RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
908 buffer[lastRLW]) != 0) ||
909 (RunningLengthWord<uword>::getRunningBit(buffer[lastRLW]) != v)) {
910 buffer.push_back(0);
911 ++wordsadded;
912 lastRLW = buffer.size() - 1;
913 if (v)
914 RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
915 }
916 const uword runlen =
918
919 const uword whatwecanadd =
920 number < static_cast<size_t>(
922 ? static_cast<uword>(number)
923 : static_cast<uword>(
926 buffer[lastRLW], static_cast<uword>(runlen + whatwecanadd));
927
928 number -= static_cast<size_t>(whatwecanadd);
930 buffer.push_back(0);
931 ++wordsadded;
932 lastRLW = buffer.size() - 1;
933 if (v)
934 RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
937 number -= static_cast<size_t>(
939 }
940 if (number > 0) {
941 buffer.push_back(0);
942 ++wordsadded;
943 lastRLW = buffer.size() - 1;
944 if (v)
945 RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
947 static_cast<uword>(number));
948 }
949 return wordsadded;
950}
951
952template <class uword>
954 size_t number) {
955 if (number == 0)
956 return;
957 if ((RunningLengthWord<uword>::getRunningBit(buffer[lastRLW]) != v) &&
958 (RunningLengthWord<uword>::size(buffer[lastRLW]) == 0)) {
959 RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
961 buffer[lastRLW]) != 0) ||
962 (RunningLengthWord<uword>::getRunningBit(buffer[lastRLW]) != v)) {
963 buffer.push_back(0);
964 lastRLW = buffer.size() - 1;
965 if (v)
966 RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
967 }
968 const uword runlen =
970
971 const uword whatwecanadd =
972 number < static_cast<size_t>(
974 ? static_cast<uword>(number)
975 : static_cast<uword>(
978 buffer[lastRLW], static_cast<uword>(runlen + whatwecanadd));
979
980 number -= static_cast<size_t>(whatwecanadd);
982 buffer.push_back(0);
983 lastRLW = buffer.size() - 1;
984 if (v)
985 RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
988 number -= static_cast<size_t>(
990 }
991 if (number > 0) {
992 buffer.push_back(0);
993 lastRLW = buffer.size() - 1;
994 if (v)
995 RunningLengthWord<uword>::setRunningBit(buffer[lastRLW], v);
997 static_cast<uword>(number));
998 }
999}
1000
1001template <class uword>
1003 const size_t number) {
1004 if (number == 0)
1005 return 0;
1006 uword rlw = buffer[lastRLW];
1007 size_t NumberOfLiteralWords =
1009 if (NumberOfLiteralWords + number <=
1012 rlw, static_cast<uword>(NumberOfLiteralWords + number));
1013 buffer[lastRLW] = rlw;
1014 sizeinbits += number * wordinbits;
1015 buffer.insert(buffer.end(), v, v + number);
1016 return number;
1017 }
1018 // we proceed the long way
1019 size_t howmanywecanadd =
1020 RunningLengthWord<uword>::largestliteralcount - NumberOfLiteralWords;
1023 buffer[lastRLW] = rlw;
1024 buffer.insert(buffer.end(), v, v + howmanywecanadd);
1025 size_t wordadded = howmanywecanadd;
1026 sizeinbits += howmanywecanadd * wordinbits;
1027 buffer.push_back(0);
1028 lastRLW = buffer.size() - 1;
1029 ++wordadded;
1030 wordadded +=
1031 addStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd);
1032 return wordadded;
1033}
1034
1035template <class uword>
1037 const size_t number) {
1038 if (number == 0)
1039 return;
1040 uword rlw = buffer[lastRLW];
1041 size_t NumberOfLiteralWords =
1043 if (NumberOfLiteralWords + number <=
1046 rlw, static_cast<uword>(NumberOfLiteralWords + number));
1047 buffer[lastRLW] = rlw;
1048 for (size_t i = 0; i < number; ++i)
1049 buffer.push_back(v[i]);
1050 // buffer.insert(buffer.end(), v, v+number); // seems slower than push_back?
1051 return;
1052 }
1053 // we proceed the long way
1054 size_t howmanywecanadd =
1055 RunningLengthWord<uword>::largestliteralcount - NumberOfLiteralWords;
1058 buffer[lastRLW] = rlw;
1059 for (size_t i = 0; i < howmanywecanadd; ++i)
1060 buffer.push_back(v[i]);
1061 // buffer.insert(buffer.end(), v, v+howmanywecanadd);// seems slower than
1062 // push_back?
1063 buffer.push_back(0);
1064 lastRLW = buffer.size() - 1;
1065 fastaddStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd);
1066}
1067
1068template <class uword>
1070 const size_t number) {
1071 if (number == 0)
1072 return 0;
1073 uword rlw = buffer[lastRLW];
1074 size_t NumberOfLiteralWords =
1076 if (NumberOfLiteralWords + number <=
1079 rlw, static_cast<uword>(NumberOfLiteralWords + number));
1080 buffer[lastRLW] = rlw;
1081 sizeinbits += number * wordinbits;
1082 for (size_t k = 0; k < number; ++k)
1083 buffer.push_back(~v[k]);
1084 return number;
1085 }
1086 // we proceed the long way
1087 size_t howmanywecanadd =
1088 RunningLengthWord<uword>::largestliteralcount - NumberOfLiteralWords;
1091 buffer[lastRLW] = rlw;
1092 for (size_t k = 0; k < howmanywecanadd; ++k)
1093 buffer.push_back(~v[k]);
1094 size_t wordadded = howmanywecanadd;
1095 sizeinbits += howmanywecanadd * wordinbits;
1096 buffer.push_back(0);
1097 lastRLW = buffer.size() - 1;
1098 ++wordadded;
1099 wordadded +=
1100 addStreamOfDirtyWords(v + howmanywecanadd, number - howmanywecanadd);
1101 return wordadded;
1102}
1103
1104template <class uword> size_t EWAHBoolArray<uword>::addEmptyWord(const bool v) {
1105 RunningLengthWord<uword> lastRunningLengthWord(buffer[lastRLW]);
1106 const bool noliteralword =
1107 (lastRunningLengthWord.getNumberOfLiteralWords() == 0);
1108 // first, if the last running length word is empty, we align it
1109 // this
1110 uword runlen = lastRunningLengthWord.getRunningLength();
1111 if ((noliteralword) && (runlen == 0)) {
1112 lastRunningLengthWord.setRunningBit(v);
1113 }
1114 if ((noliteralword) && (lastRunningLengthWord.getRunningBit() == v) &&
1115 (runlen < RunningLengthWord<uword>::largestrunninglengthcount)) {
1116 lastRunningLengthWord.setRunningLength(static_cast<uword>(runlen + 1));
1117 return 0;
1118 } else {
1119 // we have to start anew
1120 buffer.push_back(0);
1121 lastRLW = buffer.size() - 1;
1122 RunningLengthWord<uword> lastRunningLengthWord2(buffer[lastRLW]);
1123 lastRunningLengthWord2.setRunningBit(v);
1124 lastRunningLengthWord2.setRunningLength(1);
1125 return 1;
1126 }
1127}
1128
1129template <class uword>
1130void fast_logicalor_tocontainer(size_t n, const EWAHBoolArray<uword> **inputs,
1131 EWAHBoolArray<uword> &container) {
1132 class EWAHBoolArrayPtr {
1133
1134 public:
1135 EWAHBoolArrayPtr(const EWAHBoolArray<uword> *p, bool o) : ptr(p), own(o) {}
1136 const EWAHBoolArray<uword> *ptr;
1137 bool own; // whether to clean
1138
1139 bool operator<(const EWAHBoolArrayPtr &o) const {
1140 return o.ptr->sizeInBytes() < ptr->sizeInBytes(); // backward on purpose
1141 }
1142 };
1143
1144 if (n == 0) {
1145 container.reset();
1146 return;
1147 }
1148 if (n == 1) {
1149 container = *inputs[0];
1150 return;
1151 }
1152 std::priority_queue<EWAHBoolArrayPtr> pq;
1153 for (size_t i = 0; i < n; i++) {
1154 // could use emplace
1155 pq.push(EWAHBoolArrayPtr(inputs[i], false));
1156 }
1157 while (pq.size() > 2) {
1158
1159 EWAHBoolArrayPtr x1 = pq.top();
1160 pq.pop();
1161
1162 EWAHBoolArrayPtr x2 = pq.top();
1163 pq.pop();
1164
1165 EWAHBoolArray<uword> *buffer = new EWAHBoolArray<uword>();
1166 x1.ptr->logicalor(*x2.ptr, *buffer);
1167
1168 if (x1.own) {
1169 delete x1.ptr;
1170 }
1171 if (x2.own) {
1172 delete x2.ptr;
1173 }
1174 pq.push(EWAHBoolArrayPtr(buffer, true));
1175 }
1176 EWAHBoolArrayPtr x1 = pq.top();
1177 pq.pop();
1178
1179 EWAHBoolArrayPtr x2 = pq.top();
1180 pq.pop();
1181
1182 x1.ptr->logicalor(*x2.ptr, container);
1183
1184 if (x1.own) {
1185 delete x1.ptr;
1186 }
1187 if (x2.own) {
1188 delete x2.ptr;
1189 }
1190}
1191
1192template <class uword>
1194 EWAHBoolArray &container) const {
1195 container.reset();
1196 if (RESERVEMEMORY)
1197 container.buffer.reserve(buffer.size() + a.buffer.size());
1199 EWAHBoolArrayRawIterator<uword> j = raw_iterator();
1200 if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
1201 container.setSizeInBits(sizeInBits());
1202 return;
1203 }
1204 // at this point, this should be safe:
1205 BufferedRunningLengthWord<uword> &rlwi = i.next();
1206 BufferedRunningLengthWord<uword> &rlwj = j.next();
1207
1208 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1209 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1210 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1211 BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
1212 BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
1213 if (predator.getRunningBit()) {
1214 container.fastaddStreamOfEmptyWords(true, predator.getRunningLength());
1215 prey.discardFirstWordsWithReload(predator.getRunningLength());
1216 } else {
1217 const size_t index =
1218 prey.discharge(container, predator.getRunningLength());
1219 container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() -
1220 index);
1221 }
1222 predator.discardRunningWordsWithReload();
1223 }
1224
1225 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1227 if (nbre_literal > 0) {
1228 for (size_t k = 0; k < nbre_literal; ++k) {
1229 container.addWord(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k));
1230 }
1231 rlwi.discardLiteralWordsWithReload(nbre_literal);
1232 rlwj.discardLiteralWordsWithReload(nbre_literal);
1233 }
1234 }
1235 const bool i_remains = rlwi.size() > 0;
1236 BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
1237 remaining.discharge(container);
1238 container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits()
1239 : a.sizeInBits());
1240}
1241
1242template <class uword>
1244 size_t answer = 0;
1246 EWAHBoolArrayRawIterator<uword> j = raw_iterator();
1247 if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
1248 return 0;
1249 }
1250 // at this point, this should be safe:
1251 BufferedRunningLengthWord<uword> &rlwi = i.next();
1252 BufferedRunningLengthWord<uword> &rlwj = j.next();
1253
1254 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1255 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1256 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1257 BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
1258 BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
1259 if (predator.getRunningBit()) {
1260 answer += predator.getRunningLength() * wordinbits;
1261 prey.discardFirstWordsWithReload(predator.getRunningLength());
1262
1263 } else {
1264 // const size_t index =
1265 prey.dischargeCount(predator.getRunningLength(), &answer);
1266 }
1267 predator.discardRunningWordsWithReload();
1268 }
1269
1270 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1272 if (nbre_literal > 0) {
1273 for (size_t k = 0; k < nbre_literal; ++k) {
1274 answer += countOnes(
1275 (uword)(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k)));
1276 }
1277 rlwi.discardLiteralWordsWithReload(nbre_literal);
1278 rlwj.discardLiteralWordsWithReload(nbre_literal);
1279 }
1280 }
1281 const bool i_remains = rlwi.size() > 0;
1282 BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
1283 answer += remaining.dischargeCount();
1284 return answer;
1285}
1286
1287template <class uword>
1289 EWAHBoolArray &container) const {
1290 container.reset();
1291 if (RESERVEMEMORY)
1292 container.buffer.reserve(buffer.size() + a.buffer.size());
1294 EWAHBoolArrayRawIterator<uword> j = raw_iterator();
1295 if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
1296 container.setSizeInBits(sizeInBits());
1297 return;
1298 }
1299 // at this point, this should be safe:
1300 BufferedRunningLengthWord<uword> &rlwi = i.next();
1301 BufferedRunningLengthWord<uword> &rlwj = j.next();
1302 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1303 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1304 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1305 BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
1306 BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
1307 const size_t index =
1308 (!predator.getRunningBit())
1309 ? prey.discharge(container, predator.getRunningLength())
1310 : prey.dischargeNegated(container, predator.getRunningLength());
1311 container.fastaddStreamOfEmptyWords(predator.getRunningBit(),
1312 predator.getRunningLength() - index);
1313 predator.discardRunningWordsWithReload();
1314 }
1315 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1317 if (nbre_literal > 0) {
1318 for (size_t k = 0; k < nbre_literal; ++k)
1319 container.addWord(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k));
1320 rlwi.discardLiteralWordsWithReload(nbre_literal);
1321 rlwj.discardLiteralWordsWithReload(nbre_literal);
1322 }
1323 }
1324 const bool i_remains = rlwi.size() > 0;
1325 BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
1326 remaining.discharge(container);
1327 container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits()
1328 : a.sizeInBits());
1329}
1330
1331template <class uword>
1334 EWAHBoolArrayRawIterator<uword> j = raw_iterator();
1335 if (!i.hasNext())
1336 return a.numberOfOnes();
1337 if (!j.hasNext())
1338 return this->numberOfOnes();
1339
1340 size_t answer = 0;
1341
1342 // at this point, this should be safe:
1343 BufferedRunningLengthWord<uword> &rlwi = i.next();
1344 BufferedRunningLengthWord<uword> &rlwj = j.next();
1345 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1346 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1347 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1348 BufferedRunningLengthWord<uword> &prey = i_is_prey ? rlwi : rlwj;
1349 BufferedRunningLengthWord<uword> &predator = i_is_prey ? rlwj : rlwi;
1350 size_t index;
1351
1352 if (predator.getRunningBit()) {
1353 index =
1354 prey.dischargeCountNegated(predator.getRunningLength(), &answer);
1355 } else {
1356 index = prey.dischargeCount(predator.getRunningLength(), &answer);
1357 }
1358 if (predator.getRunningBit())
1359 answer += (predator.getRunningLength() - index) * wordinbits;
1360
1361 predator.discardRunningWordsWithReload();
1362 }
1363 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1365 if (nbre_literal > 0) {
1366 for (size_t k = 0; k < nbre_literal; ++k) {
1367 answer += countOnes(
1368 (uword)(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)));
1369 }
1370 rlwi.discardLiteralWordsWithReload(nbre_literal);
1371 rlwj.discardLiteralWordsWithReload(nbre_literal);
1372 }
1373 }
1374 const bool i_remains = rlwi.size() > 0;
1375 BufferedRunningLengthWord<uword> &remaining = i_remains ? rlwi : rlwj;
1376 answer += remaining.dischargeCount();
1377 return answer;
1378}
1379
1380template <class uword>
1382 EWAHBoolArray &container) const {
1383 container.reset();
1384 if (RESERVEMEMORY)
1385 container.buffer.reserve(buffer.size() > a.buffer.size() ? buffer.size()
1386 : a.buffer.size());
1388 EWAHBoolArrayRawIterator<uword> j = raw_iterator();
1389 if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
1390 container.setSizeInBits(sizeInBits());
1391 return;
1392 }
1393 // at this point, this should be safe:
1394 BufferedRunningLengthWord<uword> &rlwi = i.next();
1395 BufferedRunningLengthWord<uword> &rlwj = j.next();
1396
1397 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1398 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1399 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1400 BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
1401 BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
1402 if (!predator.getRunningBit()) {
1403 container.fastaddStreamOfEmptyWords(false, predator.getRunningLength());
1404 prey.discardFirstWordsWithReload(predator.getRunningLength());
1405 } else {
1406 const size_t index =
1407 prey.discharge(container, predator.getRunningLength());
1408 container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() -
1409 index);
1410 }
1411 predator.discardRunningWordsWithReload();
1412 }
1413 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1415 if (nbre_literal > 0) {
1416 for (size_t k = 0; k < nbre_literal; ++k) {
1417 container.addWord(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
1418 }
1419 rlwi.discardLiteralWordsWithReload(nbre_literal);
1420 rlwj.discardLiteralWordsWithReload(nbre_literal);
1421 }
1422 }
1423 BufferedRunningLengthWord<uword> &remain = rlwj.size() > 0 ? rlwj : rlwi;
1424 while (remain.size() > 0) {
1425 container.addStreamOfEmptyWords(false, remain.size());
1426 if (!remain.next()) {
1427 break;
1428 }
1429 }
1430 container.setSizeInBits(sizeInBits() > a.sizeInBits() ? sizeInBits()
1431 : a.sizeInBits());
1432 container.assertWordCount("logicaland");
1433}
1434
1435template <class uword>
1437 EWAHBoolArray &container) const {
1438 container.reset();
1439 if (RESERVEMEMORY)
1440 container.buffer.reserve(buffer.size() > a.buffer.size() ? buffer.size()
1441 : a.buffer.size());
1442 EWAHBoolArrayRawIterator<uword> i = raw_iterator();
1444 if (!j.hasNext()) { // the other fellow is empty
1445 container = *this; // just copy, stupidly, the data
1446 return;
1447 }
1448 if (!(i.hasNext())) { // hopefully this never happens...
1449 container.setSizeInBits(sizeInBits());
1450 return;
1451 }
1452 // at this point, this should be safe:
1453 BufferedRunningLengthWord<uword> &rlwi = i.next();
1454 BufferedRunningLengthWord<uword> &rlwj = j.next();
1455
1456 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1457 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1458 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1459 BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
1460 BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
1461 if (((predator.getRunningBit()) && (i_is_prey)) ||
1462 ((!predator.getRunningBit()) && (!i_is_prey))) {
1463 container.fastaddStreamOfEmptyWords(false, predator.getRunningLength());
1464 prey.discardFirstWordsWithReload(predator.getRunningLength());
1465 } else if (i_is_prey) {
1466 const size_t index =
1467 prey.discharge(container, predator.getRunningLength());
1468 container.fastaddStreamOfEmptyWords(false, predator.getRunningLength() -
1469 index);
1470 } else {
1471 const size_t index =
1472 prey.dischargeNegated(container, predator.getRunningLength());
1473 container.fastaddStreamOfEmptyWords(true, predator.getRunningLength() -
1474 index);
1475 }
1476 predator.discardRunningWordsWithReload();
1477 }
1478 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1480 if (nbre_literal > 0) {
1481 for (size_t k = 0; k < nbre_literal; ++k) {
1482 container.addWord(static_cast<uword>(rlwi.getLiteralWordAt(k) &
1483 ~rlwj.getLiteralWordAt(k)));
1484 }
1485 rlwi.discardLiteralWordsWithReload(nbre_literal);
1486 rlwj.discardLiteralWordsWithReload(nbre_literal);
1487 }
1488 }
1489 if (rlwi.size() > 0) {
1490 rlwi.discharge(container);
1491 container.setSizeInBits(sizeInBits());
1492 } else {
1493 while (rlwj.size() > 0) {
1494 container.addStreamOfEmptyWords(false, rlwj.size());
1495 if (!rlwj.next()) {
1496 break;
1497 }
1498 }
1499 container.setSizeInBits(a.sizeInBits());
1500 }
1501 container.assertWordCount("logicalandnot");
1502}
1503
1504template <class uword>
1506 EWAHBoolArrayRawIterator<uword> i = raw_iterator();
1508 if (!j.hasNext()) { // the other fellow is empty
1509 return this->numberOfOnes();
1510 }
1511 if (!(i.hasNext())) { // hopefully this never happens...
1512 return 0;
1513 }
1514 size_t answer = 0;
1515 // at this point, this should be safe:
1516 BufferedRunningLengthWord<uword> &rlwi = i.next();
1517 BufferedRunningLengthWord<uword> &rlwj = j.next();
1518
1519 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1520 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1521 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1522 BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
1523 BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
1524 if (((predator.getRunningBit()) && (i_is_prey)) ||
1525 ((!predator.getRunningBit()) && (!i_is_prey))) {
1526 prey.discardFirstWordsWithReload(predator.getRunningLength());
1527 } else if (i_is_prey) {
1528 prey.dischargeCount(predator.getRunningLength(), &answer);
1529 } else {
1530 const size_t index =
1531 prey.dischargeCountNegated(predator.getRunningLength(), &answer);
1532 answer += (predator.getRunningLength() - index) * wordinbits;
1533 }
1534 predator.discardRunningWordsWithReload();
1535 }
1536 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1538 if (nbre_literal > 0) {
1539 for (size_t k = 0; k < nbre_literal; ++k) {
1540 answer += countOnes(
1541 (uword)(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k))));
1542 }
1543 rlwi.discardLiteralWordsWithReload(nbre_literal);
1544 rlwj.discardLiteralWordsWithReload(nbre_literal);
1545 }
1546 }
1547 const bool i_remains = rlwi.size() > 0;
1548 if (i_remains) {
1549 answer += rlwi.dischargeCount();
1550 }
1551 return answer;
1552}
1553
1554template <class uword>
1557 EWAHBoolArrayRawIterator<uword> j = raw_iterator();
1558 if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
1559 return 0;
1560 }
1561 size_t answer = 0;
1562 // at this point, this should be safe:
1563 BufferedRunningLengthWord<uword> &rlwi = i.next();
1564 BufferedRunningLengthWord<uword> &rlwj = j.next();
1565
1566 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1567 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1568 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1569 BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
1570 BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
1571 if (!predator.getRunningBit()) {
1572 prey.discardFirstWordsWithReload(predator.getRunningLength());
1573 } else {
1574 // const size_t index =
1575 prey.dischargeCount(predator.getRunningLength(), &answer);
1576 }
1577 predator.discardRunningWordsWithReload();
1578 }
1579 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1581 if (nbre_literal > 0) {
1582 for (size_t k = 0; k < nbre_literal; ++k) {
1583 answer += countOnes(
1584 (uword)(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)));
1585 }
1586 rlwi.discardLiteralWordsWithReload(nbre_literal);
1587 rlwj.discardLiteralWordsWithReload(nbre_literal);
1588 }
1589 }
1590 return answer;
1591}
1592
1593template <class uword>
1596 EWAHBoolArrayRawIterator<uword> j = raw_iterator();
1597 if (!(i.hasNext() and j.hasNext())) { // hopefully this never happens...
1598 return false;
1599 }
1600 // at this point, this should be safe:
1601 BufferedRunningLengthWord<uword> &rlwi = i.next();
1602 BufferedRunningLengthWord<uword> &rlwj = j.next();
1603
1604 while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
1605 while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
1606 const bool i_is_prey = rlwi.getRunningLength() < rlwj.getRunningLength();
1607 BufferedRunningLengthWord<uword> &prey(i_is_prey ? rlwi : rlwj);
1608 BufferedRunningLengthWord<uword> &predator(i_is_prey ? rlwj : rlwi);
1609 if (!predator.getRunningBit()) {
1610 prey.discardFirstWordsWithReload(predator.getRunningLength());
1611 } else {
1612 size_t index = 0;
1613 bool isnonzero =
1614 prey.nonzero_discharge(predator.getRunningLength(), index);
1615 if (isnonzero)
1616 return true;
1617 }
1618 predator.discardRunningWordsWithReload();
1619 }
1620 const uword nbre_literal = std::min(rlwi.getNumberOfLiteralWords(),
1622 if (nbre_literal > 0) {
1623 for (size_t k = 0; k < nbre_literal; ++k) {
1624 if ((rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)) != 0)
1625 return true;
1626 }
1627 rlwi.discardLiteralWordsWithReload(nbre_literal);
1628 rlwj.discardLiteralWordsWithReload(nbre_literal);
1629 }
1630 }
1631 return false;
1632}
1633
1634template <class uword>
1637 EWAHBoolArrayRawIterator<uword> i = raw_iterator();
1638 while (i.hasNext()) {
1639 BufferedRunningLengthWord<uword> &brlw(i.next());
1640 ++bs.runningwordmarker;
1641 bs.totalliteral += brlw.getNumberOfLiteralWords();
1642 bs.totalcompressed += brlw.getRunningLength();
1643 if (brlw.getRunningLength() ==
1645 ++bs.maximumofrunningcounterreached;
1646 }
1647 }
1648 return bs;
1649}
1650
1651template <class uword> void EWAHBoolArray<uword>::debugprintout() const {
1652 std::cout << "==printing out EWAHBoolArray==" << std::endl;
1653 std::cout << "Number of compressed words: " << buffer.size() << std::endl;
1654 std::cout << "Size in bits: " << sizeinbits << std::endl;
1655
1656 size_t pointer = 0;
1657 while (pointer < buffer.size()) {
1658 ConstRunningLengthWord<uword> rlw(buffer[pointer]);
1659 bool b = rlw.getRunningBit();
1660 const uword rl = rlw.getRunningLength();
1661 const uword lw = rlw.getNumberOfLiteralWords();
1662 std::cout << "pointer = " << pointer << " running bit=" << b
1663 << " running length=" << rl << " lit. words=" << lw << std::endl;
1664 for (uword j = 0; j < lw; ++j) {
1665 const uword &w = buffer[pointer + j + 1];
1666 std::cout << toBinaryString(w) << std::endl;
1667 }
1668 pointer += lw + 1;
1669 }
1670 std::cout << "==END==" << std::endl;
1671}
1672
1673template <class uword>
1674size_t EWAHBoolArray<uword>::sizeOnDisk(const bool savesizeinbits) const {
1675 return (savesizeinbits ? sizeof(uint64_t) : 0) + sizeof(uint64_t) +
1676 sizeof(uword) * buffer.size();
1677}
1678} // namespace ewah
1679#endif
This object is returned by the compressed bitmap as a statistical descriptor.
Definition ewah-inl.h:240
A dynamic bitset implementation.
Definition boolarray.h:19
Same as RunningLengthWord, except that the values are buffered for quick access.
uword getRunningLength() const
how many words should be filled by the running bit (see previous method)
uword getNumberOfLiteralWords() const
followed by how many literal words?
uword size() const
Total of getRunningLength() and getNumberOfLiteralWords()
bool getRunningBit() const
Which bit is being repeated?
Same as RunningLengthWord, except that the values cannot be modified.
bool getRunningBit() const
Which bit is being repeated?
uword getRunningLength() const
how many words should be filled by the running bit
uword getNumberOfLiteralWords() const
followed by how many literal words?
This class is a compressed bitmap.
bool operator==(const EWAHBoolArray &x) const
We define two EWAHBoolArray as being equal if they have the same set bits.
Definition ewah-inl.h:720
void appendSetBits(container &out, const size_t offset=0) const
Convert to a list of positions of "set" bits.
Definition ewah-inl.h:845
void setSizeInBits(const size_t size)
set size in bits.
Definition ewah.h:668
void logicalxor(const EWAHBoolArray &a, EWAHBoolArray &container) const
computes the logical xor with another compressed bitmap answer goes into container Running time compl...
Definition ewah-inl.h:1288
EWAHBoolArrayRawIterator< uword > raw_iterator() const
To iterate over the compressed data.
Definition ewah-inl.h:715
size_t padWithZeroes(const size_t totalbits)
make sure the size of the array is totalbits bits by padding with zeroes.
Definition ewah-inl.h:635
void logicalor(const EWAHBoolArray &a, EWAHBoolArray &container) const
computes the logical or with another compressed bitmap answer goes into container Running time comple...
Definition ewah-inl.h:1193
void fastaddStreamOfDirtyWords(const uword *v, const size_t number)
LikeaddStreamOfDirtyWords but does not return the cost increse, does not update sizeinbits.
Definition ewah-inl.h:1036
void logicalandnot(const EWAHBoolArray &a, EWAHBoolArray &container) const
computes the logical and with another compressed bitmap answer goes into container Running time compl...
Definition ewah-inl.h:1436
size_t addStreamOfNegatedDirtyWords(const uword *v, const size_t number)
add a stream of dirty words, each one negated, returns the number of words added (storage cost increa...
Definition ewah-inl.h:1069
size_t logicalandcount(const EWAHBoolArray &a) const
computes the size (in number of set bits) of the logical and with another compressed bitmap Running t...
Definition ewah-inl.h:1555
void inplace_logicalnot()
Apply the logical not operation on this bitmap.
Definition ewah-inl.h:294
BitmapStatistics computeStatistics() const
For research purposes.
Definition ewah-inl.h:1635
void fastaddStreamOfEmptyWords(const bool v, size_t number)
Like addStreamOfEmptyWords but addStreamOfEmptyWords but does not return the cost increase,...
Definition ewah-inl.h:953
void debugprintout() const
Prints a verbose description of the content of the compressed bitmap.
Definition ewah-inl.h:1651
EWAHBoolArray< uword > logicalnot() const
Write the logical not of this bitmap in the provided container.
Definition ewah.h:646
bool set(size_t i)
Set the ith bit to true (starting at zero).
Definition ewah-inl.h:257
size_t addWord(const uword newdata, const uint32_t bitsthatmatter=8 *sizeof(uword))
convenience method.
Definition ewah-inl.h:475
size_t addStreamOfEmptyWords(const bool v, size_t number)
same as addEmptyWord, but you can do several in one shot! returns the number of words added (storage ...
Definition ewah-inl.h:898
void readBuffer(std::istream &in, const size_t buffersize)
read the buffer from a stream, see method writeBuffer.
Definition ewah-inl.h:495
size_t logicalxorcount(const EWAHBoolArray &a) const
computes the size (in number of set bits) of the logical xor with another compressed bitmap Running t...
Definition ewah-inl.h:1332
void writeBuffer(std::ostream &out) const
This only writes the content of the buffer (see write()) method.
Definition ewah-inl.h:488
size_t logicalandnotcount(const EWAHBoolArray &a) const
computes the size (in number of set bits) of the logical and not with another compressed bitmap Runni...
Definition ewah-inl.h:1505
size_t logicalorcount(const EWAHBoolArray &a) const
computes the size (in number of set bits) of the logical or with another compressed bitmap Running ti...
Definition ewah-inl.h:1243
size_t write(std::ostream &out, const bool savesizeinbits=true) const
Save this bitmap to a stream.
Definition ewah-inl.h:503
size_t read(std::istream &in, const bool savesizeinbits=true)
this is the counterpart to the write method.
Definition ewah-inl.h:556
void logicaland(const EWAHBoolArray &a, EWAHBoolArray &container) const
computes the logical and with another compressed bitmap answer goes into container Running time compl...
Definition ewah-inl.h:1381
size_t sizeOnDisk(const bool savesizeinbits=true) const
Compute the size on disk assuming that it was saved using the method "write".
Definition ewah-inl.h:1674
size_t numberOfOnes() const
Returns the number of bits set to the value 1.
Definition ewah-inl.h:365
size_t sizeInBits() const
Return the size in bits of this bitmap (this refers to the uncompressed size in bits).
Definition ewah.h:395
void reset()
clear the content of the bitmap.
Definition ewah.h:365
bool operator!=(const EWAHBoolArray &x) const
We define two EWAHBoolArray as being different if they do not have the same set bits.
Definition ewah-inl.h:881
size_t addStreamOfDirtyWords(const uword *v, const size_t number)
add a stream of dirty words, returns the number of words added (storage cost increase)
Definition ewah-inl.h:1002
bool intersects(const EWAHBoolArray &a) const
tests whether the bitmaps "intersect" (have at least one 1-bit at the same position).
Definition ewah-inl.h:1594
std::vector< size_t > toArray() const
Retrieve the set bits.
Definition ewah-inl.h:383
void append(const EWAHBoolArray &x)
Appends the content of some other compressed bitmap at the end of the current bitmap.
Definition ewah-inl.h:777
void swap(EWAHBoolArray &x)
Swap the content of this bitmap with another bitmap.
Definition ewah-inl.h:766
BoolArray< uword > toBoolArray() const
For convenience, this fully uncompresses the bitmap.
Definition ewah-inl.h:833
EWAHBoolArrayIterator< uword > uncompress() const
Iterate over the uncompressed words.
Definition ewah-inl.h:710
Iterate over words of bits from a compressed bitmap.
Definition ewah.h:16
bool hasNext() const
is there a new word?
Definition ewah-inl.h:41
uword next()
return next word.
Definition ewah-inl.h:46
This is a low-level iterator.
size_t operator*() const
Provides the location of the set bit.
Definition ewah-inl.h:102
uword getNumberOfLiteralWords() const
followed by how many literal words?
void setRunningBit(bool b)
running length of which type of bits
bool getRunningBit() const
Which bit is being repeated?
uword getRunningLength() const
how many words should be filled by the running bit