gitweb.ps.run Git - toc/blob - antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.cpp

   1 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
   2  * Use of this file is governed by the BSD 3-clause license that
   3  * can be found in the LICENSE.txt file in the project root.
   4  */
   5
   6 #include "misc/MurmurHash.h"
   7 #include "Lexer.h"
   8 #include "Exceptions.h"
   9 #include "Vocabulary.h"
  10
  11 #include "misc/IntervalSet.h"
  12
  13 using namespace antlr4;
  14 using namespace antlr4::misc;
  15
  16 IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
  17     IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
  18
  19 IntervalSet const IntervalSet::EMPTY_SET;
  20
  21 IntervalSet::IntervalSet() : _intervals() {
  22 }
  23
  24 IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
  25   _intervals = set._intervals;
  26 }
  27
  28 IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) {
  29 }
  30
  31 IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
  32 }
  33
  34 IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
  35   _intervals = other._intervals;
  36   return *this;
  37 }
  38
  39 IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
  40   _intervals = move(other._intervals);
  41   return *this;
  42 }
  43
  44 IntervalSet IntervalSet::of(ssize_t a) {
  45   return IntervalSet({ Interval(a, a) });
  46 }
  47
  48 IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
  49   return IntervalSet({ Interval(a, b) });
  50 }
  51
  52 void IntervalSet::clear() {
  53   _intervals.clear();
  54 }
  55
  56 void IntervalSet::add(ssize_t el) {
  57   add(el, el);
  58 }
  59
  60 void IntervalSet::add(ssize_t a, ssize_t b) {
  61   add(Interval(a, b));
  62 }
  63
  64 void IntervalSet::add(const Interval &addition) {
  65   if (addition.b < addition.a) {
  66     return;
  67   }
  68
  69   // find position in list
  70   for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) {
  71     Interval r = *iterator;
  72     if (addition == r) {
  73       return;
  74     }
  75
  76     if (addition.adjacent(r) || !addition.disjoint(r)) {
  77       // next to each other, make a single larger interval
  78       Interval bigger = addition.Union(r);
  79       *iterator = bigger;
  80
  81       // make sure we didn't just create an interval that
  82       // should be merged with next interval in list
  83       while (iterator + 1 != _intervals.end()) {
  84         Interval next = *++iterator;
  85         if (!bigger.adjacent(next) && bigger.disjoint(next)) {
  86           break;
  87         }
  88
  89         // if we bump up against or overlap next, merge
  90         iterator = _intervals.erase(iterator);// remove this one
  91         --iterator; // move backwards to what we just set
  92         *iterator = bigger.Union(next); // set to 3 merged ones
  93         // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result
  94       }
  95       return;
  96     }
  97
  98     if (addition.startsBeforeDisjoint(r)) {
  99       // insert before r
 100       //--iterator;
 101       _intervals.insert(iterator, addition);
 102       return;
 103     }
 104
 105     // if disjoint and after r, a future iteration will handle it
 106   }
 107
 108   // ok, must be after last interval (and disjoint from last interval)
 109   // just add it
 110   _intervals.push_back(addition);
 111 }
 112
 113 IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {
 114   IntervalSet result;
 115   for (const auto &s : sets) {
 116     result.addAll(s);
 117   }
 118   return result;
 119 }
 120
 121 IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
 122   // walk set and add each interval
 123   for (auto const& interval : set._intervals) {
 124     add(interval);
 125   }
 126   return *this;
 127 }
 128
 129 IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const {
 130   return complement(IntervalSet::of(minElement, maxElement));
 131 }
 132
 133 IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const {
 134   return vocabulary.subtract(*this);
 135 }
 136
 137 IntervalSet IntervalSet::subtract(const IntervalSet &other) const {
 138   return subtract(*this, other);
 139 }
 140
 141 IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) {
 142   if (left.isEmpty()) {
 143     return IntervalSet();
 144   }
 145
 146   if (right.isEmpty()) {
 147     // right set has no elements; just return the copy of the current set
 148     return left;
 149   }
 150
 151   IntervalSet result(left);
 152   size_t resultI = 0;
 153   size_t rightI = 0;
 154   while (resultI < result._intervals.size() && rightI < right._intervals.size()) {
 155     Interval &resultInterval = result._intervals[resultI];
 156     const Interval &rightInterval = right._intervals[rightI];
 157
 158     // operation: (resultInterval - rightInterval) and update indexes
 159
 160     if (rightInterval.b < resultInterval.a) {
 161       rightI++;
 162       continue;
 163     }
 164
 165     if (rightInterval.a > resultInterval.b) {
 166       resultI++;
 167       continue;
 168     }
 169
 170     Interval beforeCurrent;
 171     Interval afterCurrent;
 172     if (rightInterval.a > resultInterval.a) {
 173       beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1);
 174     }
 175
 176     if (rightInterval.b < resultInterval.b) {
 177       afterCurrent = Interval(rightInterval.b + 1, resultInterval.b);
 178     }
 179
 180     if (beforeCurrent.a > -1) { // -1 is the default value
 181       if (afterCurrent.a > -1) {
 182         // split the current interval into two
 183         result._intervals[resultI] = beforeCurrent;
 184         result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent);
 185         resultI++;
 186         rightI++;
 187       } else {
 188         // replace the current interval
 189         result._intervals[resultI] = beforeCurrent;
 190         resultI++;
 191       }
 192     } else {
 193       if (afterCurrent.a > -1) {
 194         // replace the current interval
 195         result._intervals[resultI] = afterCurrent;
 196         rightI++;
 197       } else {
 198         // remove the current interval (thus no need to increment resultI)
 199         result._intervals.erase(result._intervals.begin() + resultI);
 200       }
 201     }
 202   }
 203
 204   // If rightI reached right.intervals.size(), no more intervals to subtract from result.
 205   // If resultI reached result.intervals.size(), we would be subtracting from an empty set.
 206   // Either way, we are done.
 207   return result;
 208 }
 209
 210 IntervalSet IntervalSet::Or(const IntervalSet &a) const {
 211   IntervalSet result;
 212   result.addAll(*this);
 213   result.addAll(a);
 214   return result;
 215 }
 216
 217 IntervalSet IntervalSet::And(const IntervalSet &other) const {
 218   IntervalSet intersection;
 219   size_t i = 0;
 220   size_t j = 0;
 221
 222   // iterate down both interval lists looking for nondisjoint intervals
 223   while (i < _intervals.size() && j < other._intervals.size()) {
 224     Interval mine = _intervals[i];
 225     Interval theirs = other._intervals[j];
 226
 227     if (mine.startsBeforeDisjoint(theirs)) {
 228       // move this iterator looking for interval that might overlap
 229       i++;
 230     } else if (theirs.startsBeforeDisjoint(mine)) {
 231       // move other iterator looking for interval that might overlap
 232       j++;
 233     } else if (mine.properlyContains(theirs)) {
 234       // overlap, add intersection, get next theirs
 235       intersection.add(mine.intersection(theirs));
 236       j++;
 237     } else if (theirs.properlyContains(mine)) {
 238       // overlap, add intersection, get next mine
 239       intersection.add(mine.intersection(theirs));
 240       i++;
 241     } else if (!mine.disjoint(theirs)) {
 242       // overlap, add intersection
 243       intersection.add(mine.intersection(theirs));
 244
 245       // Move the iterator of lower range [a..b], but not
 246       // the upper range as it may contain elements that will collide
 247       // with the next iterator. So, if mine=[0..115] and
 248       // theirs=[115..200], then intersection is 115 and move mine
 249       // but not theirs as theirs may collide with the next range
 250       // in thisIter.
 251       // move both iterators to next ranges
 252       if (mine.startsAfterNonDisjoint(theirs)) {
 253         j++;
 254       } else if (theirs.startsAfterNonDisjoint(mine)) {
 255         i++;
 256       }
 257     }
 258   }
 259
 260   return intersection;
 261 }
 262
 263 bool IntervalSet::contains(size_t el) const {
 264   return contains(symbolToNumeric(el));
 265 }
 266
 267 bool IntervalSet::contains(ssize_t el) const {
 268   if (_intervals.empty())
 269     return false;
 270
 271   if (el < _intervals[0].a) // list is sorted and el is before first interval; not here
 272     return false;
 273
 274   for (const auto &interval : _intervals) {
 275     if (el >= interval.a && el <= interval.b) {
 276       return true; // found in this interval
 277     }
 278   }
 279   return false;
 280 }
 281
 282 bool IntervalSet::isEmpty() const {
 283   return _intervals.empty();
 284 }
 285
 286 ssize_t IntervalSet::getSingleElement() const {
 287   if (_intervals.size() == 1) {
 288     if (_intervals[0].a == _intervals[0].b) {
 289       return _intervals[0].a;
 290     }
 291   }
 292
 293   return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work?
 294 }
 295
 296 ssize_t IntervalSet::getMaxElement() const {
 297   if (_intervals.empty()) {
 298     return Token::INVALID_TYPE;
 299   }
 300
 301   return _intervals.back().b;
 302 }
 303
 304 ssize_t IntervalSet::getMinElement() const {
 305   if (_intervals.empty()) {
 306     return Token::INVALID_TYPE;
 307   }
 308
 309   return _intervals[0].a;
 310 }
 311
 312 std::vector<Interval> const& IntervalSet::getIntervals() const {
 313   return _intervals;
 314 }
 315
 316 size_t IntervalSet::hashCode() const {
 317   size_t hash = MurmurHash::initialize();
 318   for (const auto &interval : _intervals) {
 319     hash = MurmurHash::update(hash, interval.a);
 320     hash = MurmurHash::update(hash, interval.b);
 321   }
 322
 323   return MurmurHash::finish(hash, _intervals.size() * 2);
 324 }
 325
 326 bool IntervalSet::operator == (const IntervalSet &other) const {
 327   if (_intervals.empty() && other._intervals.empty())
 328     return true;
 329
 330   if (_intervals.size() != other._intervals.size())
 331     return false;
 332
 333   return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin());
 334 }
 335
 336 std::string IntervalSet::toString() const {
 337   return toString(false);
 338 }
 339
 340 std::string IntervalSet::toString(bool elemAreChar) const {
 341   if (_intervals.empty()) {
 342     return "{}";
 343   }
 344
 345   std::stringstream ss;
 346   size_t effectiveSize = size();
 347   if (effectiveSize > 1) {
 348     ss << "{";
 349   }
 350
 351   bool firstEntry = true;
 352   for (const auto &interval : _intervals) {
 353     if (!firstEntry)
 354       ss << ", ";
 355     firstEntry = false;
 356
 357     ssize_t a = interval.a;
 358     ssize_t b = interval.b;
 359     if (a == b) {
 360       if (a == -1) {
 361         ss << "<EOF>";
 362       } else if (elemAreChar) {
 363         ss << "'" << static_cast<char>(a) << "'";
 364       } else {
 365         ss << a;
 366       }
 367     } else {
 368       if (elemAreChar) {
 369         ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'";
 370       } else {
 371         ss << a << ".." << b;
 372       }
 373     }
 374   }
 375   if (effectiveSize > 1) {
 376     ss << "}";
 377   }
 378
 379   return ss.str();
 380 }
 381
 382 std::string IntervalSet::toString(const std::vector<std::string> &tokenNames) const {
 383   return toString(dfa::Vocabulary::fromTokenNames(tokenNames));
 384 }
 385
 386 std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const {
 387   if (_intervals.empty()) {
 388     return "{}";
 389   }
 390
 391   std::stringstream ss;
 392   size_t effectiveSize = size();
 393   if (effectiveSize > 1) {
 394     ss << "{";
 395   }
 396
 397   bool firstEntry = true;
 398   for (const auto &interval : _intervals) {
 399     if (!firstEntry)
 400       ss << ", ";
 401     firstEntry = false;
 402
 403     ssize_t a = interval.a;
 404     ssize_t b = interval.b;
 405     if (a == b) {
 406       ss << elementName(vocabulary, a);
 407     } else {
 408       for (ssize_t i = a; i <= b; i++) {
 409         if (i > a) {
 410           ss << ", ";
 411         }
 412         ss << elementName(vocabulary, i);
 413       }
 414     }
 415   }
 416   if (effectiveSize > 1) {
 417     ss << "}";
 418   }
 419
 420   return ss.str();
 421 }
 422
 423 std::string IntervalSet::elementName(const std::vector<std::string> &tokenNames, ssize_t a) const {
 424   return elementName(dfa::Vocabulary::fromTokenNames(tokenNames), a);
 425 }
 426
 427 std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const {
 428   if (a == -1) {
 429     return "<EOF>";
 430   } else if (a == -2) {
 431     return "<EPSILON>";
 432   } else {
 433     return vocabulary.getDisplayName(a);
 434   }
 435 }
 436
 437 size_t IntervalSet::size() const {
 438   size_t result = 0;
 439   for (const auto &interval : _intervals) {
 440     result += size_t(interval.b - interval.a + 1);
 441   }
 442   return result;
 443 }
 444
 445 std::vector<ssize_t> IntervalSet::toList() const {
 446   std::vector<ssize_t> result;
 447   for (const auto &interval : _intervals) {
 448     ssize_t a = interval.a;
 449     ssize_t b = interval.b;
 450     for (ssize_t v = a; v <= b; v++) {
 451       result.push_back(v);
 452     }
 453   }
 454   return result;
 455 }
 456
 457 std::set<ssize_t> IntervalSet::toSet() const {
 458   std::set<ssize_t> result;
 459   for (const auto &interval : _intervals) {
 460     ssize_t a = interval.a;
 461     ssize_t b = interval.b;
 462     for (ssize_t v = a; v <= b; v++) {
 463       result.insert(v);
 464     }
 465   }
 466   return result;
 467 }
 468
 469 ssize_t IntervalSet::get(size_t i) const {
 470   size_t index = 0;
 471   for (const auto &interval : _intervals) {
 472     ssize_t a = interval.a;
 473     ssize_t b = interval.b;
 474     for (ssize_t v = a; v <= b; v++) {
 475       if (index == i) {
 476         return v;
 477       }
 478       index++;
 479     }
 480   }
 481   return -1;
 482 }
 483
 484 void IntervalSet::remove(size_t el) {
 485   remove(symbolToNumeric(el));
 486 }
 487
 488 void IntervalSet::remove(ssize_t el) {
 489   for (size_t i = 0; i < _intervals.size(); ++i) {
 490     Interval &interval = _intervals[i];
 491     ssize_t a = interval.a;
 492     ssize_t b = interval.b;
 493     if (el < a) {
 494       break; // list is sorted and el is before this interval; not here
 495     }
 496
 497     // if whole interval x..x, rm
 498     if (el == a && el == b) {
 499       _intervals.erase(_intervals.begin() + (long)i);
 500       break;
 501     }
 502     // if on left edge x..b, adjust left
 503     if (el == a) {
 504       interval.a++;
 505       break;
 506     }
 507     // if on right edge a..x, adjust right
 508     if (el == b) {
 509       interval.b--;
 510       break;
 511     }
 512     // if in middle a..x..b, split interval
 513     if (el > a && el < b) { // found in this interval
 514       ssize_t oldb = interval.b;
 515       interval.b = el - 1; // [a..x-1]
 516       add(el + 1, oldb); // add [x+1..b]
 517
 518       break; // ml: not in the Java code but I believe we also should stop searching here, as we found x.
 519     }
 520   }
 521 }