200 lines
5.4 KiB
C++
200 lines
5.4 KiB
C++
// Copyright (c) 2005, Google Inc.
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Author: Sanjay Ghemawat
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include <vector>
|
|
#include <assert.h>
|
|
|
|
#include "pcrecpp_internal.h"
|
|
#include "pcre_scanner.h"
|
|
|
|
using std::vector;
|
|
|
|
namespace pcrecpp {
|
|
|
|
// Default constructor: the scanner owns an empty buffer.  No skip pattern
// is installed, automatic skipping and comment saving are both off, and
// nothing has been recorded in the comment list yet.
Scanner::Scanner()
    : data_(),
      input_(data_),           // remaining input is initialised from data_
      skip_(NULL),             // no skip pattern yet
      should_skip_(false),
      skip_repeat_(false),
      save_comments_(false),
      comments_(NULL),         // created on demand by ConsumeSkip()
      comments_offset_(0) {
}
|
|
|
|
// Constructs a scanner over a private copy of "in".  As with the default
// constructor, no skip pattern is installed, automatic skipping and comment
// saving are both off, and no comments have been recorded.
Scanner::Scanner(const string& in)
    : data_(in),               // keep our own copy of the caller's text
      input_(data_),           // remaining input is initialised from data_
      skip_(NULL),             // no skip pattern yet
      should_skip_(false),
      skip_repeat_(false),
      save_comments_(false),
      comments_(NULL),         // created on demand by ConsumeSkip()
      comments_offset_(0) {
}
|
|
|
|
// Frees the two heap objects the scanner owns: the compiled skip pattern
// and the list of saved comments.  Either pointer may still be NULL, in
// which case the corresponding delete does nothing.
Scanner::~Scanner() {
  delete skip_;
  delete comments_;
}
|
|
|
|
void Scanner::SetSkipExpression(const char* re) {
|
|
delete skip_;
|
|
if (re != NULL) {
|
|
skip_ = new RE(re);
|
|
should_skip_ = true;
|
|
skip_repeat_ = true;
|
|
ConsumeSkip();
|
|
} else {
|
|
skip_ = NULL;
|
|
should_skip_ = false;
|
|
skip_repeat_ = false;
|
|
}
|
|
}
|
|
|
|
void Scanner::Skip(const char* re) {
|
|
delete skip_;
|
|
if (re != NULL) {
|
|
skip_ = new RE(re);
|
|
should_skip_ = true;
|
|
skip_repeat_ = false;
|
|
ConsumeSkip();
|
|
} else {
|
|
skip_ = NULL;
|
|
should_skip_ = false;
|
|
skip_repeat_ = false;
|
|
}
|
|
}
|
|
|
|
// Turns automatic skipping off while leaving the installed skip pattern in
// place.  A skip pattern must already have been set (checked by assert).
void Scanner::DisableSkip() {
  assert(skip_ != NULL);

  should_skip_ = false;
}
|
|
|
|
// Turns automatic skipping back on and immediately applies the skip
// pattern at the current position.  A skip pattern must already have been
// set (checked by assert).
void Scanner::EnableSkip() {
  assert(skip_ != NULL);

  should_skip_ = true;
  ConsumeSkip();
}
|
|
|
|
int Scanner::LineNumber() const {
|
|
// TODO: Make it more efficient by keeping track of the last point
|
|
// where we computed line numbers and counting newlines since then.
|
|
// We could use std:count, but not all systems have it. :-(
|
|
int count = 1;
|
|
for (const char* p = data_.data(); p < input_.data(); ++p)
|
|
if (*p == '\n')
|
|
++count;
|
|
return count;
|
|
}
|
|
|
|
int Scanner::Offset() const {
|
|
return (int)(input_.data() - data_.c_str());
|
|
}
|
|
|
|
bool Scanner::LookingAt(const RE& re) const {
|
|
int consumed;
|
|
return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
|
|
}
|
|
|
|
|
|
bool Scanner::Consume(const RE& re,
|
|
const Arg& arg0,
|
|
const Arg& arg1,
|
|
const Arg& arg2) {
|
|
const bool result = re.Consume(&input_, arg0, arg1, arg2);
|
|
if (result && should_skip_) ConsumeSkip();
|
|
return result;
|
|
}
|
|
|
|
// helper function to consume *skip_ and honour save_comments_
|
|
void Scanner::ConsumeSkip() {
|
|
const char* start_data = input_.data();
|
|
while (skip_->Consume(&input_)) {
|
|
if (!skip_repeat_) {
|
|
// Only one skip allowed.
|
|
break;
|
|
}
|
|
}
|
|
if (save_comments_) {
|
|
if (comments_ == NULL) {
|
|
comments_ = new vector<StringPiece>;
|
|
}
|
|
// already pointing one past end, so no need to +1
|
|
int length = (int)(input_.data() - start_data);
|
|
if (length > 0) {
|
|
comments_->push_back(StringPiece(start_data, length));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
|
|
// short circuit out if we've not yet initialized comments_
|
|
// (e.g., when save_comments is false)
|
|
if (!comments_) {
|
|
return;
|
|
}
|
|
// TODO: if we guarantee that comments_ will contain StringPieces
|
|
// that are ordered by their start, then we can do a binary search
|
|
// for the first StringPiece at or past start and then scan for the
|
|
// ones contained in the range, quit early (use equal_range or
|
|
// lower_bound)
|
|
for (vector<StringPiece>::const_iterator it = comments_->begin();
|
|
it != comments_->end(); ++it) {
|
|
if ((it->data() >= data_.c_str() + start &&
|
|
it->data() + it->size() <= data_.c_str() + end)) {
|
|
ranges->push_back(*it);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// Appends to *ranges every saved comment that has not been handed out by
// an earlier call, advancing comments_offset_ past each one so that it is
// not returned again.  Does nothing if no comments have been recorded.
void Scanner::GetNextComments(vector<StringPiece> *ranges) {
  // Nothing to do until comments_ has been created (for instance when
  // save_comments is false).
  if (!comments_) {
    return;
  }
  vector<StringPiece>::const_iterator next =
      comments_->begin() + comments_offset_;
  const vector<StringPiece>::const_iterator stop = comments_->end();
  while (next != stop) {
    ranges->push_back(*next);
    ++comments_offset_;
    ++next;
  }
}
|
|
|
|
} // namespace pcrecpp
|