Regex: Add regex_replace native.
This commit is contained in:
@ -18,6 +18,7 @@ binary.sources = [
|
||||
'sdk/amxxmodule.cpp',
|
||||
'module.cpp',
|
||||
'CRegEx.cpp',
|
||||
'utils.cpp',
|
||||
]
|
||||
|
||||
AMXX.modules += [builder.Add(binary)]
|
||||
|
@ -30,10 +30,12 @@
|
||||
* you do not wish to do so, delete this exception statement from your
|
||||
* version.
|
||||
*/
|
||||
#include "amxxmodule.h"
|
||||
#include "pcre.h"
|
||||
#include "CRegEx.h"
|
||||
#include <string.h>
|
||||
#include "amxxmodule.h"
|
||||
#include <ctype.h>
|
||||
#include "utils.h"
|
||||
|
||||
RegEx::RegEx()
|
||||
{
|
||||
@ -43,6 +45,9 @@ RegEx::RegEx()
|
||||
mFree = true;
|
||||
subject = NULL;
|
||||
mSubStrings.clear();
|
||||
mMatchesSubs.clear();
|
||||
mSubsNameTable.clear();
|
||||
mNumSubpatterns = 0;
|
||||
}
|
||||
|
||||
void RegEx::Clear()
|
||||
@ -57,6 +62,9 @@ void RegEx::Clear()
|
||||
delete[] subject;
|
||||
subject = NULL;
|
||||
mSubStrings.clear();
|
||||
mMatchesSubs.clear();
|
||||
mSubsNameTable.clear();
|
||||
mNumSubpatterns = 0;
|
||||
}
|
||||
|
||||
RegEx::~RegEx()
|
||||
@ -143,6 +151,19 @@ int RegEx::Compile(const char *pattern, int iFlags)
|
||||
|
||||
mFree = false;
|
||||
|
||||
/**
|
||||
* Retrieve the number of captured groups
|
||||
* including the full match.
|
||||
*/
|
||||
pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns);
|
||||
++mNumSubpatterns;
|
||||
|
||||
/**
|
||||
* Build the table with the named groups,
|
||||
* which contain an index and a name per group.
|
||||
*/
|
||||
MakeSubpatternsTable(mNumSubpatterns);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -153,13 +174,13 @@ int RegEx::Match(const char *str)
|
||||
if (mFree || re == NULL)
|
||||
return -1;
|
||||
|
||||
this->ClearMatch();
|
||||
ClearMatch();
|
||||
|
||||
//save str
|
||||
subject = new char[strlen(str) + 1];
|
||||
strcpy(subject, str);
|
||||
|
||||
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
|
||||
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, REGEX_MAX_SUBPATTERNS);
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
@ -188,61 +209,103 @@ int RegEx::Match(const char *str)
|
||||
|
||||
int RegEx::MatchAll(const char *str)
|
||||
{
|
||||
int rc = 0;
|
||||
int rr = 0;
|
||||
int offset = 0;
|
||||
int rc = 0;
|
||||
int startOffset = 0;
|
||||
int exoptions = 0;
|
||||
int notEmpty = 0;
|
||||
int sizeOffsets = mNumSubpatterns * 3;
|
||||
int subjectLen = strlen(str);
|
||||
|
||||
if (mFree || re == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
this->ClearMatch();
|
||||
|
||||
//save str
|
||||
subject = new char[strlen(str) + 1];
|
||||
ClearMatch();
|
||||
|
||||
subject = new char[subjectLen + 1];
|
||||
strcpy(subject, str);
|
||||
|
||||
RegExSub sub, whole;
|
||||
while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30)))
|
||||
RegExSub sub;
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (rr < 0)
|
||||
rr = pcre_exec(re, NULL, subject, (int)subjectLen, startOffset, exoptions | notEmpty, ovector, REGEX_MAX_SUBPATTERNS);
|
||||
|
||||
/**
|
||||
* The string was already proved to be valid UTF-8
|
||||
*/
|
||||
exoptions |= PCRE_NO_UTF8_CHECK;
|
||||
|
||||
/**
|
||||
* Too many substrings
|
||||
*/
|
||||
if (rr == 0)
|
||||
{
|
||||
if (rr == PCRE_ERROR_NOMATCH)
|
||||
rr = sizeOffsets / 3;
|
||||
}
|
||||
|
||||
if (rr > 0)
|
||||
{
|
||||
mMatchesSubs.append(rr);
|
||||
|
||||
for (int s = 0; s < rr; ++s)
|
||||
{
|
||||
break;
|
||||
sub.start = ovector[2 * s];
|
||||
sub.end = ovector[2 * s + 1];
|
||||
|
||||
mSubStrings.append(sub);
|
||||
}
|
||||
}
|
||||
else if (rr == PCRE_ERROR_NOMATCH)
|
||||
{
|
||||
/**
|
||||
* If we previously set PCRE_NOTEMPTY after a null match,
|
||||
* this is not necessarily the end. We need to advance
|
||||
* the start offset, and continue. Fudge the offset values
|
||||
* to achieve this, unless we're already at the end of the string.
|
||||
*/
|
||||
if (notEmpty && startOffset < (int)subjectLen)
|
||||
{
|
||||
ovector[0] = startOffset;
|
||||
ovector[1] = startOffset + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
mErrorOffset = rr;
|
||||
|
||||
if (rc)
|
||||
this->ClearMatch();
|
||||
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rc += rr;
|
||||
mSubStrings.ensure(rc);
|
||||
|
||||
for (int s = 1; s < rr; ++s)
|
||||
else
|
||||
{
|
||||
sub.start = ovector[2 * s];
|
||||
sub.end = ovector[2 * s + 1];
|
||||
mSubStrings.append(sub);
|
||||
mErrorOffset = rr;
|
||||
|
||||
if (mMatchesSubs.length())
|
||||
{
|
||||
ClearMatch();
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = ovector[1];
|
||||
/**
|
||||
* If we have matched an empty string, mimic what Perl's /g options does.
|
||||
* This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
|
||||
* the match again at the same point. If this fails (picked up above) we
|
||||
* advance to the next character.
|
||||
*/
|
||||
notEmpty = (ovector[1] == ovector[0]) ? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
|
||||
|
||||
/**
|
||||
* Advance to the next piece.
|
||||
*/
|
||||
startOffset = ovector[1];
|
||||
}
|
||||
|
||||
if (!rc)
|
||||
if (!mMatchesSubs.length())
|
||||
{
|
||||
return 0;
|
||||
|
||||
sub = mSubStrings.at(0);
|
||||
whole.start = sub.start;
|
||||
sub = mSubStrings.back();
|
||||
whole.end = sub.end;
|
||||
|
||||
mSubStrings.insert(0, whole);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -256,18 +319,14 @@ void RegEx::ClearMatch()
|
||||
delete[] subject;
|
||||
subject = NULL;
|
||||
mSubStrings.clear();
|
||||
mMatchesSubs.clear();
|
||||
}
|
||||
|
||||
const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
||||
const char *getSubstring(char *subject, size_t start, size_t end, char buffer[], size_t max, size_t *outlen)
|
||||
{
|
||||
int i = 0;
|
||||
if ((size_t)s >= mSubStrings.length() || s < 0)
|
||||
return NULL;
|
||||
|
||||
RegExSub sub = mSubStrings.at(s);
|
||||
|
||||
char *substr_a = subject + sub.start;
|
||||
int substr_l = sub.end - sub.start;
|
||||
size_t i;
|
||||
char * substr_a = subject + start;
|
||||
size_t substr_l = end - start;
|
||||
|
||||
for (i = 0; i < substr_l; i++)
|
||||
{
|
||||
@ -278,5 +337,516 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
||||
|
||||
buffer[i] = '\0';
|
||||
|
||||
if (outlen)
|
||||
{
|
||||
*outlen = i;
|
||||
}
|
||||
|
||||
return buffer;
|
||||
}
|
||||
}
|
||||
|
||||
const char *RegEx::GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen)
|
||||
{
|
||||
if (start < 0 || start >= mSubStrings.length())
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
RegExSub sub = mSubStrings.at(start);
|
||||
|
||||
return getSubstring(subject, sub.start, sub.end, buffer, max, outlen);
|
||||
}
|
||||
|
||||
const char *RegEx::GetSubstring(size_t startOffset, size_t endOffset, char buffer[], size_t max, size_t *outlen)
|
||||
{
|
||||
if (startOffset < 0 || endOffset < 0)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return getSubstring(subject, startOffset, endOffset, buffer, max, outlen);
|
||||
}
|
||||
|
||||
void RegEx::MakeSubpatternsTable(int numSubpatterns)
|
||||
{
|
||||
int nameCount = 0;
|
||||
int rc = pcre_fullinfo(re, NULL, PCRE_INFO_NAMECOUNT, &nameCount);
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (nameCount > 0)
|
||||
{
|
||||
const char *nameTable;
|
||||
int nameSize = 0;
|
||||
int i = 0;
|
||||
|
||||
int rc1 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMETABLE, &nameTable);
|
||||
int rc2 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameSize);
|
||||
|
||||
rc = rc2 ? rc2 : rc1;
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
mSubsNameTable.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
NamedGroup data;
|
||||
|
||||
while (i++ < nameCount)
|
||||
{
|
||||
data.index = 0xff * (unsigned char)nameTable[0] + (unsigned char)nameTable[1];
|
||||
data.name = nameTable + 2;
|
||||
|
||||
mSubsNameTable.append(ke::Move(data));
|
||||
nameTable += nameSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
|
||||
{
|
||||
char *output = text;
|
||||
|
||||
/**
|
||||
* Retrieve all matches and store them in
|
||||
* mSubStrings list.
|
||||
*/
|
||||
if (MatchAll(output) == -1)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t subjectLen = strlen(subject);
|
||||
size_t total = 0;
|
||||
size_t baseIndex = 0;
|
||||
size_t diffLength = 0;
|
||||
|
||||
char *toReplace = new char[textMaxLen + 1];
|
||||
char *toSearch = NULL;
|
||||
|
||||
/**
|
||||
* All characters which is not matched are not copied when replacing matches.
|
||||
* Then original text (output buffer) should be considerated as empty.
|
||||
*/
|
||||
if (flags & REGEX_FORMAT_NOCOPY)
|
||||
{
|
||||
*output = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* This is used only when we do replace matches.
|
||||
*/
|
||||
toSearch = new char[textMaxLen + 1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop over all matches found.
|
||||
*/
|
||||
for (size_t i = 0; i < mMatchesSubs.length(); ++i)
|
||||
{
|
||||
char *ptr = toReplace;
|
||||
|
||||
size_t browsed = 0;
|
||||
size_t searchLen = 0;
|
||||
size_t length = 0;
|
||||
|
||||
/**
|
||||
* Build the replace string as it can contain backreference
|
||||
* and this needs to be parsed.
|
||||
*/
|
||||
for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed)
|
||||
{
|
||||
unsigned int c = *s;
|
||||
|
||||
/**
|
||||
* Supported format specifiers:
|
||||
*
|
||||
* $number : Substitutes the substring matched by group number.
|
||||
* n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
|
||||
* ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters).
|
||||
* $& : Substitutes a copy of the whole match.
|
||||
* $` : Substitutes all the text of the input string before the match.
|
||||
* $' : Substitutes all the text of the input string after the match.
|
||||
* $+ : Substitutes the last group that was captured.
|
||||
* $_ : Substitutes the entire input string.
|
||||
* $$ : Substitutes a literal "$".
|
||||
*/
|
||||
if (c == '$' || c == '\\')
|
||||
{
|
||||
switch (*++s)
|
||||
{
|
||||
case '\0':
|
||||
{
|
||||
/**
|
||||
* End of string.
|
||||
* Copy one character.
|
||||
*/
|
||||
*(ptr + browsed) = c;
|
||||
break;
|
||||
}
|
||||
case '&':
|
||||
{
|
||||
/**
|
||||
* Concatenate retrieved full match sub-string.
|
||||
* length - 1 to overwrite EOS.
|
||||
*/
|
||||
GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length);
|
||||
browsed += length - 1;
|
||||
break;
|
||||
}
|
||||
case '`':
|
||||
{
|
||||
/**
|
||||
* Concatenate part of original text up to
|
||||
* first sub-string position.
|
||||
*/
|
||||
length = mSubStrings.at(baseIndex).start;
|
||||
memcpy(ptr + browsed, subject, length);
|
||||
browsed += length - 1;
|
||||
break;
|
||||
}
|
||||
case '\'':
|
||||
{
|
||||
/**
|
||||
* Concatenate part of original text from
|
||||
* last sub-string end position to EOS.
|
||||
*/
|
||||
length = mSubStrings.at(baseIndex).end;
|
||||
memcpy(ptr + browsed, subject + length, subjectLen - length);
|
||||
browsed += (subjectLen - length) - 1;
|
||||
break;
|
||||
}
|
||||
case '+':
|
||||
{
|
||||
/**
|
||||
* Copy the last group that was captured.
|
||||
*/
|
||||
GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length);
|
||||
browsed += length - 1;
|
||||
break;
|
||||
}
|
||||
case '_':
|
||||
{
|
||||
/**
|
||||
* Copy the entire input string.
|
||||
*/
|
||||
memcpy(ptr + browsed, subject, subjectLen);
|
||||
browsed += (subjectLen - 1);
|
||||
break;
|
||||
}
|
||||
case '$':
|
||||
case '\\':
|
||||
{
|
||||
/**
|
||||
* Copy the single character $ or \.
|
||||
*/
|
||||
*(ptr + browsed) = c;
|
||||
break;
|
||||
}
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
case '{':
|
||||
{
|
||||
/**
|
||||
* Checking backreference.
|
||||
* Which can be either $n, ${n} or ${name}.
|
||||
*/
|
||||
size_t backref = -1;
|
||||
const char *walk = s;
|
||||
bool inBrace = false;
|
||||
bool nameCheck = false;
|
||||
|
||||
/**
|
||||
* ${nn}.
|
||||
* ^
|
||||
*/
|
||||
if (*walk == '{')
|
||||
{
|
||||
inBrace = true;
|
||||
++walk;
|
||||
}
|
||||
|
||||
/**
|
||||
* Valid number.
|
||||
* $nn or ${nn}
|
||||
* ^ ^
|
||||
*/
|
||||
if (*walk >= '0' && *walk <= '9')
|
||||
{
|
||||
backref = *walk - '0';
|
||||
++walk;
|
||||
}
|
||||
else if (inBrace)
|
||||
{
|
||||
nameCheck = true;
|
||||
|
||||
/**
|
||||
* Not a valid number.
|
||||
* Checking as string.
|
||||
* ${name}
|
||||
* ^
|
||||
*/
|
||||
if (*walk)
|
||||
{
|
||||
const char *pch = strchr(walk, '}');
|
||||
|
||||
if (pch != NULL)
|
||||
{
|
||||
/**
|
||||
* A named group maximum character is 32 (PCRE).
|
||||
*/
|
||||
char name[32];
|
||||
size_t nameLength = strncopy(name, walk, pch - walk + 1);
|
||||
|
||||
int flags, num = 0;
|
||||
pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &flags);
|
||||
|
||||
/**
|
||||
* If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
|
||||
* as pcre_get_stringnumber output order is not defined.
|
||||
*/
|
||||
if (flags & PCRE_DUPNAMES)
|
||||
{
|
||||
memset(ovector, 0, REGEX_MAX_SUBPATTERNS);
|
||||
|
||||
/**
|
||||
* pcre_copy_named_substring needs a vector containing sub-patterns ranges
|
||||
* for a given match.
|
||||
*/
|
||||
for (size_t j = 0; j < mMatchesSubs.at(i); ++j)
|
||||
{
|
||||
ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
|
||||
ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
|
||||
}
|
||||
|
||||
num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen);
|
||||
|
||||
if (num != PCRE_ERROR_NOSUBSTRING)
|
||||
{
|
||||
browsed += num - 1;
|
||||
s = pch;
|
||||
break;
|
||||
}
|
||||
++pch;
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* Retrieve sub-pattern index from a give name.
|
||||
*/
|
||||
num = pcre_get_stringnumber(re, name);
|
||||
if (num != PCRE_ERROR_NOSUBSTRING)
|
||||
{
|
||||
backref = num;
|
||||
walk = ++pch;
|
||||
}
|
||||
}
|
||||
|
||||
if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
|
||||
{
|
||||
/**
|
||||
* If a sub-string for a given match is not found, or if > to
|
||||
* number of sub-patterns we still need to check if this
|
||||
* group name is a valid one because if so we want to escape it.
|
||||
* Looking at the name table.
|
||||
*/
|
||||
bool found = false;
|
||||
for (size_t i = 0; i < mSubsNameTable.length(); ++i)
|
||||
{
|
||||
if (!mSubsNameTable.at(i).name.compare(name))
|
||||
{
|
||||
--browsed;
|
||||
s = --pch;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!nameCheck)
|
||||
{
|
||||
/**
|
||||
* Valid second number.
|
||||
* $nn or ${nn}
|
||||
* ^ ^
|
||||
*/
|
||||
if (*walk && *walk >= '0' && *walk <= '9')
|
||||
{
|
||||
backref = backref * 10 + *walk - '0';
|
||||
++walk;
|
||||
}
|
||||
|
||||
if (inBrace)
|
||||
{
|
||||
/**
|
||||
* Invalid specifier
|
||||
* Either hit EOS or missing }.
|
||||
* ${n or ${nn or ${nx or ${nnx
|
||||
* ^ ^ ^ ^
|
||||
*/
|
||||
if (*walk == '\0' || *walk != '}')
|
||||
{
|
||||
backref = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
++walk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
length = walk - s;
|
||||
s = --walk;
|
||||
|
||||
/**
|
||||
* We can't provide a capture number >= to total that pcre_exec has found.
|
||||
* 0 is implicitly accepted, same behavior as $&.
|
||||
*/
|
||||
if (backref >= 0 && (int)backref < mNumSubpatterns)
|
||||
{
|
||||
/**
|
||||
* Valid available index for a given match.
|
||||
*/
|
||||
if (backref < mMatchesSubs.at(i))
|
||||
{
|
||||
/**
|
||||
* Concatenate retrieved sub-string.
|
||||
* length - 1 to overwrite EOS.
|
||||
*/
|
||||
GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length);
|
||||
browsed += length - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* Valid unavailable index for a given match.
|
||||
*/
|
||||
--browsed;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* If we here it means the syntax is valid but sub-pattern doesn't exist.
|
||||
* So, copy as it is, including $.
|
||||
*/
|
||||
memcpy(ptr + browsed, s - length, length + 1);
|
||||
browsed += length;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
/**
|
||||
* Not a valid format modifier.
|
||||
* So we copy characters as it is.
|
||||
*/
|
||||
*(ptr + browsed) = *s;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* At this point, direct copy.
|
||||
*/
|
||||
*(ptr + browsed) = c;
|
||||
}
|
||||
}
|
||||
|
||||
*(ptr + browsed) = '\0';
|
||||
|
||||
/**
|
||||
* Concatenate only replace string of each match,
|
||||
* as we don't want to copy unmatched characters.
|
||||
*/
|
||||
if (flags & REGEX_FORMAT_NOCOPY)
|
||||
{
|
||||
/**
|
||||
* We want just the first occurrence.
|
||||
*/
|
||||
if (total++ && (flags & REGEX_FORMAT_FIRSTONLY))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
strncat(output, toReplace, textMaxLen + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* Retrieves full string of a given match.
|
||||
*/
|
||||
const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);
|
||||
|
||||
/**
|
||||
* We get something to replace, but the sub-pattern to search is empty.
|
||||
* We insert replacement either a the start end or string.
|
||||
*/
|
||||
if (*toReplace && !searchLen)
|
||||
{
|
||||
if (output - text > 0)
|
||||
{
|
||||
strncat(output, toReplace, textMaxLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
strncat(toReplace, text, textMaxLen);
|
||||
strncopy(text, toReplace, strlen(toReplace) + 1);
|
||||
}
|
||||
|
||||
++total;
|
||||
}
|
||||
else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
|
||||
{
|
||||
/**
|
||||
* Then we simply do a replace.
|
||||
* Probably not the most efficient, but this should be at least safe.
|
||||
* To avoid issue where the function could find a string which is not at the expected index,
|
||||
* We force the input string to start from index of the full match.
|
||||
*/
|
||||
++total;
|
||||
}
|
||||
|
||||
if (total && (flags & REGEX_FORMAT_FIRSTONLY))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* mMatchesSubs is a flat list containing all sub-patterns of all matches.
|
||||
* A number of sub-patterns can vary per match. So we calculate the position in the list,
|
||||
* from where the first sub-pattern result of current match starts.
|
||||
*/
|
||||
baseIndex += mMatchesSubs.at(i);
|
||||
diffLength += browsed - searchLen;
|
||||
}
|
||||
|
||||
delete[] toReplace;
|
||||
|
||||
if (toSearch != NULL)
|
||||
{
|
||||
delete[] toSearch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of successful replacements.
|
||||
*/
|
||||
return total;
|
||||
}
|
||||
|
@ -34,6 +34,19 @@
|
||||
#define _INCLUDE_CREGEX_H
|
||||
|
||||
#include <am-vector.h>
|
||||
#include <am-string.h>
|
||||
|
||||
/**
|
||||
 * Size of the PCRE output vector: room for 50 sub-patterns at 3 integers each (must be a multiple of 3).
|
||||
*/
|
||||
#define REGEX_MAX_SUBPATTERNS 150
|
||||
|
||||
/**
|
||||
 * Flags to use with regex_replace to control the replacement behavior.
|
||||
*/
|
||||
#define REGEX_FORMAT_DEFAULT 0 // Uses the standard formatting rules to replace matches.
|
||||
#define REGEX_FORMAT_NOCOPY 1 // The sections that do not match the regular expression are not copied when replacing matches.
|
||||
#define REGEX_FORMAT_FIRSTONLY 2 // Only the first occurrence of a regular expression is replaced.
|
||||
|
||||
class RegEx
|
||||
{
|
||||
@ -42,6 +55,11 @@ public:
|
||||
int start, end;
|
||||
};
|
||||
|
||||
struct NamedGroup {
|
||||
ke::AString name;
|
||||
size_t index;
|
||||
};
|
||||
|
||||
RegEx();
|
||||
~RegEx();
|
||||
|
||||
@ -52,8 +70,11 @@ public:
|
||||
int Compile(const char *pattern, int iFlags);
|
||||
int Match(const char *str);
|
||||
int MatchAll(const char *str);
|
||||
int Replace(char *text, size_t text_maxlen, const char *replace, size_t replaceLen, int flags = 0);
|
||||
void ClearMatch();
|
||||
const char *GetSubstring(int s, char buffer[], int max);
|
||||
const char *GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen = NULL);
|
||||
const char *GetSubstring(size_t start, size_t end, char buffer[], size_t max, size_t *outlen = NULL);
|
||||
void MakeSubpatternsTable(int numSubpatterns);
|
||||
|
||||
public:
|
||||
int mErrorOffset;
|
||||
@ -63,9 +84,12 @@ public:
|
||||
private:
|
||||
pcre *re;
|
||||
bool mFree;
|
||||
int ovector[30];
|
||||
int ovector[REGEX_MAX_SUBPATTERNS];
|
||||
char *subject;
|
||||
ke::Vector<RegExSub> mSubStrings;
|
||||
ke::Vector<size_t> mMatchesSubs;
|
||||
ke::Vector<NamedGroup> mSubsNameTable;
|
||||
int mNumSubpatterns;
|
||||
};
|
||||
|
||||
#endif //_INCLUDE_CREGEX_H
|
||||
|
@ -153,6 +153,8 @@ cell match(AMX *amx, cell *params, bool all)
|
||||
else
|
||||
{
|
||||
*errorCode = x->Count();
|
||||
if (all)
|
||||
return x->Count();
|
||||
}
|
||||
|
||||
return id + 1;
|
||||
@ -272,6 +274,43 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
|
||||
return 1;
|
||||
}
|
||||
|
||||
//native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);
|
||||
/**
 * Native: runs RegEx::Replace() on a plugin string.
 *
 * params[1] is the regex handle, params[2] the string (read and written back),
 * params[3] its maximum length, params[4] the replacement format, params[5]
 * the flags and params[6] the by-reference error code.
 *
 * Returns 0 on an invalid handle or when nothing was replaced, -2 when the
 * replace operation failed (error code set from mErrorOffset), otherwise the
 * value returned by RegEx::Replace().
 */
static cell AMX_NATIVE_CALL regex_replace(AMX *amx, cell *params)
{
	const int id = params[1] - 1;

	if (id < 0 || id >= (int)PEL.length() || PEL[id]->isFree())
	{
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
		return 0;
	}

	int textLen, replaceLen;
	char *text = MF_GetAmxString(amx, params[2], 0, &textLen);
	const char *replace = MF_GetAmxString(amx, params[4], 1, &replaceLen);

	cell *errorCode = MF_GetAmxAddr(amx, params[6]);

	RegEx *regex = PEL[id];
	const int replaced = regex->Replace(text, params[3] + 1, replace, replaceLen, params[5]);

	/* Failure and "no replacement" both drop the match data and leave the plugin string untouched. */
	if (replaced == -1 || replaced == 0)
	{
		*errorCode = (replaced == -1) ? regex->mErrorOffset : 0;
		regex->ClearMatch();

		return (replaced == -1) ? -2 : 0;
	}

	MF_SetAmxString(amx, params[2], text, params[3]);

	return replaced;
}
|
||||
|
||||
AMX_NATIVE_INFO regex_Natives[] = {
|
||||
{"regex_compile", regex_compile},
|
||||
{"regex_compile_ex", regex_compile_ex},
|
||||
@ -280,6 +319,7 @@ AMX_NATIVE_INFO regex_Natives[] = {
|
||||
{"regex_match_all", regex_match_all},
|
||||
{"regex_match_all_c", regex_match_all_c},
|
||||
{"regex_substr", regex_substr},
|
||||
{"regex_replace", regex_replace},
|
||||
{"regex_free", regex_free},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
@ -99,6 +99,7 @@
|
||||
<ClCompile Include="..\CRegEx.cpp" />
|
||||
<ClCompile Include="..\module.cpp" />
|
||||
<ClCompile Include="..\sdk\amxxmodule.cpp" />
|
||||
<ClCompile Include="..\utils.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\CRegEx.h" />
|
||||
@ -107,6 +108,7 @@
|
||||
<ClInclude Include="..\sdk\moduleconfig.h" />
|
||||
<ClInclude Include="..\sdk\CVector.h" />
|
||||
<ClInclude Include="..\sdk\amxxmodule.h" />
|
||||
<ClInclude Include="..\utils.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\..\plugins\include\regex.inc" />
|
||||
|
@ -32,6 +32,9 @@
|
||||
<ClCompile Include="..\sdk\amxxmodule.cpp">
|
||||
<Filter>Module SDK\SDK Base</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\utils.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\CRegEx.h">
|
||||
@ -52,6 +55,9 @@
|
||||
<ClInclude Include="..\sdk\amxxmodule.h">
|
||||
<Filter>Module SDK\SDK Base</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\utils.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\..\plugins\include\regex.inc">
|
||||
|
236
dlls/regex/utils.cpp
Normal file
236
dlls/regex/utils.cpp
Normal file
@ -0,0 +1,236 @@
|
||||
|
||||
#include "amxxmodule.h"
|
||||
#include <string.h>
|
||||
#include "utils.h"
|
||||
|
||||
/**
 * Checks whether the byte pointed to by 'c' is the last byte of a complete
 * UTF-8 sequence.
 *
 * Walks backwards over continuation bytes (10xxxxxx) to the preceding byte,
 * then compares the number of bytes seen with the length announced by that
 * byte's high nibble.
 *
 * @param c     Pointer to the byte to inspect; the bytes before it are read
 *              while they are continuation bytes.
 * @return      0 when the sequence is complete, otherwise the number of bytes
 *              seen (lead byte included).
 */
int UTIL_CheckValidChar(char *c)
{
	int seen = 1;

	while ((*c & 0xC0) == 0x80)
	{
		--c;
		++seen;
	}

	const int lead = *c & 0xF0;
	int expected = 0;

	if (lead == 0xC0 || lead == 0xD0)
	{
		expected = 2;
	}
	else if (lead == 0xE0)
	{
		expected = 3;
	}
	else if (lead == 0xF0)
	{
		expected = 4;
	}

	return (expected == seen) ? 0 : seen;
}
|
||||
|
||||
/**
 * Bounded string copy that always terminates the destination.
 *
 * @param dest      Destination buffer.
 * @param src       Zero-terminated source string.
 * @param count     Size of 'dest' in bytes, terminator included. When 0,
 *                  nothing is written.
 * @return          Number of characters copied, terminator excluded.
 */
unsigned int strncopy(char *dest, const char *src, size_t count)
{
	if (count == 0)
	{
		return 0;
	}

	/* One byte is reserved for the terminator. */
	const size_t capacity = count - 1;
	size_t written = 0;

	for (; written < capacity && src[written] != '\0'; ++written)
	{
		dest[written] = src[written];
	}

	dest[written] = '\0';

	return (unsigned int)written;
}
|
||||
|
||||
/**
|
||||
* NOTE: Do not edit this for the love of god unless you have
|
||||
* read the test cases and understand the code behind each one.
|
||||
* While I don't guarantee there aren't mistakes, I do guarantee
|
||||
* that plugins will end up relying on tiny idiosyncrasies of this
|
||||
* function, just like they did with AMX Mod X.
|
||||
*
|
||||
* There are explicitly more cases than the AMX Mod X version because
|
||||
* we're not doing a blind copy. Each case is specifically optimized
|
||||
* for what needs to be done. Even better, we don't have to error on
|
||||
* bad buffer sizes. Instead, this function will smartly cut off the
|
||||
* string in a way that pushes old data out.
|
||||
*/
|
||||
/**
 * Replaces the first occurrence of 'search' inside 'subject', in place.
 *
 * @param subject           Zero-terminated buffer to search and modify.
 * @param maxLen            Size of the buffer starting at 'subject', terminator included.
 * @param search            Text to look for.
 * @param searchLen         Length of 'search'.
 * @param replace           Replacement text.
 * @param replaceLen        Length of 'replace'.
 * @param caseSensitive     true to compare with strcmp/strncmp, false to use stricmp/strnicmp.
 * @return                  Pointer just past the inserted replacement, or NULL when
 *                          'search' was not found or the replacement is not possible.
 */
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
{
	char *ptr = subject;
	size_t browsed = 0;
	size_t textLen = strlen(subject);

	/* It's not possible to search or replace */
	if (searchLen > textLen)
	{
		return NULL;
	}

	/* Handle the case of one byte replacement.
	 * It's only valid in one case.
	 */
	if (maxLen == 1)
	{
		/* If the search matches and the replace length is 0,
		 * we can just terminate the string and be done.
		 */
		if ((caseSensitive ? strcmp(subject, search) : stricmp(subject, search)) == 0 && replaceLen == 0)
		{
			*subject = '\0';
			return subject;
		}
		else
		{
			return NULL;
		}
	}

	/* Subtract one off the maxlength so we can include the null terminator */
	maxLen--;

	/* 'browsed' is the offset of 'ptr' from 'subject'; stop once 'search' can no longer fit. */
	while (*ptr != '\0' && (browsed <= textLen - searchLen))
	{
		/* See if we get a comparison */
		if ((caseSensitive ? strncmp(ptr, search, searchLen) : strnicmp(ptr, search, searchLen)) == 0)
		{
			if (replaceLen > searchLen)
			{
				/* First, see if we have enough space to do this operation */
				if (maxLen - textLen < replaceLen - searchLen)
				{
					/* First, see if the replacement length goes out of bounds. */
					if (browsed + replaceLen >= maxLen)
					{
						/* EXAMPLE CASE:
						 * Subject: AABBBCCC
						 * Buffer : 12 bytes
						 * Search : BBB
						 * Replace: DDDDDDDDDD
						 * OUTPUT : AADDDDDDDDD
						 * POSITION:           ^
						 */
						/* If it does, we'll just bound the length and do a strcpy. */
						replaceLen = maxLen - browsed;

						/* Note, we add one to the final result for the null terminator */
						strncopy(ptr, replace, replaceLen + 1);

						/* Don't truncate a multi-byte character */
						if (*(ptr + replaceLen - 1) & 1 << 7)
						{
							/* UTIL_CheckValidChar returns how many bytes of an incomplete sequence to drop (0 if complete). */
							replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1);
							*(ptr + replaceLen) = '\0';
						}
					}
					else
					{
						/* EXAMPLE CASE:
						 * Subject: AABBBCCC
						 * Buffer : 12 bytes
						 * Search : BBB
						 * Replace: DDDDDDD
						 * OUTPUT : AADDDDDDDCC
						 * POSITION:         ^
						 */
						/* We're going to have some bytes left over... */
						size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1;
						size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1;
						/* Skip the leading part of the tail that will not fit; the bytes kept are the last ones, terminator included. */
						char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy);
						char *moveTo = ptr + replaceLen;

						/* First, move our old data out of the way. */
						memmove(moveTo, moveFrom, realBytesToCopy);

						/* Now, do our replacement. */
						memcpy(ptr, replace, replaceLen);
					}
				}
				else
				{
					/* EXAMPLE CASE:
					 * Subject: AABBBCCC
					 * Buffer : 12 bytes
					 * Search : BBB
					 * Replace: DDDD
					 * OUTPUT : AADDDDCCC
					 * POSITION:      ^
					 */
					/* Yes, we have enough space. Do a normal move operation. */
					char *moveFrom = ptr + searchLen;
					char *moveTo = ptr + replaceLen;

					/* First move our old data out of the way. */
					size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
					memmove(moveTo, moveFrom, bytesToCopy);

					/* Now do our replacement. */
					memcpy(ptr, replace, replaceLen);
				}
			}
			else if (replaceLen < searchLen)
			{
				/* EXAMPLE CASE:
				 * Subject: AABBBCCC
				 * Buffer : 12 bytes
				 * Search : BBB
				 * Replace: D
				 * OUTPUT : AADCCC
				 * POSITION:   ^
				 */
				/* If the replacement does not grow the string length, we do not
				 * need to do any fancy checking at all. Yay!
				 */
				char *moveFrom = ptr + searchLen; /* Start after the search pointer */
				char *moveTo = ptr + replaceLen; /* Copy to where the replacement ends */

				/* Copy our replacement in, if any */
				if (replaceLen)
				{
					memcpy(ptr, replace, replaceLen);
				}

				/* Figure out how many bytes to move down, including null terminator */
				size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;

				/* Move the rest of the string down */
				memmove(moveTo, moveFrom, bytesToCopy);
			}
			else
			{
				/* EXAMPLE CASE:
				 * Subject: AABBBCCC
				 * Buffer : 12 bytes
				 * Search : BBB
				 * Replace: DDD
				 * OUTPUT : AADDDCCC
				 * POSITION:     ^
				 */
				/* We don't have to move anything around, just do a straight copy */
				memcpy(ptr, replace, replaceLen);
			}

			/* Only the first occurrence is handled: return right after the inserted text. */
			return ptr + replaceLen;
		}
		ptr++;
		browsed++;
	}

	return NULL;
}
|
8
dlls/regex/utils.h
Normal file
8
dlls/regex/utils.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef UTILS_H
#define UTILS_H

#include <stddef.h> /* size_t: lets this header be included on its own. */

/**
 * Checks whether 'c' points at the last byte of a complete UTF-8 sequence.
 * Returns 0 when it does, otherwise the number of bytes seen so far.
 */
int UTIL_CheckValidChar(char *c);

/**
 * Replaces the first occurrence of 'search' in 'subject', in place.
 * 'maxLen' is the buffer size, terminator included. Returns a pointer just
 * past the inserted replacement, or NULL when nothing was replaced.
 */
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive);

/**
 * Copies at most count - 1 characters of 'src' into 'dest' and always
 * terminates it (unless 'count' is 0). Returns the number of characters copied.
 */
unsigned int strncopy(char *dest, const char *src, size_t count);

#endif // UTILS_H
|
Reference in New Issue
Block a user