Regex: Add regex_replace native.
This commit is contained in:
parent
287f471ac4
commit
939a724b1a
|
@ -18,6 +18,7 @@ binary.sources = [
|
|||
'sdk/amxxmodule.cpp',
|
||||
'module.cpp',
|
||||
'CRegEx.cpp',
|
||||
'utils.cpp',
|
||||
]
|
||||
|
||||
AMXX.modules += [builder.Add(binary)]
|
||||
|
|
|
@ -30,10 +30,12 @@
|
|||
* you do not wish to do so, delete this exception statement from your
|
||||
* version.
|
||||
*/
|
||||
#include "amxxmodule.h"
|
||||
#include "pcre.h"
|
||||
#include "CRegEx.h"
|
||||
#include <string.h>
|
||||
#include "amxxmodule.h"
|
||||
#include <ctype.h>
|
||||
#include "utils.h"
|
||||
|
||||
RegEx::RegEx()
|
||||
{
|
||||
|
@ -43,6 +45,9 @@ RegEx::RegEx()
|
|||
mFree = true;
|
||||
subject = NULL;
|
||||
mSubStrings.clear();
|
||||
mMatchesSubs.clear();
|
||||
mSubsNameTable.clear();
|
||||
mNumSubpatterns = 0;
|
||||
}
|
||||
|
||||
void RegEx::Clear()
|
||||
|
@ -57,6 +62,9 @@ void RegEx::Clear()
|
|||
delete[] subject;
|
||||
subject = NULL;
|
||||
mSubStrings.clear();
|
||||
mMatchesSubs.clear();
|
||||
mSubsNameTable.clear();
|
||||
mNumSubpatterns = 0;
|
||||
}
|
||||
|
||||
RegEx::~RegEx()
|
||||
|
@ -143,6 +151,19 @@ int RegEx::Compile(const char *pattern, int iFlags)
|
|||
|
||||
mFree = false;
|
||||
|
||||
/**
|
||||
* Retrieve the number of captured groups
|
||||
* including the full match.
|
||||
*/
|
||||
pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns);
|
||||
++mNumSubpatterns;
|
||||
|
||||
/**
|
||||
* Build the table with the named groups,
|
||||
* which contain an index and a name per group.
|
||||
*/
|
||||
MakeSubpatternsTable(mNumSubpatterns);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -153,13 +174,13 @@ int RegEx::Match(const char *str)
|
|||
if (mFree || re == NULL)
|
||||
return -1;
|
||||
|
||||
this->ClearMatch();
|
||||
ClearMatch();
|
||||
|
||||
//save str
|
||||
subject = new char[strlen(str) + 1];
|
||||
strcpy(subject, str);
|
||||
|
||||
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
|
||||
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, REGEX_MAX_SUBPATTERNS);
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
|
@ -188,61 +209,103 @@ int RegEx::Match(const char *str)
|
|||
|
||||
int RegEx::MatchAll(const char *str)
|
||||
{
|
||||
int rc = 0;
|
||||
int rr = 0;
|
||||
int offset = 0;
|
||||
int rc = 0;
|
||||
int startOffset = 0;
|
||||
int exoptions = 0;
|
||||
int notEmpty = 0;
|
||||
int sizeOffsets = mNumSubpatterns * 3;
|
||||
int subjectLen = strlen(str);
|
||||
|
||||
if (mFree || re == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
this->ClearMatch();
|
||||
ClearMatch();
|
||||
|
||||
//save str
|
||||
subject = new char[strlen(str) + 1];
|
||||
subject = new char[subjectLen + 1];
|
||||
strcpy(subject, str);
|
||||
|
||||
RegExSub sub, whole;
|
||||
while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30)))
|
||||
RegExSub sub;
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (rr < 0)
|
||||
rr = pcre_exec(re, NULL, subject, (int)subjectLen, startOffset, exoptions | notEmpty, ovector, REGEX_MAX_SUBPATTERNS);
|
||||
|
||||
/**
|
||||
* The string was already proved to be valid UTF-8
|
||||
*/
|
||||
exoptions |= PCRE_NO_UTF8_CHECK;
|
||||
|
||||
/**
|
||||
* Too many substrings
|
||||
*/
|
||||
if (rr == 0)
|
||||
{
|
||||
if (rr == PCRE_ERROR_NOMATCH)
|
||||
rr = sizeOffsets / 3;
|
||||
}
|
||||
|
||||
if (rr > 0)
|
||||
{
|
||||
mMatchesSubs.append(rr);
|
||||
|
||||
for (int s = 0; s < rr; ++s)
|
||||
{
|
||||
break;
|
||||
sub.start = ovector[2 * s];
|
||||
sub.end = ovector[2 * s + 1];
|
||||
|
||||
mSubStrings.append(sub);
|
||||
}
|
||||
}
|
||||
else if (rr == PCRE_ERROR_NOMATCH)
|
||||
{
|
||||
/**
|
||||
* If we previously set PCRE_NOTEMPTY after a null match,
|
||||
* this is not necessarily the end. We need to advance
|
||||
* the start offset, and continue. Fudge the offset values
|
||||
* to achieve this, unless we're already at the end of the string.
|
||||
*/
|
||||
if (notEmpty && startOffset < (int)subjectLen)
|
||||
{
|
||||
ovector[0] = startOffset;
|
||||
ovector[1] = startOffset + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
mErrorOffset = rr;
|
||||
|
||||
if (rc)
|
||||
this->ClearMatch();
|
||||
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rc += rr;
|
||||
mSubStrings.ensure(rc);
|
||||
|
||||
for (int s = 1; s < rr; ++s)
|
||||
else
|
||||
{
|
||||
sub.start = ovector[2 * s];
|
||||
sub.end = ovector[2 * s + 1];
|
||||
mSubStrings.append(sub);
|
||||
mErrorOffset = rr;
|
||||
|
||||
if (mMatchesSubs.length())
|
||||
{
|
||||
ClearMatch();
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = ovector[1];
|
||||
/**
|
||||
* If we have matched an empty string, mimic what Perl's /g options does.
|
||||
* This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
|
||||
* the match again at the same point. If this fails (picked up above) we
|
||||
* advance to the next character.
|
||||
*/
|
||||
notEmpty = (ovector[1] == ovector[0]) ? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
|
||||
|
||||
/**
|
||||
* Advance to the next piece.
|
||||
*/
|
||||
startOffset = ovector[1];
|
||||
}
|
||||
|
||||
if (!rc)
|
||||
if (!mMatchesSubs.length())
|
||||
{
|
||||
return 0;
|
||||
|
||||
sub = mSubStrings.at(0);
|
||||
whole.start = sub.start;
|
||||
sub = mSubStrings.back();
|
||||
whole.end = sub.end;
|
||||
|
||||
mSubStrings.insert(0, whole);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -256,18 +319,14 @@ void RegEx::ClearMatch()
|
|||
delete[] subject;
|
||||
subject = NULL;
|
||||
mSubStrings.clear();
|
||||
mMatchesSubs.clear();
|
||||
}
|
||||
|
||||
const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
||||
const char *getSubstring(char *subject, size_t start, size_t end, char buffer[], size_t max, size_t *outlen)
|
||||
{
|
||||
int i = 0;
|
||||
if ((size_t)s >= mSubStrings.length() || s < 0)
|
||||
return NULL;
|
||||
|
||||
RegExSub sub = mSubStrings.at(s);
|
||||
|
||||
char *substr_a = subject + sub.start;
|
||||
int substr_l = sub.end - sub.start;
|
||||
size_t i;
|
||||
char * substr_a = subject + start;
|
||||
size_t substr_l = end - start;
|
||||
|
||||
for (i = 0; i < substr_l; i++)
|
||||
{
|
||||
|
@ -278,5 +337,516 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
|||
|
||||
buffer[i] = '\0';
|
||||
|
||||
if (outlen)
|
||||
{
|
||||
*outlen = i;
|
||||
}
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
const char *RegEx::GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen)
|
||||
{
|
||||
if (start < 0 || start >= mSubStrings.length())
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
RegExSub sub = mSubStrings.at(start);
|
||||
|
||||
return getSubstring(subject, sub.start, sub.end, buffer, max, outlen);
|
||||
}
|
||||
|
||||
const char *RegEx::GetSubstring(size_t startOffset, size_t endOffset, char buffer[], size_t max, size_t *outlen)
|
||||
{
|
||||
if (startOffset < 0 || endOffset < 0)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return getSubstring(subject, startOffset, endOffset, buffer, max, outlen);
|
||||
}
|
||||
|
||||
void RegEx::MakeSubpatternsTable(int numSubpatterns)
|
||||
{
|
||||
int nameCount = 0;
|
||||
int rc = pcre_fullinfo(re, NULL, PCRE_INFO_NAMECOUNT, &nameCount);
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (nameCount > 0)
|
||||
{
|
||||
const char *nameTable;
|
||||
int nameSize = 0;
|
||||
int i = 0;
|
||||
|
||||
int rc1 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMETABLE, &nameTable);
|
||||
int rc2 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameSize);
|
||||
|
||||
rc = rc2 ? rc2 : rc1;
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
mSubsNameTable.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
NamedGroup data;
|
||||
|
||||
while (i++ < nameCount)
|
||||
{
|
||||
data.index = 0xff * (unsigned char)nameTable[0] + (unsigned char)nameTable[1];
|
||||
data.name = nameTable + 2;
|
||||
|
||||
mSubsNameTable.append(ke::Move(data));
|
||||
nameTable += nameSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
|
||||
{
|
||||
char *output = text;
|
||||
|
||||
/**
|
||||
* Retrieve all matches and store them in
|
||||
* mSubStrings list.
|
||||
*/
|
||||
if (MatchAll(output) == -1)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t subjectLen = strlen(subject);
|
||||
size_t total = 0;
|
||||
size_t baseIndex = 0;
|
||||
size_t diffLength = 0;
|
||||
|
||||
char *toReplace = new char[textMaxLen + 1];
|
||||
char *toSearch = NULL;
|
||||
|
||||
/**
|
||||
* All characters which is not matched are not copied when replacing matches.
|
||||
* Then original text (output buffer) should be considerated as empty.
|
||||
*/
|
||||
if (flags & REGEX_FORMAT_NOCOPY)
|
||||
{
|
||||
*output = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* This is used only when we do replace matches.
|
||||
*/
|
||||
toSearch = new char[textMaxLen + 1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop over all matches found.
|
||||
*/
|
||||
for (size_t i = 0; i < mMatchesSubs.length(); ++i)
|
||||
{
|
||||
char *ptr = toReplace;
|
||||
|
||||
size_t browsed = 0;
|
||||
size_t searchLen = 0;
|
||||
size_t length = 0;
|
||||
|
||||
/**
|
||||
* Build the replace string as it can contain backreference
|
||||
* and this needs to be parsed.
|
||||
*/
|
||||
for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed)
|
||||
{
|
||||
unsigned int c = *s;
|
||||
|
||||
/**
|
||||
* Supported format specifiers:
|
||||
*
|
||||
* $number : Substitutes the substring matched by group number.
|
||||
* n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
|
||||
* ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters).
|
||||
* $& : Substitutes a copy of the whole match.
|
||||
* $` : Substitutes all the text of the input string before the match.
|
||||
* $' : Substitutes all the text of the input string after the match.
|
||||
* $+ : Substitutes the last group that was captured.
|
||||
* $_ : Substitutes the entire input string.
|
||||
* $$ : Substitutes a literal "$".
|
||||
*/
|
||||
if (c == '$' || c == '\\')
|
||||
{
|
||||
switch (*++s)
|
||||
{
|
||||
case '\0':
|
||||
{
|
||||
/**
|
||||
* End of string.
|
||||
* Copy one character.
|
||||
*/
|
||||
*(ptr + browsed) = c;
|
||||
break;
|
||||
}
|
||||
case '&':
|
||||
{
|
||||
/**
|
||||
* Concatenate retrieved full match sub-string.
|
||||
* length - 1 to overwrite EOS.
|
||||
*/
|
||||
GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length);
|
||||
browsed += length - 1;
|
||||
break;
|
||||
}
|
||||
case '`':
|
||||
{
|
||||
/**
|
||||
* Concatenate part of original text up to
|
||||
* first sub-string position.
|
||||
*/
|
||||
length = mSubStrings.at(baseIndex).start;
|
||||
memcpy(ptr + browsed, subject, length);
|
||||
browsed += length - 1;
|
||||
break;
|
||||
}
|
||||
case '\'':
|
||||
{
|
||||
/**
|
||||
* Concatenate part of original text from
|
||||
* last sub-string end position to EOS.
|
||||
*/
|
||||
length = mSubStrings.at(baseIndex).end;
|
||||
memcpy(ptr + browsed, subject + length, subjectLen - length);
|
||||
browsed += (subjectLen - length) - 1;
|
||||
break;
|
||||
}
|
||||
case '+':
|
||||
{
|
||||
/**
|
||||
* Copy the last group that was captured.
|
||||
*/
|
||||
GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length);
|
||||
browsed += length - 1;
|
||||
break;
|
||||
}
|
||||
case '_':
|
||||
{
|
||||
/**
|
||||
* Copy the entire input string.
|
||||
*/
|
||||
memcpy(ptr + browsed, subject, subjectLen);
|
||||
browsed += (subjectLen - 1);
|
||||
break;
|
||||
}
|
||||
case '$':
|
||||
case '\\':
|
||||
{
|
||||
/**
|
||||
* Copy the single character $ or \.
|
||||
*/
|
||||
*(ptr + browsed) = c;
|
||||
break;
|
||||
}
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
case '{':
|
||||
{
|
||||
/**
|
||||
* Checking backreference.
|
||||
* Which can be either $n, ${n} or ${name}.
|
||||
*/
|
||||
size_t backref = -1;
|
||||
const char *walk = s;
|
||||
bool inBrace = false;
|
||||
bool nameCheck = false;
|
||||
|
||||
/**
|
||||
* ${nn}.
|
||||
* ^
|
||||
*/
|
||||
if (*walk == '{')
|
||||
{
|
||||
inBrace = true;
|
||||
++walk;
|
||||
}
|
||||
|
||||
/**
|
||||
* Valid number.
|
||||
* $nn or ${nn}
|
||||
* ^ ^
|
||||
*/
|
||||
if (*walk >= '0' && *walk <= '9')
|
||||
{
|
||||
backref = *walk - '0';
|
||||
++walk;
|
||||
}
|
||||
else if (inBrace)
|
||||
{
|
||||
nameCheck = true;
|
||||
|
||||
/**
|
||||
* Not a valid number.
|
||||
* Checking as string.
|
||||
* ${name}
|
||||
* ^
|
||||
*/
|
||||
if (*walk)
|
||||
{
|
||||
const char *pch = strchr(walk, '}');
|
||||
|
||||
if (pch != NULL)
|
||||
{
|
||||
/**
|
||||
* A named group maximum character is 32 (PCRE).
|
||||
*/
|
||||
char name[32];
|
||||
size_t nameLength = strncopy(name, walk, pch - walk + 1);
|
||||
|
||||
int flags, num = 0;
|
||||
pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &flags);
|
||||
|
||||
/**
|
||||
* If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
|
||||
* as pcre_get_stringnumber output order is not defined.
|
||||
*/
|
||||
if (flags & PCRE_DUPNAMES)
|
||||
{
|
||||
memset(ovector, 0, REGEX_MAX_SUBPATTERNS);
|
||||
|
||||
/**
|
||||
* pcre_copy_named_substring needs a vector containing sub-patterns ranges
|
||||
* for a given match.
|
||||
*/
|
||||
for (size_t j = 0; j < mMatchesSubs.at(i); ++j)
|
||||
{
|
||||
ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
|
||||
ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
|
||||
}
|
||||
|
||||
num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen);
|
||||
|
||||
if (num != PCRE_ERROR_NOSUBSTRING)
|
||||
{
|
||||
browsed += num - 1;
|
||||
s = pch;
|
||||
break;
|
||||
}
|
||||
++pch;
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* Retrieve sub-pattern index from a give name.
|
||||
*/
|
||||
num = pcre_get_stringnumber(re, name);
|
||||
if (num != PCRE_ERROR_NOSUBSTRING)
|
||||
{
|
||||
backref = num;
|
||||
walk = ++pch;
|
||||
}
|
||||
}
|
||||
|
||||
if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
|
||||
{
|
||||
/**
|
||||
* If a sub-string for a given match is not found, or if > to
|
||||
* number of sub-patterns we still need to check if this
|
||||
* group name is a valid one because if so we want to escape it.
|
||||
* Looking at the name table.
|
||||
*/
|
||||
bool found = false;
|
||||
for (size_t i = 0; i < mSubsNameTable.length(); ++i)
|
||||
{
|
||||
if (!mSubsNameTable.at(i).name.compare(name))
|
||||
{
|
||||
--browsed;
|
||||
s = --pch;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!nameCheck)
|
||||
{
|
||||
/**
|
||||
* Valid second number.
|
||||
* $nn or ${nn}
|
||||
* ^ ^
|
||||
*/
|
||||
if (*walk && *walk >= '0' && *walk <= '9')
|
||||
{
|
||||
backref = backref * 10 + *walk - '0';
|
||||
++walk;
|
||||
}
|
||||
|
||||
if (inBrace)
|
||||
{
|
||||
/**
|
||||
* Invalid specifier
|
||||
* Either hit EOS or missing }.
|
||||
* ${n or ${nn or ${nx or ${nnx
|
||||
* ^ ^ ^ ^
|
||||
*/
|
||||
if (*walk == '\0' || *walk != '}')
|
||||
{
|
||||
backref = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
++walk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
length = walk - s;
|
||||
s = --walk;
|
||||
|
||||
/**
|
||||
* We can't provide a capture number >= to total that pcre_exec has found.
|
||||
* 0 is implicitly accepted, same behavior as $&.
|
||||
*/
|
||||
if (backref >= 0 && (int)backref < mNumSubpatterns)
|
||||
{
|
||||
/**
|
||||
* Valid available index for a given match.
|
||||
*/
|
||||
if (backref < mMatchesSubs.at(i))
|
||||
{
|
||||
/**
|
||||
* Concatenate retrieved sub-string.
|
||||
* length - 1 to overwrite EOS.
|
||||
*/
|
||||
GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length);
|
||||
browsed += length - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* Valid unavailable index for a given match.
|
||||
*/
|
||||
--browsed;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* If we here it means the syntax is valid but sub-pattern doesn't exist.
|
||||
* So, copy as it is, including $.
|
||||
*/
|
||||
memcpy(ptr + browsed, s - length, length + 1);
|
||||
browsed += length;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
/**
|
||||
* Not a valid format modifier.
|
||||
* So we copy characters as it is.
|
||||
*/
|
||||
*(ptr + browsed) = *s;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* At this point, direct copy.
|
||||
*/
|
||||
*(ptr + browsed) = c;
|
||||
}
|
||||
}
|
||||
|
||||
*(ptr + browsed) = '\0';
|
||||
|
||||
/**
|
||||
* Concatenate only replace string of each match,
|
||||
* as we don't want to copy unmatched characters.
|
||||
*/
|
||||
if (flags & REGEX_FORMAT_NOCOPY)
|
||||
{
|
||||
/**
|
||||
* We want just the first occurrence.
|
||||
*/
|
||||
if (total++ && (flags & REGEX_FORMAT_FIRSTONLY))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
strncat(output, toReplace, textMaxLen + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
* Retrieves full string of a given match.
|
||||
*/
|
||||
const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);
|
||||
|
||||
/**
|
||||
* We get something to replace, but the sub-pattern to search is empty.
|
||||
* We insert replacement either a the start end or string.
|
||||
*/
|
||||
if (*toReplace && !searchLen)
|
||||
{
|
||||
if (output - text > 0)
|
||||
{
|
||||
strncat(output, toReplace, textMaxLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
strncat(toReplace, text, textMaxLen);
|
||||
strncopy(text, toReplace, strlen(toReplace) + 1);
|
||||
}
|
||||
|
||||
++total;
|
||||
}
|
||||
else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
|
||||
{
|
||||
/**
|
||||
* Then we simply do a replace.
|
||||
* Probably not the most efficient, but this should be at least safe.
|
||||
* To avoid issue where the function could find a string which is not at the expected index,
|
||||
* We force the input string to start from index of the full match.
|
||||
*/
|
||||
++total;
|
||||
}
|
||||
|
||||
if (total && (flags & REGEX_FORMAT_FIRSTONLY))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* mMatchesSubs is a flat list containing all sub-patterns of all matches.
|
||||
* A number of sub-patterns can vary per match. So we calculate the position in the list,
|
||||
* from where the first sub-pattern result of current match starts.
|
||||
*/
|
||||
baseIndex += mMatchesSubs.at(i);
|
||||
diffLength += browsed - searchLen;
|
||||
}
|
||||
|
||||
delete[] toReplace;
|
||||
|
||||
if (toSearch != NULL)
|
||||
{
|
||||
delete[] toSearch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of successful replacements.
|
||||
*/
|
||||
return total;
|
||||
}
|
||||
|
|
|
@ -34,6 +34,19 @@
|
|||
#define _INCLUDE_CREGEX_H
|
||||
|
||||
#include <am-vector.h>
|
||||
#include <am-string.h>
|
||||
|
||||
/**
|
||||
* Maximum number of sub-patterns, here 50 (this should be a multiple of 3).
|
||||
*/
|
||||
#define REGEX_MAX_SUBPATTERNS 150
|
||||
|
||||
/**
|
||||
* Flags to use with regex_replace to control the replacement behavior.
|
||||
*/
|
||||
#define REGEX_FORMAT_DEFAULT 0 // Uses the standard formatting rules to replace matches.
|
||||
#define REGEX_FORMAT_NOCOPY 1 // The sections that do not match the regular expression are not copied when replacing matches.
|
||||
#define REGEX_FORMAT_FIRSTONLY 2 // Only the first occurrence of a regular expression is replaced.
|
||||
|
||||
class RegEx
|
||||
{
|
||||
|
@ -42,6 +55,11 @@ public:
|
|||
int start, end;
|
||||
};
|
||||
|
||||
/* One named capture group: its name and its group number. */
struct NamedGroup {
|
||||
/* Name of the group. */
ke::AString name;
|
||||
/* Number of the capture group carrying that name. */
size_t index;
|
||||
};
|
||||
|
||||
RegEx();
|
||||
~RegEx();
|
||||
|
||||
|
@ -52,8 +70,11 @@ public:
|
|||
int Compile(const char *pattern, int iFlags);
|
||||
int Match(const char *str);
|
||||
int MatchAll(const char *str);
|
||||
int Replace(char *text, size_t text_maxlen, const char *replace, size_t replaceLen, int flags = 0);
|
||||
void ClearMatch();
|
||||
const char *GetSubstring(int s, char buffer[], int max);
|
||||
const char *GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen = NULL);
|
||||
const char *GetSubstring(size_t start, size_t end, char buffer[], size_t max, size_t *outlen = NULL);
|
||||
void MakeSubpatternsTable(int numSubpatterns);
|
||||
|
||||
public:
|
||||
int mErrorOffset;
|
||||
|
@ -63,9 +84,12 @@ public:
|
|||
private:
|
||||
pcre *re;
|
||||
bool mFree;
|
||||
int ovector[30];
|
||||
int ovector[REGEX_MAX_SUBPATTERNS];
|
||||
char *subject;
|
||||
ke::Vector<RegExSub> mSubStrings;
|
||||
ke::Vector<size_t> mMatchesSubs;
|
||||
ke::Vector<NamedGroup> mSubsNameTable;
|
||||
int mNumSubpatterns;
|
||||
};
|
||||
|
||||
#endif //_INCLUDE_CREGEX_H
|
||||
|
|
|
@ -153,6 +153,8 @@ cell match(AMX *amx, cell *params, bool all)
|
|||
else
|
||||
{
|
||||
*errorCode = x->Count();
|
||||
if (all)
|
||||
return x->Count();
|
||||
}
|
||||
|
||||
return id + 1;
|
||||
|
@ -272,6 +274,43 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
|
|||
return 1;
|
||||
}
|
||||
|
||||
//native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);
|
||||
/**
 * Native backing regex_replace(): runs RegEx::Replace() on the plugin's
 * string and writes the result back into it.
 *
 * Returns 0 for an invalid handle or when nothing was replaced, -2 when
 * Replace() reports an error (the error value is stored through the sixth
 * parameter), otherwise the value returned by Replace().
 */
static cell AMX_NATIVE_CALL regex_replace(AMX *amx, cell *params)
{
	const int handle = params[1] - 1;

	if (handle < 0 || handle >= (int)PEL.length() || PEL[handle]->isFree())
	{
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", handle);
		return 0;
	}

	int subjectLength, templateLength;
	char *subjectText = MF_GetAmxString(amx, params[2], 0, &subjectLength);
	const char *templateText = MF_GetAmxString(amx, params[4], 1, &templateLength);

	cell *errorOut = MF_GetAmxAddr(amx, params[6]);

	RegEx *regex = PEL[handle];
	const int replaced = regex->Replace(subjectText, params[3] + 1, templateText, templateLength, params[5]);

	switch (replaced)
	{
		case -1:
		{
			/* Matching failed: expose the stored error value and drop the match data. */
			*errorOut = regex->mErrorOffset;
			regex->ClearMatch();
			return -2;
		}
		case 0:
		{
			/* Nothing replaced: the plugin's string is left untouched. */
			*errorOut = 0;
			regex->ClearMatch();
			return 0;
		}
		default:
		{
			break;
		}
	}

	MF_SetAmxString(amx, params[2], subjectText, params[3]);

	return replaced;
}
|
||||
|
||||
AMX_NATIVE_INFO regex_Natives[] = {
|
||||
{"regex_compile", regex_compile},
|
||||
{"regex_compile_ex", regex_compile_ex},
|
||||
|
@ -280,6 +319,7 @@ AMX_NATIVE_INFO regex_Natives[] = {
|
|||
{"regex_match_all", regex_match_all},
|
||||
{"regex_match_all_c", regex_match_all_c},
|
||||
{"regex_substr", regex_substr},
|
||||
{"regex_replace", regex_replace},
|
||||
{"regex_free", regex_free},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
|
|
@ -99,6 +99,7 @@
|
|||
<ClCompile Include="..\CRegEx.cpp" />
|
||||
<ClCompile Include="..\module.cpp" />
|
||||
<ClCompile Include="..\sdk\amxxmodule.cpp" />
|
||||
<ClCompile Include="..\utils.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\CRegEx.h" />
|
||||
|
@ -107,6 +108,7 @@
|
|||
<ClInclude Include="..\sdk\moduleconfig.h" />
|
||||
<ClInclude Include="..\sdk\CVector.h" />
|
||||
<ClInclude Include="..\sdk\amxxmodule.h" />
|
||||
<ClInclude Include="..\utils.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\..\plugins\include\regex.inc" />
|
||||
|
|
|
@ -32,6 +32,9 @@
|
|||
<ClCompile Include="..\sdk\amxxmodule.cpp">
|
||||
<Filter>Module SDK\SDK Base</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\utils.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\CRegEx.h">
|
||||
|
@ -52,6 +55,9 @@
|
|||
<ClInclude Include="..\sdk\amxxmodule.h">
|
||||
<Filter>Module SDK\SDK Base</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\utils.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\..\plugins\include\regex.inc">
|
||||
|
|
236
dlls/regex/utils.cpp
Normal file
236
dlls/regex/utils.cpp
Normal file
|
@ -0,0 +1,236 @@
|
|||
|
||||
#include "amxxmodule.h"
|
||||
#include <string.h>
|
||||
#include "utils.h"
|
||||
|
||||
/**
 * Checks whether the UTF-8 sequence ending at `c` is complete.
 *
 * Walks backwards over continuation bytes (10xxxxxx) to the byte that
 * precedes them, then compares the number of bytes seen with the length
 * announced by that byte.
 *
 * @param c  Pointer to the last byte to inspect; bytes before it are read.
 *
 * @return   0 when the sequence is complete, otherwise the number of bytes
 *           that were walked over (lead byte included).
 */
int UTIL_CheckValidChar(char *c)
{
	int seen = 1;

	while ((*c & 0xC0) == 0x80)
	{
		--c;
		++seen;
	}

	const int lead = *c & 0xF0;
	int expected = 0;

	if (lead == 0xC0 || lead == 0xD0)
	{
		expected = 2;
	}
	else if (lead == 0xE0)
	{
		expected = 3;
	}
	else if (lead == 0xF0)
	{
		expected = 4;
	}

	return (expected == seen) ? 0 : seen;
}
|
||||
|
||||
/**
 * Copies at most `count - 1` characters of `src` into `dest` and always
 * terminates the result, unless `count` is 0 in which case nothing is written.
 *
 * @param dest   Destination buffer.
 * @param src    Zero-terminated source string.
 * @param count  Size of the destination, terminator included.
 *
 * @return       Number of characters copied, terminator excluded.
 */
unsigned int strncopy(char *dest, const char *src, size_t count)
{
	if (count == 0)
	{
		return 0;
	}

	const size_t room = count - 1;
	size_t written = 0;

	for (; written < room && src[written] != '\0'; ++written)
	{
		dest[written] = src[written];
	}

	dest[written] = '\0';

	return (unsigned int)written;
}
|
||||
|
||||
/**
|
||||
* NOTE: Do not edit this for the love of god unless you have
|
||||
* read the test cases and understand the code behind each one.
|
||||
* While I don't guarantee there aren't mistakes, I do guarantee
|
||||
* that plugins will end up relying on tiny idiosyncrasies of this
|
||||
* function, just like they did with AMX Mod X.
|
||||
*
|
||||
* There are explicitly more cases than the AMX Mod X version because
|
||||
* we're not doing a blind copy. Each case is specifically optimized
|
||||
* for what needs to be done. Even better, we don't have to error on
|
||||
* bad buffer sizes. Instead, this function will smartly cut off the
|
||||
* string in a way that pushes old data out.
|
||||
*/
|
||||
/**
 * Replaces, in place, the first occurrence of `search` found in `subject`.
 *
 * @param subject        Zero-terminated text that is scanned and modified.
 * @param maxLen         Length limit for `subject`; one is subtracted below to keep
 *                       room for the terminator (presumably the buffer size in bytes,
 *                       to be confirmed against the callers).
 * @param search         Text to look for.
 * @param searchLen      Number of bytes of `search` that are compared.
 * @param replace        Replacement text.
 * @param replaceLen     Number of bytes of `replace` to insert.
 * @param caseSensitive  true compares with strcmp/strncmp, false with stricmp/strnicmp.
 *
 * @return               Pointer just past the inserted replacement, `subject` itself in
 *                       the maxLen == 1 case, or NULL when nothing was replaced.
 */
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
|
||||
{
|
||||
char *ptr = subject;
|
||||
size_t browsed = 0;
|
||||
size_t textLen = strlen(subject);
|
||||
|
||||
/* It's not possible to search or replace */
|
||||
if (searchLen > textLen)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Handle the case of one byte replacement.
|
||||
* It's only valid in one case.
|
||||
*/
|
||||
if (maxLen == 1)
|
||||
{
|
||||
/* If the search matches and the replace length is 0,
|
||||
* we can just terminate the string and be done.
|
||||
*/
|
||||
if ((caseSensitive ? strcmp(subject, search) : stricmp(subject, search)) == 0 && replaceLen == 0)
|
||||
{
|
||||
*subject = '\0';
|
||||
return subject;
|
||||
}
|
||||
else
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Subtract one off the maxlength so we can include the null terminator */
|
||||
maxLen--;
|
||||
|
||||
/* Scan every position where `search` could still fit in the remaining text. */
while (*ptr != '\0' && (browsed <= textLen - searchLen))
|
||||
{
|
||||
/* See if we get a comparison */
|
||||
if ((caseSensitive ? strncmp(ptr, search, searchLen) : strnicmp(ptr, search, searchLen)) == 0)
|
||||
{
|
||||
if (replaceLen > searchLen)
|
||||
{
|
||||
/* First, see if we have enough space to do this operation */
|
||||
if (maxLen - textLen < replaceLen - searchLen)
|
||||
{
|
||||
/* First, see if the replacement length goes out of bounds. */
|
||||
if (browsed + replaceLen >= maxLen)
|
||||
{
|
||||
/* EXAMPLE CASE:
|
||||
* Subject: AABBBCCC
|
||||
* Buffer : 12 bytes
|
||||
* Search : BBB
|
||||
* Replace: DDDDDDDDDD
|
||||
* OUTPUT : AADDDDDDDDD
|
||||
* POSITION: ^
|
||||
*/
|
||||
/* If it does, we'll just bound the length and do a strcpy. */
|
||||
replaceLen = maxLen - browsed;
|
||||
|
||||
/* Note, we add one to the final result for the null terminator */
|
||||
strncopy(ptr, replace, replaceLen + 1);
|
||||
|
||||
/* Don't truncate a multi-byte character */
|
||||
if (*(ptr + replaceLen - 1) & 1 << 7)
|
||||
{
|
||||
replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1);
|
||||
*(ptr + replaceLen) = '\0';
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* EXAMPLE CASE:
|
||||
* Subject: AABBBCCC
|
||||
* Buffer : 12 bytes
|
||||
* Search : BBB
|
||||
* Replace: DDDDDDD
|
||||
* OUTPUT : AADDDDDDDCC
|
||||
* POSITION: ^
|
||||
*/
|
||||
/* We're going to have some bytes left over... */
|
||||
size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1;
|
||||
size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1;
|
||||
char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy);
|
||||
char *moveTo = ptr + replaceLen;
|
||||
|
||||
/* First, move our old data out of the way. */
|
||||
memmove(moveTo, moveFrom, realBytesToCopy);
|
||||
|
||||
/* Now, do our replacement. */
|
||||
memcpy(ptr, replace, replaceLen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* EXAMPLE CASE:
|
||||
* Subject: AABBBCCC
|
||||
* Buffer : 12 bytes
|
||||
* Search : BBB
|
||||
* Replace: DDDD
|
||||
* OUTPUT : AADDDDCCC
|
||||
* POSITION: ^
|
||||
*/
|
||||
/* Yes, we have enough space. Do a normal move operation. */
|
||||
char *moveFrom = ptr + searchLen;
|
||||
char *moveTo = ptr + replaceLen;
|
||||
|
||||
/* First move our old data out of the way. */
|
||||
size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
|
||||
memmove(moveTo, moveFrom, bytesToCopy);
|
||||
|
||||
/* Now do our replacement. */
|
||||
memcpy(ptr, replace, replaceLen);
|
||||
}
|
||||
}
|
||||
else if (replaceLen < searchLen)
|
||||
{
|
||||
/* EXAMPLE CASE:
|
||||
* Subject: AABBBCCC
|
||||
* Buffer : 12 bytes
|
||||
* Search : BBB
|
||||
* Replace: D
|
||||
* OUTPUT : AADCCC
|
||||
* POSITION: ^
|
||||
*/
|
||||
/* If the replacement does not grow the string length, we do not
|
||||
* need to do any fancy checking at all. Yay!
|
||||
*/
|
||||
char *moveFrom = ptr + searchLen; /* Start after the search pointer */
|
||||
char *moveTo = ptr + replaceLen; /* Copy to where the replacement ends */
|
||||
|
||||
/* Copy our replacement in, if any */
|
||||
if (replaceLen)
|
||||
{
|
||||
memcpy(ptr, replace, replaceLen);
|
||||
}
|
||||
|
||||
/* Figure out how many bytes to move down, including null terminator */
|
||||
size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
|
||||
|
||||
/* Move the rest of the string down */
|
||||
memmove(moveTo, moveFrom, bytesToCopy);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* EXAMPLE CASE:
|
||||
* Subject: AABBBCCC
|
||||
* Buffer : 12 bytes
|
||||
* Search : BBB
|
||||
* Replace: DDD
|
||||
* OUTPUT : AADDDCCC
|
||||
* POSITION: ^
|
||||
*/
|
||||
/* We don't have to move anything around, just do a straight copy */
|
||||
memcpy(ptr, replace, replaceLen);
|
||||
}
|
||||
|
||||
/* Only the first occurrence is handled: return the position right after the replacement. */
return ptr + replaceLen;
|
||||
}
|
||||
ptr++;
|
||||
browsed++;
|
||||
}
|
||||
|
||||
/* No occurrence of the search text was found. */
return NULL;
|
||||
}
|
8
dlls/regex/utils.h
Normal file
8
dlls/regex/utils.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
|
||||
/* Returns 0 when the UTF-8 sequence ending at `c` is complete, otherwise the number of bytes walked back over. */
int UTIL_CheckValidChar(char *c);
|
||||
/* Replaces the first occurrence of `search` in `subject` in place; returns a pointer past the replacement, or NULL if nothing was replaced. */
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive);
|
||||
/* Copies at most `count - 1` characters, always terminates `dest` when `count` is non-zero, and returns the number of characters copied. */
unsigned int strncopy(char *dest, const char *src, size_t count);
|
||||
|
||||
#endif // UTILS_H
|
|
@ -44,10 +44,10 @@
|
|||
|
||||
enum Regex
|
||||
{
|
||||
REGEX_MATCH_FAIL = -2,
|
||||
REGEX_PATTERN_FAIL,
|
||||
REGEX_NO_MATCH,
|
||||
REGEX_OK
|
||||
REGEX_MATCH_FAIL = -2,
|
||||
REGEX_PATTERN_FAIL = -1,
|
||||
REGEX_NO_MATCH = 0,
|
||||
REGEX_OK = 1
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -231,8 +231,7 @@ native regex_free(&Regex:id);
|
|||
 * @note Use this if you intend on using the same expression multiple times.
|
||||
* Pass the regex handle returned here to regex_match_ex() to check for matches.
|
||||
*
|
||||
* @note Unlike regex_compile(), this allows you to use directly PCRE flags, and
|
||||
* to get a more complete set of regular expression error codes.
|
||||
* @note Unlike regex_compile(), this allows you to use directly PCRE flags.
|
||||
*
|
||||
* @param pattern The regular expression pattern.
|
||||
* @param flags General flags for the regular expression, see PCRE_* defines.
|
||||
|
@ -306,6 +305,7 @@ native Regex:regex_match_all(const string[], const pattern[], flags = 0, error[]
|
|||
* @param flags General flags for the regular expression.
|
||||
* @param error Error message, if applicable.
|
||||
* @param maxLen Maximum length of the error buffer.
|
||||
* @param errcode Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
|
||||
*
|
||||
* @return -2 = Matching error (error code is stored in ret)
|
||||
* -1 = Pattern error (error code is stored in ret)
|
||||
|
@ -327,3 +327,40 @@ stock regex_match_simple(const str[], const pattern[], flags = 0, error[]= "", m
|
|||
|
||||
return substrings;
|
||||
}
|
||||
|
||||
/**
|
||||
 * Flags to be used with regex_replace to control the replacement behavior.
|
||||
*/
|
||||
#define REGEX_FORMAT_DEFAULT 0 /* Uses the standard formatting rules to replace matches */
|
||||
#define REGEX_FORMAT_NOCOPY (1<<0) /* The sections that do not match the regular expression are not copied when replacing matches. */
|
||||
#define REGEX_FORMAT_FIRSTONLY (1<<1) /* Only the first occurrence of a regular expression is replaced. */
|
||||
|
||||
/**
|
||||
* Perform a regular expression search and replace.
|
||||
*
|
||||
 * An optional parameter, flags, allows you to specify options on how to format the expression.
|
||||
* Supported format specifiers for replace parameter:
|
||||
* $number : Substitutes the substring matched by group number.
|
||||
 * number must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
|
||||
* ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters).
|
||||
* $& : Substitutes a copy of the whole match.
|
||||
* $` : Substitutes all the text of the input string before the match.
|
||||
* $' : Substitutes all the text of the input string after the match.
|
||||
* $+ : Substitutes the last group that was captured.
|
||||
* $_ : Substitutes the entire input string.
|
||||
* $$ : Substitutes a literal "$".
|
||||
 * Note: the character \ can also be used to introduce a format specifier; it behaves the same as $.
|
||||
*
|
||||
* @param pattern The regular expression pattern.
|
||||
* @param string The string to check.
|
||||
* @param error Error message, if applicable.
|
||||
 * @param maxLen Maximum length of the string buffer.
|
||||
 * @param replace The string that will be used to replace any matches. See above for format specifiers.
|
||||
 * @param flags General flags to control how the string is replaced. See REGEX_FORMAT_* defines.
|
||||
* @param errcode Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
|
||||
*
|
||||
 * @return -2 = Matching error (error code is stored in errcode)
|
||||
* 0 = No match.
|
||||
 * >=1 = Number of matches.
|
||||
*/
|
||||
native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);
|
451
plugins/testsuite/regex_test.sma
Normal file
451
plugins/testsuite/regex_test.sma
Normal file
|
@ -0,0 +1,451 @@
|
|||
#include <amxmodx>
|
||||
#include <regex>
|
||||
|
||||
/**
|
||||
* Warning: To get expected result, file encoding must be UTF-8 without BOM.
|
||||
*/
|
||||
|
||||
/**
 * Registers this test plugin and the "regex_test" server command, which is
 * dispatched to OnServerCommand().
 *
 * NOTE(review): the plugin is registered under the name "UTF-8 Test" although
 * this file exercises regex_replace() - confirm whether that name is intended.
 */
public plugin_init()
|
||||
{
|
||||
register_plugin("UTF-8 Test", AMXX_VERSION_STR, "AMXX Dev Team");
|
||||
register_srvcmd("regex_test", "OnServerCommand");
|
||||
}
|
||||
|
||||
new FailedCount;
|
||||
new PassedCount;
|
||||
|
||||
/**
 * Runs a single regex_replace() test case and records the outcome.
 *
 * The pattern is compiled with regex_compile_ex(), the input is copied into a
 * local work buffer, regex_replace() is applied to it, and the result is
 * compared against the expectations. Every failure prints a numbered
 * diagnostic and increments FailedCount; a success increments PassedCount.
 *
 * @param regex             Pattern to compile.
 * @param replace           Replacement string handed to regex_replace().
 * @param string            Input text, copied into the work buffer.
 * @param expectedString    Expected content of the buffer after replacement.
 * @param expectedCount     Expected return value of regex_replace(), or -1
 *                          to skip the count check.
 * @param regexFlags        Flags passed to regex_compile_ex().
 * @param formatFlags       Flags passed to regex_replace().
 * @param bufferlen         Maximum length passed to regex_replace(), or -1
 *                          to use the full size of the work buffer.
 *
 * @noreturn
 */
test(const regex[], const replace[], const string[], const expectedString[], expectedCount = -1, regexFlags = 0, formatFlags = 0, bufferlen = -1)
{
	new errorCode, error[128];
	new Regex:r = regex_compile_ex(regex, regexFlags, error, charsmax(error), errorCode);

	if (r == REGEX_PATTERN_FAIL || errorCode)
	{
		server_print("^t^t#%d. Pattern fail : ^"%s^"(%d)", ++FailedCount + PassedCount, error, errorCode);
	}
	else
	{
		new buffer[512];
		copy(buffer, charsmax(buffer), string);

		// errorCode is guaranteed to be 0 in this branch, so it is reused as the
		// out-parameter instead of declaring a second variable of the same name
		// that would shadow the one above.
		new count = regex_replace(r, buffer, bufferlen != -1 ? bufferlen : charsmax(buffer), replace, formatFlags, errorCode);

		if (expectedCount != -1 && count != expectedCount)
		{
			server_print("^t^t#%d. Failed - count = %d, expected count = %d", ++FailedCount + PassedCount, count, expectedCount);
		}
		else if (!equal(buffer, expectedString))
		{
			server_print("^t^t#%d. Failed - output = %s, expected output = %s", ++FailedCount + PassedCount, buffer, expectedString);
		}
		else
		{
			++PassedCount;
		}

		regex_free(r);
	}
}
|
||||
/**
 * Prints the final tally: how many tests passed out of the total number run.
 */
end()
{
	new total = PassedCount + FailedCount;

	server_print("Tests successful: %d/%d", PassedCount, total);
}
|
||||
|
||||
public OnServerCommand()
|
||||
{
|
||||
server_print("Testing regex_replace()");
|
||||
|
||||
server_print("^tChecking count...");
|
||||
{
|
||||
test( .regex = "(([0-9a-z]+)-([0-9]+))-(([0-9]+)-([0-9]+))",
|
||||
.replace = "xxxx",
|
||||
.string = "1-2-3-4 a-2-3-4 1-a-3-4 1-2-a-4 1-2-3-a a-a-a-a 4-3-2-1 100-200-300-400-500-600-700-800",
|
||||
.expectedString = "xxxx xxxx 1-a-3-4 1-2-a-4 1-2-3-a a-a-a-a xxxx xxxx-xxxx",
|
||||
.expectedCount = 5
|
||||
);
|
||||
|
||||
test( .regex = "([a-z]+)",
|
||||
.replace = "xxxx",
|
||||
.string = "Here must only number like 42 and 13 appear",
|
||||
.expectedString = "Hxxxx xxxx xxxx xxxx xxxx 42 xxxx 13 xxxx",
|
||||
.expectedCount = 7
|
||||
);
|
||||
|
||||
test( .regex = "((V(I|1)(4|A)GR(4|A))|(V(I|1)C(0|O)D(I|1)(N|\/\\\/)))", .regexFlags = PCRE_CASELESS,
|
||||
.replace = "...",
|
||||
.string = "Viagra V14GR4 Vicodin V1C0D1/\/ v1c0d1/|/",
|
||||
.expectedString = "... ... ... ... v1c0d1/|/",
|
||||
.expectedCount = 4
|
||||
);
|
||||
|
||||
test( .regex = "\[(right)\](((?R)|[^^[]+?|\[)*)\[/\\1\]", .regexFlags = PCRE_CASELESS | PCRE_UNGREEDY,
|
||||
.replace = "",
|
||||
.string = "[CODE]<td align="$stylevar[right]">[/CODE]",
|
||||
.expectedString = "[CODE]<td align="$stylevar[right]">[/CODE]",
|
||||
.expectedCount = 0
|
||||
);
|
||||
|
||||
test( .regex = "- This is a string$",
|
||||
.replace = "This shouldn\'t work",
|
||||
.string = "123456789 - Hello, world - This is a string.",
|
||||
.expectedString = "123456789 - Hello, world - This is a string.",
|
||||
.expectedCount = 0
|
||||
);
|
||||
|
||||
test( .regex = "[0-35-9]",
|
||||
.replace = "4",
|
||||
.string = "123456789 - Hello, world - This is a string.",
|
||||
.expectedString = "444444444 - Hello, world - This is a string.",
|
||||
.expectedCount = 8
|
||||
);
|
||||
|
||||
test( .regex = "\b[hH]\w{2,4}",
|
||||
.replace = "Bonjour",
|
||||
.string = "123456789 - Hello, world - This is a string.",
|
||||
.expectedString = "123456789 - Bonjour, world - This is a string.",
|
||||
.expectedCount = 1
|
||||
);
|
||||
|
||||
test( .regex = "(\w)\s*-\s*(\w)",
|
||||
.replace = "$1. $2",
|
||||
.string = "123456789 - Hello, world - This is a string.",
|
||||
.expectedString = "123456789. Hello, world. This is a string.",
|
||||
.expectedCount = 2
|
||||
);
|
||||
|
||||
test( .regex = "([a-z]\w+)@(\w+)\.(\w+)\.([a-z]{2,})",
|
||||
.replace = "$1 at $2 dot $3 dot $4",
|
||||
.string = "josmessa@uk.ibm.com",
|
||||
.expectedString = "josmessa at uk dot ibm dot com",
|
||||
.expectedCount = 1
|
||||
);
|
||||
|
||||
test( .regex = "\b\w{1}s",
|
||||
.replace = "test",
|
||||
.string = "This is a string. (0-9) as well as parentheses",
|
||||
.expectedString = "This test a string. (0-9) test well test parentheses",
|
||||
.expectedCount = 3
|
||||
);
|
||||
|
||||
|
||||
test( .regex = "(\d{1})-(\d{1})",
|
||||
.replace = "$1 to $2",
|
||||
.string = "This is a string. It contains numbers (0-9) as well as parentheses and some other things!",
|
||||
.expectedString = "This is a string. It contains numbers (0 to 9) as well as parentheses and some other things!",
|
||||
.expectedCount = 1
|
||||
);
|
||||
|
||||
test( .regex = "[\(!\)]",
|
||||
.replace = "*",
|
||||
.string = "This is a string. It contains numbers (0-9) as well as parentheses and some other things!",
|
||||
.expectedString = "This is a string. It contains numbers *0-9* as well as parentheses and some other things*",
|
||||
.expectedCount = 3
|
||||
);
|
||||
}
|
||||
|
||||
server_print("^tChecking edges cases...");
|
||||
{
|
||||
test(.regex = "[0-9]+", .replace = "*", .string = "", .expectedString = "", .expectedCount = 0);
|
||||
test(.regex = "([0-9]+)", .replace = "", .string = "123", .expectedString = "", .expectedCount = 1);
|
||||
test(.regex = "a", .replace = "\", .string = "a", .expectedString = "\", .expectedCount = 1);
|
||||
test(.regex = "^^", .replace = "x", .string = "a", .expectedString = "xa", .expectedCount = 1);
|
||||
test(.regex = "b", .replace = "\", .string = "b", .expectedString = "\", .expectedCount = 1, .bufferlen = 1);
|
||||
test(.regex = "b", .replace = "^^", .string = "b", .expectedString = "b", .expectedCount = 0, .bufferlen = 0);
|
||||
test(.regex = "\w+", .replace = "123", .string = "abc", .expectedString = "12", .expectedCount = 1, .bufferlen = 2);
|
||||
}
|
||||
|
||||
server_print("^tChecking UTF-8 support...");
|
||||
{
|
||||
test(.regex = "(\w+)", .replace = "*", .string = "éà@É", .expectedString = "éà@É", .expectedCount = 0);
|
||||
test(.regex = "(\w+)", .replace = "*", .string = "éà@É", .expectedString = "*@*", .expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8);
|
||||
test(.regex = "(\w+)", .replace = "字", .string = "éà@É", .expectedString = "字@字",.expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8);
|
||||
test(.regex = "(\w+)", .replace = "字", .string = "éà@É", .expectedString = "字", .expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8, .bufferlen = 3);
|
||||
}
|
||||
|
||||
server_print("^tChecking substitutions...");
|
||||
{
|
||||
test(.regex = "x", .replace = "y", .string = "text", .expectedString = "teyt" );
|
||||
test(.regex = "x", .replace = "$", .string = "text", .expectedString = "te$t" );
|
||||
test(.regex = "x", .replace = "$1", .string = "text", .expectedString = "te$1t" );
|
||||
test(.regex = "x", .replace = "${1", .string = "text", .expectedString = "te${1t" );
|
||||
test(.regex = "x", .replace = "${", .string = "text", .expectedString = "te${t" );
|
||||
test(.regex = "x", .replace = "${$0", .string = "text", .expectedString = "te${xt" );
|
||||
test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" );
|
||||
test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" );
|
||||
test(.regex = "x", .replace = "$5", .string = "text", .expectedString = "te$5t" );
|
||||
test(.regex = "x", .replace = "$5", .string = "te(x)t", .expectedString = "te($5)t" );
|
||||
test(.regex = "x", .replace = "${foo", .string = "text", .expectedString = "te${foot" );
|
||||
test(.regex = "(x)", .replace = "$5", .string = "text", .expectedString = "te$5t" );
|
||||
test(.regex = "(x)", .replace = "$1", .string = "text", .expectedString = "text" );
|
||||
test(.regex = "e(x)", .replace = "$1", .string = "text", .expectedString = "txt" );
|
||||
test(.regex = "e(x)", .replace = "$5", .string = "text", .expectedString = "t$5t" );
|
||||
test(.regex = "e(x)", .replace = "$4", .string = "text", .expectedString = "t$4t" );
|
||||
test(.regex = "e(x)", .replace = "$3", .string = "text", .expectedString = "t$3t" );
|
||||
test(.regex = "e(x)", .replace = "${1}", .string = "text", .expectedString = "txt" );
|
||||
test(.regex = "e(x)", .replace = "${3}", .string = "text", .expectedString = "t${3}t" );
|
||||
test(.regex = "e(x)", .replace = "${1}${3}", .string = "text", .expectedString = "tx${3}t" );
|
||||
test(.regex = "e(x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t");
|
||||
test(.regex = "e(?<foo>x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t");
|
||||
test(.regex = "e(?<foo>x)", .replace = "${1}${foo}", .string = "text", .expectedString = "txxt" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "${goll}${foo}", .string = "text", .expectedString = "t${goll}xt");
|
||||
test(.regex = "e(?<foo>x)", .replace = "${goll${foo}", .string = "text", .expectedString = "t${gollxt" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "${goll${foo}}", .string = "text", .expectedString = "t${gollx}t");
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "${${foo}}", .string = "text", .expectedString = "t${x}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${bfoo}}", .string = "text", .expectedString = "t${bfoo}}t");
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${foo}", .string = "text", .expectedString = "t${foo}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "$$", .string = "text", .expectedString = "t$t" );
|
||||
test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "txext" );
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext" );
|
||||
test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "txexxt" );
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "teexxt" );
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$_", .string = "texts", .expectedString = "teextextsts");
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$`", .string = "texts", .expectedString = "teextts" ),
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$'", .string = "texts", .expectedString = "teextsts" ),
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$&", .string = "texts", .expectedString = "teexexts" ),
|
||||
test(.regex = "x", .replace = "y", .string = "text", .expectedString = "teyt" );
|
||||
test(.regex = "x", .replace = "$", .string = "text", .expectedString = "te$t" );
|
||||
test(.regex = "x", .replace = "$1", .string = "text", .expectedString = "te$1t" );
|
||||
test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" );
|
||||
test(.regex = "x", .replace = "$5", .string = "text", .expectedString = "te$5t" );
|
||||
test(.regex = "x", .replace = "$5", .string = "te(x)t", .expectedString = "te($5)t" );
|
||||
test(.regex = "x", .replace = "${foo", .string = "text", .expectedString = "te${foot" );
|
||||
test(.regex = "(x)", .replace = "$5", .string = "text", .expectedString = "te$5t" );
|
||||
test(.regex = "(x)", .replace = "$1", .string = "text", .expectedString = "text" );
|
||||
test(.regex = "e(x)", .replace = "$1", .string = "text", .expectedString = "txt" );
|
||||
test(.regex = "e(x)", .replace = "$5", .string = "text", .expectedString = "t$5t" );
|
||||
test(.regex = "e(x)", .replace = "$4", .string = "text", .expectedString = "t$4t" );
|
||||
test(.regex = "e(x)", .replace = "$3", .string = "text", .expectedString = "t$3t" );
|
||||
test(.regex = "e(x)", .replace = "${1}", .string = "text", .expectedString = "txt" );
|
||||
test(.regex = "e(x)", .replace = "${3}", .string = "text", .expectedString = "t${3}t" );
|
||||
test(.regex = "e(x)", .replace = "${1}${3}", .string = "text", .expectedString = "tx${3}t" );
|
||||
test(.regex = "e(x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t");
|
||||
test(.regex = "e(?<foo>x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t");
|
||||
test(.regex = "e(?<foo>x)", .replace = "${1}${foo}", .string = "text", .expectedString = "txxt" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "${goll}${foo}", .string = "text", .expectedString = "t${goll}xt");
|
||||
test(.regex = "e(?<foo>x)", .replace = "${goll${foo}", .string = "text", .expectedString = "t${gollxt" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "${goll${foo}}", .string = "text", .expectedString = "t${gollx}t");
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "${${foo}}", .string = "text", .expectedString = "t${x}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${bfoo}}", .string = "text", .expectedString = "t${bfoo}}t");
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "$${foo}", .string = "text", .expectedString = "t${foo}t" );
|
||||
test(.regex = "e(?<foo>x)", .replace = "$$", .string = "text", .expectedString = "t$t" );
|
||||
test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "txext" );
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext" );
|
||||
test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "txexxt" );
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "teexxt" );
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$_", .string = "texts", .expectedString = "teextextsts");
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$`", .string = "texts", .expectedString = "teextts" );
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$'", .string = "texts", .expectedString = "teextsts" );
|
||||
test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$&", .string = "texts", .expectedString = "teexexts" );
|
||||
test(.regex = "<(.+?)>", .replace = "[$0:$1]", .string = "<i>am not</i>", .expectedString = "[<i>:i]am not[</i>:/i]");
|
||||
test(.regex = "(?<foo>e)(?<foo>x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext", .regexFlags = PCRE_DUPNAMES);
|
||||
test(.regex = "\b(\w+)(\s)(\w+)\b", .replace = "$3$2$1", .string = "one two", .expectedString = "two one");
|
||||
test(.regex = "\b(\d+)\s?USD", .replace = "$$$1", .string = "103 USD", .expectedString = "$103" );
|
||||
test(.regex = "\b(?<w1>\w+)(\s)(?<w2>\w+)\b", .replace = "${w2} ${w1}", .string = "one two", .expectedString = "two one");
|
||||
test(.regex = "(\$*(\d*(\.+\d+)?){1})", .replace = "**$&", .string = "$1.30", .expectedString = "**$1.30**");
|
||||
test(.regex = "B+", .replace = "$`", .string = "AABBCC", .expectedString = "AAAACC");
|
||||
test(.regex = "B+", .replace = "$'", .string = "AABBCC", .expectedString = "AACCCC");
|
||||
test(.regex = "B+(C+)", .replace = "$+", .string = "AABBCCDD", .expectedString = "AACCDD");
|
||||
test(.regex = "B+", .replace = "$_", .string = "AABBCC", .expectedString = "AAAABBCCCC");
|
||||
test(.regex = "(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", .replace = "${S}$11$1", .string = "F2345678910L71", .expectedString = "F2345678910L71"),
|
||||
test(.regex = "(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", .replace = "${S}$11$1", .string = "F2345678910LL1", .expectedString = "${S}LF1");
|
||||
}
|
||||
|
||||
server_print("^tChecking moar #1...");
|
||||
{
|
||||
test(.string = "(?(w)a|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(w)|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(w)a)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(w)a|)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(w)?|a|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(w)||o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(w)(a)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(w))\a|)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(2)a|o)" , .regex = "\(\?\([^^\)]+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?(|)a|o)" , .regex = "\(\?\([^^\)]+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "a\3b" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "a\5b");
|
||||
test(.string = "\3b" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "\5b");
|
||||
test(.string = "\\\3b" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "\\\5b");
|
||||
test(.string = "\\\k<g>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w+)>" , .replace = "\5", .expectedString = "\\\5");
|
||||
test(.string = "\\\\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "\\\\k'g'");
|
||||
test(.string = "a\\\\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "a\\\\k'g'");
|
||||
test(.string = "\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "\5");
|
||||
test(.string = "(?<n1-n2>)" , .regex = "\(\?<[A-Za-z]\w*-[A-Za-z]\w*>.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?'n1-n2'a)" , .regex = "\(\?'[A-Za-z]\w*-[A-Za-z]\w*'.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "\p{Isa}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Ina}");
|
||||
test(.string = "\p{Is}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Is}");
|
||||
test(.string = "\p{Isa" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Isa");
|
||||
test(.string = "a(?#|)" , .regex = "\(\?#[^^\)]*\)" , .replace = "", .expectedString = "a");
|
||||
test(.string = "(?#|)" , .regex = "\(\?#[^^\)]*\)" , .replace = "", .expectedString = "");
|
||||
test(.string = "(?#|)" , .regex = "\#[^^\n\r]*" , .replace = "", .expectedString = "(?");
|
||||
test(.string = "(?inm-xs:\#)" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(r");
|
||||
test(.string = "(?ni:())" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(r)");
|
||||
test(.string = "(?x-i:)" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(?x-i:)");
|
||||
test(.string = "(?n:))" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(?n:))");
|
||||
test(.string = "(?<n1>)" , .regex = "\(\?<[A-Za-z]\w*>.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?'n1'y)" , .regex = "\(\?'[A-Za-z]\w*'.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?<45>y)" , .regex = "\(\?<\d+>.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?'7'o)" , .regex = "\(\?'\d+'.*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "\\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "\\r");
|
||||
test(.string = "a\\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "a\\r");
|
||||
test(.string = "\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "\r");
|
||||
test(.string = "a\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "a\r");
|
||||
test(.string = "\(" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "a\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "ar");
|
||||
test(.string = "?:" , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "?<!" , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "?-" , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "\(?<n>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?<[A-Za-z]\w*>", .replace = "r", .expectedString = "\(r");
|
||||
test(.string = "a\(?'n'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?'[A-Za-z]\w*'", .replace = "r", .expectedString = "a\(r");
|
||||
test(.string = "\\(?<2>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?<\d+>" , .replace = "r", .expectedString = "\\(r");
|
||||
test(.string = "(?'2'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?'\d+'" , .replace = "r", .expectedString = "(r");
|
||||
test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
|
||||
test(.string = "\[a\bb]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[a\u8b]");
|
||||
test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
|
||||
test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
|
||||
test(.string = "\[\\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\\u8]");
|
||||
test(.string = "[[]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\\[" , .expectedString = "[\[]");
|
||||
test(.string = "\[[]" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[[]");
|
||||
test(.string = "\[\[]" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[\[]");
|
||||
test(.string = "\[\[]" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[\[]");
|
||||
test(.string = "\{" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\\{");
|
||||
test(.string = "\{" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\\{");
|
||||
test(.string = "\{1,2}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1,2}");
|
||||
test(.string = "\{1}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1}");
|
||||
test(.string = "\{1,}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1,}");
|
||||
test(.string = "\{1" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\\{", .expectedString = "\{1");
|
||||
test(.string = "\\(?!{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "\5", .expectedString = "?!");
|
||||
test(.string = "{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
|
||||
test(.string = "({1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "(?{1}");
|
||||
test(.string = "(?:{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
|
||||
test(.string = "\({1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "\({1}");
|
||||
test(.string = "\p{Isa}" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = "\p{Ina}");
|
||||
test(.string = "\p{Is}" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = "\p{Is}");
|
||||
test(.string = "\p{Isa" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = "\p{Isa");
|
||||
test(.string = "\}" , .regex = "(?!(\\A|[^^\\])(\\{2})*\\{\\d\\d*(,(\\d\\d*)?)?)\\}", .replace = "\\}", .expectedString = "\}");
|
||||
test(.string = "{\}" , .regex = "(?!(\A|[^^\^^])(\^^{2})*\{\d\d*(,(\d\d*)?)?)\}", .replace = "\\}", .expectedString = "{\\}");
|
||||
test(.string = "{1,2}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "{1,2\}");
|
||||
test(.string = "\{1}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\}");
|
||||
test(.string = "\{1\}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\\}");
|
||||
test(.string = "\{1}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\}");
|
||||
test(.string = "{1,}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "{1,\}");
|
||||
test(.string = "a(?<!b*c)" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "ar");
|
||||
test(.string = "a(?<!b+c)" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "ar");
|
||||
test(.string = "(?<!b{1}c))" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "(?<!b{1}c))");
|
||||
test(.string = "(?<!b{1,}c)" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "r");
|
||||
test(.string = "(?<!b{1,4}c)" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "(?<!b{1,4}c)");
|
||||
test(.string = "a\3b" , .regex = "\\(\d+)" , .replace = "\5", .expectedString = "a\5b");
|
||||
test(.string = "\3b" , .regex = "\\(\d+)" , .replace = "\5", .expectedString = "\5b");
|
||||
test(.string = "\\3b" , .regex = "(?!\\\\)\\(\d)" , .replace = "\5", . expectedString = "\\5b");
|
||||
test(.string = "a\\3b" , .regex = "(?:(\\){0,3})\\(\d)" , .replace = "\5", . expectedString = "a\5b");
|
||||
test(.string = "\\k<g>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w)>" , .replace = "\5", .expectedString = "\\5");
|
||||
test(.string = "a\\k<g>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w)>" , .replace = "\5", .expectedString = "a\\5");
|
||||
test(.string = "\\k'g'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "\\5");
|
||||
test(.string = "a\\k'g'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "a\\5");
|
||||
test(.string = "\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "\5");
|
||||
}
|
||||
|
||||
server_print("^tChecking moar #2...");
|
||||
{
|
||||
// E-mail address pattern with an optional display name and a conditional
// closing angle bracket (named group "angle").
// The replacement "$1$4@$7net>" rebuilds the address from numbered groups:
// judging from expectedString, $1 yields "Name Surname <", $4 the local
// part "name.surname" and $7 the last domain label "blah.", so the
// top-level domain ".com" is swapped for ".net".
test(.regex = "^^((?>[a-zA-Z\d!#$%&'*+\-\/=?^^_`{|}~]+\x20*|^"((?=[\x01-\x7f])[^^^"\\]|\\[\x01-\x7f])*^"\x20*)*(?<angle><))?((?!\.)(?>\.?[a-zA-Z\d!#$%&'*+\-\/=?^^_`{|}~]+)+|^"((?=[\x01-\x7f])[^^^"\\]|\\[\x01-\x7f])*^")@(((?!-)[a-zA-Z\d\-]+(?<!-)\.)+[a-zA-Z]{2,}|\[(((?(?<!\[)\.)(25[0-5]|2[0-4]\d|[01]?\d?\d)){4}|[a-zA-Z\d\-]*[a-zA-Z\d]:((?=[\x01-\x7f])[^^\\\[\]]|\\[\x01-\x7f])+)\])(?(angle)>)$" ,
|
||||
.replace = "$1$4@$7net>",
|
||||
.string = "Name Surname <name.surname@blah.com>",
|
||||
.expectedString = "Name Surname <name.surname@blah.net>"
|
||||
);
|
||||
|
||||
// Replaces every word that starts with an uppercase ASCII letter and has at
// least one more word character with "*snip*" across a long sample text.
// A lone capital such as "A" is expected to stay, as is lowercase text.
// NOTE(review): expectedString stops at "www.de" although the input goes on;
// presumably the output is cut by the destination buffer size used by the
// test helper - confirm against test().
test(.regex = "([A-Z])\w+",
|
||||
.replace = "*snip*",
|
||||
.string = "Welcome to RegExr v2.0 by gskinner.com!\
|
||||
\
|
||||
Edit the Expression & Text to see matches. Roll over matches or the expression for details. Undo mistakes with ctrl-z. Save & Share expressions with friends or the Community. A full Reference & Help is available in the Library, or watch the video Tutorial.\
|
||||
\
|
||||
Sample text for testing:\
|
||||
abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ\
|
||||
:0123456789 +-.,!@#$%^^&*();\/|<>^"'\
|
||||
12345 -98.7 3.141 .6180 9,000 +42\
|
||||
555.123.4567 +1-(800)-555-2468\
|
||||
foo@demo.net bar.ba@test.co.uk\
|
||||
www.demo.com http://foo.co.uk/\
|
||||
http://regexr.com/foo.html?q=bar",
|
||||
|
||||
.expectedString = "*snip* to *snip* v2.0 by gskinner.com!\
|
||||
\
|
||||
*snip* the *snip* & *snip* to see matches. *snip* over matches or the expression for details. *snip* mistakes with ctrl-z. *snip* & *snip* expressions with friends or the *snip*. A full *snip* & *snip* is available in the *snip*, or watch the video *snip*.\
|
||||
\
|
||||
*snip* text for testing:\
|
||||
abcdefghijklmnopqrstuvwxyz *snip*\
|
||||
:0123456789 +-.,!@#$%^^&*();\/|<>^"'\
|
||||
12345 -98.7 3.141 .6180 9,000 +42\
|
||||
555.123.4567 +1-(800)-555-2468\
|
||||
foo@demo.net bar.ba@test.co.uk\
|
||||
www.de",
|
||||
.regexFlags = PCRE_EXTENDED
|
||||
);
|
||||
|
||||
// Strips C-style block comments, allowing one level of nesting inside a
// comment (the pattern repeats the comment body once inside itself).
// The replacement is the empty string, so matched comments vanish.
// The last input comment nests deeper than the pattern supports; per
// expectedString only parts of it are removed and the remaining words
// are left wrapped in the outer comment markers.
test(.regex = "/\*(?>[^^*/]+|\*[^^/]|/[^^*]|/\*(?>[^^*/]+|\*[^^/]|/[^^*])*\*/)*\*/",
|
||||
.replace = "",
|
||||
.string = "/* comment */\
|
||||
no comment\
|
||||
/* comment\
|
||||
spanning\
|
||||
multiple\
|
||||
lines */\
|
||||
/* comment /* nesting */ of /* two */ levels supported */\
|
||||
/* comment /* nesting */ of /* /* more than */ two levels */ not supported */",
|
||||
.expectedString = "no comment\
|
||||
/* comment of not supported */"
|
||||
);
|
||||
|
||||
// URL rewriting with named groups: each URL is rebuilt from ${protocol}
// and ${file}, forcing the host to "site.com" and appending a newline.
// PCRE_CASELESS lets the uppercase character classes match the lowercase
// input. Judging from expectedString, REGEX_FORMAT_NOCOPY drops all text
// that is not part of a match (e.g. "Something interesting at" is absent),
// and the query string captured by "parameters" is not re-emitted.
test(.regex = "\b(?<protocol>https?|ftp)://(?<domain>[A-Z0-9.-]+)(?<file>/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(?<parameters>\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
|
||||
.replace = "${protocol}s://site.com${file}^n",
|
||||
.string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
|
||||
.expectedString = "https://site.com^nhttps://site.com/^nhttps://site.com/test.php^nhttps://site.com/index.php^nhttps://site.com^n",
|
||||
.regexFlags = PCRE_CASELESS | PCRE_EXTENDED,
|
||||
.formatFlags = REGEX_FORMAT_NOCOPY
|
||||
);
|
||||
|
||||
// Same URL rewriting as with named groups, but using numbered groups:
// $1 is the protocol and $3 the optional file path. The expected output
// is identical: one rewritten URL per match, each followed by a newline,
// with unmatched text left out (REGEX_FORMAT_NOCOPY).
test(.regex = "\b(https?|ftp)://([A-Z0-9.-]+)(/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
|
||||
.replace = "$1s://site.com$3^n",
|
||||
.string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
|
||||
.expectedString = "https://site.com^nhttps://site.com/^nhttps://site.com/test.php^nhttps://site.com/index.php^nhttps://site.com^n",
|
||||
.regexFlags = PCRE_CASELESS | PCRE_EXTENDED,
|
||||
.formatFlags = REGEX_FORMAT_NOCOPY
|
||||
);
|
||||
|
||||
// Numbered-group URL rewriting again, now with REGEX_FORMAT_FIRSTONLY added
// to REGEX_FORMAT_NOCOPY: only the first URL is expected to be rewritten,
// and nothing else from the input appears in the output.
test(.regex = "\b(https?|ftp)://([A-Z0-9.-]+)(/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
|
||||
.replace = "$1s://site.com$3^n",
|
||||
.string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
|
||||
.expectedString = "https://site.com^n",
|
||||
.regexFlags = PCRE_CASELESS | PCRE_EXTENDED,
|
||||
.formatFlags = REGEX_FORMAT_NOCOPY | REGEX_FORMAT_FIRSTONLY
|
||||
);
|
||||
|
||||
// Duplicate-line removal: with PCRE_MULTILINE, a whole line (captured with
// a possessive quantifier) plus its line break is matched only when the
// look-ahead finds the same line again further down (back-reference \1).
// Such lines are replaced by the empty string, so only the last occurrence
// of each line is expected to remain: one, two, three, four.
test(.regex = "^^(.++)\r?\n(?=(?:^^(?!\1$).*+\r?\n)*+\1$)",
|
||||
.replace = "",
|
||||
.string = "one^n\
|
||||
two^n\
|
||||
three^n\
|
||||
four^n\
|
||||
two^n\
|
||||
three^n\
|
||||
four^n\
|
||||
three^n\
|
||||
four^n\
|
||||
four",
|
||||
.expectedString = "one^n\
|
||||
two^n\
|
||||
three^n\
|
||||
four",
|
||||
.regexFlags = PCRE_EXTENDED | PCRE_MULTILINE
|
||||
);
|
||||
}
|
||||
|
||||
end();
|
||||
}
|
|
@ -225,6 +225,7 @@ scripting_files = [
|
|||
'testsuite/menutest.sma',
|
||||
'testsuite/native_test.sma',
|
||||
'testsuite/nvault_test.sma',
|
||||
'testsuite/regex_test.sma',
|
||||
'testsuite/sorttest.sma',
|
||||
'testsuite/strbreak.sma',
|
||||
'testsuite/sqlxtest.sma',
|
||||
|
|
Loading…
Reference in New Issue
Block a user