Regex: Add regex_replace native.

This commit is contained in:
Arkshine 2014-07-17 11:20:52 +02:00
parent 287f471ac4
commit 939a724b1a
11 changed files with 1432 additions and 56 deletions

View File

@ -18,6 +18,7 @@ binary.sources = [
'sdk/amxxmodule.cpp',
'module.cpp',
'CRegEx.cpp',
'utils.cpp',
]
AMXX.modules += [builder.Add(binary)]

View File

@ -30,10 +30,12 @@
* you do not wish to do so, delete this exception statement from your
* version.
*/
#include "amxxmodule.h"
#include "pcre.h"
#include "CRegEx.h"
#include <string.h>
#include "amxxmodule.h"
#include <ctype.h>
#include "utils.h"
RegEx::RegEx()
{
@ -43,6 +45,9 @@ RegEx::RegEx()
mFree = true;
subject = NULL;
mSubStrings.clear();
mMatchesSubs.clear();
mSubsNameTable.clear();
mNumSubpatterns = 0;
}
void RegEx::Clear()
@ -57,6 +62,9 @@ void RegEx::Clear()
delete[] subject;
subject = NULL;
mSubStrings.clear();
mMatchesSubs.clear();
mSubsNameTable.clear();
mNumSubpatterns = 0;
}
RegEx::~RegEx()
@ -143,6 +151,19 @@ int RegEx::Compile(const char *pattern, int iFlags)
mFree = false;
/**
* Retrieve the number of captured groups
* including the full match.
*/
pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns);
++mNumSubpatterns;
/**
* Build the table with the named groups,
* which contain an index and a name per group.
*/
MakeSubpatternsTable(mNumSubpatterns);
return 1;
}
@ -153,13 +174,13 @@ int RegEx::Match(const char *str)
if (mFree || re == NULL)
return -1;
this->ClearMatch();
ClearMatch();
//save str
subject = new char[strlen(str) + 1];
strcpy(subject, str);
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, REGEX_MAX_SUBPATTERNS);
if (rc < 0)
{
@ -188,61 +209,103 @@ int RegEx::Match(const char *str)
int RegEx::MatchAll(const char *str)
{
int rc = 0;
int rr = 0;
int offset = 0;
int rc = 0;
int startOffset = 0;
int exoptions = 0;
int notEmpty = 0;
int sizeOffsets = mNumSubpatterns * 3;
int subjectLen = strlen(str);
if (mFree || re == NULL)
{
return -1;
}
this->ClearMatch();
ClearMatch();
//save str
subject = new char[strlen(str) + 1];
subject = new char[subjectLen + 1];
strcpy(subject, str);
RegExSub sub, whole;
while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30)))
RegExSub sub;
while (1)
{
if (rr < 0)
rr = pcre_exec(re, NULL, subject, (int)subjectLen, startOffset, exoptions | notEmpty, ovector, REGEX_MAX_SUBPATTERNS);
/**
* The string was already proved to be valid UTF-8
*/
exoptions |= PCRE_NO_UTF8_CHECK;
/**
* Too many substrings
*/
if (rr == 0)
{
if (rr == PCRE_ERROR_NOMATCH)
rr = sizeOffsets / 3;
}
if (rr > 0)
{
mMatchesSubs.append(rr);
for (int s = 0; s < rr; ++s)
{
break;
sub.start = ovector[2 * s];
sub.end = ovector[2 * s + 1];
mSubStrings.append(sub);
}
}
else if (rr == PCRE_ERROR_NOMATCH)
{
/**
* If we previously set PCRE_NOTEMPTY after a null match,
* this is not necessarily the end. We need to advance
* the start offset, and continue. Fudge the offset values
* to achieve this, unless we're already at the end of the string.
*/
if (notEmpty && startOffset < (int)subjectLen)
{
ovector[0] = startOffset;
ovector[1] = startOffset + 1;
}
else
{
mErrorOffset = rr;
if (rc)
this->ClearMatch();
return -1;
break;
}
}
rc += rr;
mSubStrings.ensure(rc);
for (int s = 1; s < rr; ++s)
else
{
sub.start = ovector[2 * s];
sub.end = ovector[2 * s + 1];
mSubStrings.append(sub);
mErrorOffset = rr;
if (mMatchesSubs.length())
{
ClearMatch();
}
return -1;
}
offset = ovector[1];
/**
* If we have matched an empty string, mimic what Perl's /g options does.
* This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
* the match again at the same point. If this fails (picked up above) we
* advance to the next character.
*/
notEmpty = (ovector[1] == ovector[0]) ? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
/**
* Advance to the next piece.
*/
startOffset = ovector[1];
}
if (!rc)
if (!mMatchesSubs.length())
{
return 0;
sub = mSubStrings.at(0);
whole.start = sub.start;
sub = mSubStrings.back();
whole.end = sub.end;
mSubStrings.insert(0, whole);
}
return 1;
}
@ -256,18 +319,14 @@ void RegEx::ClearMatch()
delete[] subject;
subject = NULL;
mSubStrings.clear();
mMatchesSubs.clear();
}
const char *RegEx::GetSubstring(int s, char buffer[], int max)
const char *getSubstring(char *subject, size_t start, size_t end, char buffer[], size_t max, size_t *outlen)
{
int i = 0;
if ((size_t)s >= mSubStrings.length() || s < 0)
return NULL;
RegExSub sub = mSubStrings.at(s);
char *substr_a = subject + sub.start;
int substr_l = sub.end - sub.start;
size_t i;
char * substr_a = subject + start;
size_t substr_l = end - start;
for (i = 0; i < substr_l; i++)
{
@ -278,5 +337,516 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
buffer[i] = '\0';
if (outlen)
{
*outlen = i;
}
return buffer;
}
/**
 * Copies one stored sub-string of the last match into a caller buffer.
 *
 * @param start   Index into the flat mSubStrings list.
 * @param buffer  Destination buffer.
 * @param max     Limit forwarded unchanged to getSubstring().
 * @param outlen  Optional; receives the number of characters copied.
 *
 * @return        buffer, or NULL when the index is out of range.
 */
const char *RegEx::GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen)
{
	/* 'start' is unsigned: a negative value passed by a caller shows up here as a huge index. */
	if (start >= mSubStrings.length())
	{
		return NULL;
	}

	RegExSub sub = mSubStrings.at(start);

	/**
	 * A group that did not take part in the match is stored with negative
	 * offsets. Report it as an empty string instead of converting those
	 * offsets into an out-of-range pointer.
	 */
	if (sub.start < 0 || sub.end < sub.start)
	{
		buffer[0] = '\0';

		if (outlen)
		{
			*outlen = 0;
		}

		return buffer;
	}

	return getSubstring(subject, sub.start, sub.end, buffer, max, outlen);
}
/**
 * Copies the [startOffset, endOffset) byte range of the saved subject
 * string into a caller buffer.
 *
 * @param startOffset  Offset of the first byte to copy.
 * @param endOffset    Offset one past the last byte to copy.
 * @param buffer       Destination buffer.
 * @param max          Limit forwarded unchanged to getSubstring().
 * @param outlen       Optional; receives the number of characters copied.
 *
 * @return             buffer, or NULL when there is no saved subject or
 *                     the range is inverted.
 */
const char *RegEx::GetSubstring(size_t startOffset, size_t endOffset, char buffer[], size_t max, size_t *outlen)
{
	/**
	 * Both offsets are unsigned, so they can never be negative.
	 * What can go wrong is a missing subject, or an inverted range whose
	 * length (end - start) would wrap around to a huge value.
	 */
	if (subject == NULL || endOffset < startOffset)
	{
		return NULL;
	}

	return getSubstring(subject, startOffset, endOffset, buffer, max, outlen);
}
/**
 * Fills mSubsNameTable with one (index, name) entry per named group of the
 * compiled pattern, as reported by pcre_fullinfo().
 *
 * @param numSubpatterns  Currently unused.
 *
 * On a pcre_fullinfo() failure the table is left empty.
 */
void RegEx::MakeSubpatternsTable(int numSubpatterns)
{
	(void)numSubpatterns;

	int nameCount = 0;
	int rc = pcre_fullinfo(re, NULL, PCRE_INFO_NAMECOUNT, &nameCount);

	if (rc < 0)
	{
		return;
	}

	if (nameCount > 0)
	{
		const char *nameTable = NULL;
		int nameSize = 0;
		int i = 0;

		int rc1 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMETABLE, &nameTable);
		int rc2 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameSize);

		rc = rc2 ? rc2 : rc1;

		if (rc < 0)
		{
			mSubsNameTable.clear();
			return;
		}

		NamedGroup data;

		while (i++ < nameCount)
		{
			/**
			 * Each entry starts with the group number stored in two bytes,
			 * most significant byte first, followed by the group name.
			 */
			data.index = ((size_t)(unsigned char)nameTable[0] << 8) | (size_t)(unsigned char)nameTable[1];
			data.name = nameTable + 2;

			mSubsNameTable.append(ke::Move(data));

			nameTable += nameSize;
		}
	}
}
/**
 * Copies at most 'room' bytes of the 'len' bytes at 'src' to 'dest'.
 * No terminator is written. Returns the number of bytes copied.
 */
static size_t regexBoundedCopy(char *dest, size_t room, const char *src, size_t len)
{
	const size_t count = (len < room) ? len : room;

	memcpy(dest, src, count);

	return count;
}

/**
 * Performs a search and replace on 'text' using the compiled pattern.
 *
 * @param text        String to search; modified in place.
 * @param textMaxLen  Size used for the scratch buffers and forwarded to the
 *                    replace helpers.
 * @param replace     Replacement string; may contain the format specifiers
 *                    listed inside the function.
 * @param replaceLen  Length of 'replace'.
 * @param flags       REGEX_FORMAT_* flags.
 *
 * @return            -1 when MatchAll() fails, otherwise the value of the
 *                    replacement counter.
 */
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
{
	char *output = text;

	/**
	 * Retrieve all matches and store them in
	 * mSubStrings list.
	 */
	if (MatchAll(output) == -1)
	{
		return -1;
	}

	size_t subjectLen = strlen(subject);
	size_t total = 0;
	size_t baseIndex = 0;
	size_t diffLength = 0;

	/* Scratch buffer for the expanded replacement: textMaxLen characters plus terminator. */
	char *toReplace = new char[textMaxLen + 1];
	char *toSearch = NULL;

	/**
	 * Characters which are not matched are not copied when replacing matches.
	 * The original text (output buffer) is then considered empty.
	 */
	if (flags & REGEX_FORMAT_NOCOPY)
	{
		*output = '\0';
	}
	else
	{
		/**
		 * This is used only when we do replace matches.
		 */
		toSearch = new char[textMaxLen + 1];
	}

	/**
	 * Loop over all matches found.
	 */
	for (size_t i = 0; i < mMatchesSubs.length(); ++i)
	{
		char *ptr = toReplace;

		size_t browsed = 0;
		size_t searchLen = 0;
		size_t length = 0;

		/**
		 * Build the replace string as it can contain backreferences
		 * and these need to be parsed.
		 *
		 * 'browsed' is the write cursor inside toReplace. It is kept strictly
		 * below textMaxLen while writing so that the terminator written after
		 * the loop always lands inside the buffer.
		 */
		for (const char *s = replace, *end = s + replaceLen; s < end && browsed < textMaxLen; ++s, ++browsed)
		{
			unsigned int c = *s;

			/**
			 * Supported format specifiers:
			 *
			 * $number : Substitutes the substring matched by group number.
			 *           n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
			 * ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters).
			 * $&      : Substitutes a copy of the whole match.
			 * $`      : Substitutes all the text of the input string before the match.
			 * $'      : Substitutes all the text of the input string after the match.
			 * $+      : Substitutes the last group that was captured.
			 * $_      : Substitutes the entire input string.
			 * $$      : Substitutes a literal "$".
			 */
			if (c == '$' || c == '\\')
			{
				/* Characters that can still be written at the cursor, terminator excluded. */
				const size_t room = textMaxLen - browsed;

				switch (*++s)
				{
					case '\0':
					{
						/**
						 * End of string.
						 * Copy one character.
						 */
						*(ptr + browsed) = c;
						break;
					}
					case '&':
					{
						/**
						 * Concatenate retrieved full match sub-string.
						 * length - 1 to overwrite EOS.
						 */
						GetSubstring(baseIndex, ptr + browsed, room, &length);
						browsed += length - 1;
						break;
					}
					case '`':
					{
						/**
						 * Concatenate part of original text up to
						 * first sub-string position.
						 */
						length = regexBoundedCopy(ptr + browsed, room, subject, mSubStrings.at(baseIndex).start);
						browsed += length - 1;
						break;
					}
					case '\'':
					{
						/**
						 * Concatenate part of original text from
						 * last sub-string end position to EOS.
						 */
						const size_t tailStart = mSubStrings.at(baseIndex).end;

						length = regexBoundedCopy(ptr + browsed, room, subject + tailStart, subjectLen - tailStart);
						browsed += length - 1;
						break;
					}
					case '+':
					{
						/**
						 * Copy the last group that was captured.
						 */
						GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, room, &length);
						browsed += length - 1;
						break;
					}
					case '_':
					{
						/**
						 * Copy the entire input string.
						 */
						length = regexBoundedCopy(ptr + browsed, room, subject, subjectLen);
						browsed += length - 1;
						break;
					}
					case '$':
					case '\\':
					{
						/**
						 * Copy the single character $ or \.
						 */
						*(ptr + browsed) = c;
						break;
					}
					case '0': case '1': case '2': case '3': case '4':
					case '5': case '6': case '7': case '8': case '9':
					case '{':
					{
						/**
						 * Checking backreference.
						 * Which can be either $n, ${n} or ${name}.
						 */
						size_t backref = -1;
						const char *walk = s;
						bool inBrace = false;
						bool nameCheck = false;

						/**
						 * ${nn}.
						 *  ^
						 */
						if (*walk == '{')
						{
							inBrace = true;
							++walk;
						}

						/**
						 * Valid number.
						 * $nn or ${nn}
						 *  ^       ^
						 */
						if (*walk >= '0' && *walk <= '9')
						{
							backref = *walk - '0';
							++walk;
						}
						else if (inBrace)
						{
							nameCheck = true;

							/**
							 * Not a valid number.
							 * Checking as string.
							 * ${name}
							 *   ^
							 */
							if (*walk)
							{
								const char *pch = strchr(walk, '}');

								if (pch != NULL)
								{
									/**
									 * A named group has at most 32 characters (PCRE), plus terminator.
									 * The copy is clamped so that an over-long name in the replace
									 * string cannot overflow the local buffer; such a name simply
									 * will not match any group.
									 */
									char name[33];
									size_t nameCount = pch - walk + 1;

									if (nameCount > sizeof(name))
									{
										nameCount = sizeof(name);
									}

									strncopy(name, walk, nameCount);

									/* PCRE_INFO_OPTIONS writes an unsigned long, not an int. */
									unsigned long int options = 0;
									int num = 0;
									pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &options);

									/**
									 * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
									 * as pcre_get_stringnumber output order is not defined.
									 */
									if (options & PCRE_DUPNAMES)
									{
										memset(ovector, 0, sizeof(ovector));

										/* Never write more pairs than ovector can hold. */
										size_t pairs = mMatchesSubs.at(i);

										if (pairs > REGEX_MAX_SUBPATTERNS / 2)
										{
											pairs = REGEX_MAX_SUBPATTERNS / 2;
										}

										/**
										 * pcre_copy_named_substring needs a vector containing sub-patterns ranges
										 * for a given match.
										 */
										for (size_t j = 0; j < pairs; ++j)
										{
											ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
											ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
										}

										/* room + 1: the space left at the cursor, terminator included. */
										num = pcre_copy_named_substring(re, subject, ovector, (int)pairs, name, ptr + browsed, (int)(room + 1));

										if (num >= 0)
										{
											browsed += num - 1;
											s = pch;
											break;
										}

										if (num != PCRE_ERROR_NOSUBSTRING)
										{
											/**
											 * Any other error (e.g. the sub-string does not fit):
											 * emit nothing for this specifier rather than adding
											 * a negative error code to the cursor.
											 */
											--browsed;
											s = pch;
											break;
										}

										++pch;
									}
									else
									{
										/**
										 * Retrieve sub-pattern index from a given name.
										 */
										num = pcre_get_stringnumber(re, name);

										if (num != PCRE_ERROR_NOSUBSTRING)
										{
											backref = num;
											walk = ++pch;
										}
									}

									if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
									{
										/**
										 * If a sub-string for a given match is not found, or if > to
										 * number of sub-patterns we still need to check if this
										 * group name is a valid one because if so we want to escape it.
										 * Looking at the name table.
										 */
										bool found = false;

										for (size_t n = 0; n < mSubsNameTable.length(); ++n)
										{
											if (!mSubsNameTable.at(n).name.compare(name))
											{
												--browsed;
												s = --pch;
												found = true;
												break;
											}
										}

										if (found)
										{
											continue;
										}
									}
								}
							}
						}

						if (!nameCheck)
						{
							/**
							 * Valid second number.
							 * $nn or ${nn}
							 *   ^       ^
							 */
							if (*walk && *walk >= '0' && *walk <= '9')
							{
								backref = backref * 10 + *walk - '0';
								++walk;
							}

							if (inBrace)
							{
								/**
								 * Invalid specifier
								 * Either hit EOS or missing }.
								 * ${n  or ${nn  or ${nx or ${nnx
								 *    ^        ^       ^        ^
								 */
								if (*walk == '\0' || *walk != '}')
								{
									backref = -1;
								}
								else
								{
									++walk;
								}
							}
						}

						length = walk - s;
						s = --walk;

						/**
						 * We can't provide a capture number >= to total that pcre_exec has found.
						 * 0 is implicitly accepted, same behavior as $&.
						 *
						 * NOTE(review): backref is unsigned, so 'backref >= 0' is always true and
						 * an invalid specifier (backref == -1) still passes '(int)backref < mNumSubpatterns'.
						 * It then falls into the "unavailable index" branch below. Left unchanged
						 * because it affects the produced text - confirm the intended output.
						 */
						if (backref >= 0 && (int)backref < mNumSubpatterns)
						{
							/**
							 * Valid available index for a given match.
							 */
							if (backref < mMatchesSubs.at(i))
							{
								/**
								 * Concatenate retrieved sub-string.
								 * length - 1 to overwrite EOS.
								 */
								GetSubstring(baseIndex + backref, ptr + browsed, room, &length);
								browsed += length - 1;
							}
							else
							{
								/**
								 * Valid unavailable index for a given match.
								 */
								--browsed;
							}
						}
						else
						{
							/**
							 * If we are here it means the syntax is valid but the sub-pattern doesn't exist.
							 * So, copy as it is, including $.
							 */
							const size_t copied = regexBoundedCopy(ptr + browsed, room, s - length, length + 1);
							browsed += copied - 1;
						}

						break;
					}
					default:
					{
						/**
						 * Not a valid format modifier.
						 * So we copy characters as it is.
						 */
						*(ptr + browsed) = *s;
						break;
					}
				}
			}
			else
			{
				/**
				 * At this point, direct copy.
				 */
				*(ptr + browsed) = c;
			}
		}

		*(ptr + browsed) = '\0';

		/**
		 * Concatenate only replace string of each match,
		 * as we don't want to copy unmatched characters.
		 */
		if (flags & REGEX_FORMAT_NOCOPY)
		{
			/**
			 * We want just the first occurrence.
			 *
			 * NOTE(review): the counter is incremented before the break, so
			 * with FIRSTONLY and more than one match the returned count is 2.
			 */
			if (total++ && (flags & REGEX_FORMAT_FIRSTONLY))
			{
				break;
			}

			/* strncat takes the number of characters to append, not the buffer size. */
			const size_t used = strlen(output);

			if (used < textMaxLen)
			{
				strncat(output, toReplace, textMaxLen - used);
			}
		}
		else
		{
			/**
			 * Retrieves full string of a given match.
			 */
			const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);

			/**
			 * We get something to replace, but the sub-pattern to search is empty.
			 * We insert the replacement either at the start or at the end of the string.
			 */
			if (*toReplace && !searchLen)
			{
				if (output - text > 0)
				{
					strncat(output, toReplace, textMaxLen);
				}
				else
				{
					/* Only append what still fits in the scratch buffer. */
					strncat(toReplace, text, textMaxLen - strlen(toReplace));
					strncopy(text, toReplace, strlen(toReplace) + 1);
				}

				++total;
			}
			/**
			 * NOTE(review): the start pointer is offset into 'text' but the full
			 * textMaxLen is passed as the available size - confirm against the
			 * real capacity of the caller's buffer.
			 */
			else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
			{
				/**
				 * Then we simply do a replace.
				 * Probably not the most efficient, but this should be at least safe.
				 * To avoid issue where the function could find a string which is not at the expected index,
				 * we force the input string to start from index of the full match.
				 */
				++total;
			}

			if (total && (flags & REGEX_FORMAT_FIRSTONLY))
			{
				break;
			}
		}

		/**
		 * mSubStrings is a flat list containing all sub-patterns of all matches.
		 * The number of sub-patterns can vary per match. So we calculate the position in the list,
		 * from where the first sub-pattern result of the next match starts.
		 */
		baseIndex += mMatchesSubs.at(i);
		diffLength += browsed - searchLen;
	}

	delete[] toReplace;

	if (toSearch != NULL)
	{
		delete[] toSearch;
	}

	/**
	 * Return the number of successful replacements.
	 */
	return total;
}

View File

@ -34,6 +34,19 @@
#define _INCLUDE_CREGEX_H
#include <am-vector.h>
#include <am-string.h>
/**
* Maximum number of sub-patterns, here 50 (this should be a multiple of 3).
*/
#define REGEX_MAX_SUBPATTERNS 150
/**
* Flags to use with regex_replace, to control the replacement behavior.
*/
#define REGEX_FORMAT_DEFAULT 0 // Uses the standard formatting rules to replace matches.
#define REGEX_FORMAT_NOCOPY 1 // The sections that do not match the regular expression are not copied when replacing matches.
#define REGEX_FORMAT_FIRSTONLY 2 // Only the first occurrence of a regular expression is replaced.
class RegEx
{
@ -42,6 +55,11 @@ public:
int start, end;
};
struct NamedGroup {
ke::AString name;
size_t index;
};
RegEx();
~RegEx();
@ -52,8 +70,11 @@ public:
int Compile(const char *pattern, int iFlags);
int Match(const char *str);
int MatchAll(const char *str);
int Replace(char *text, size_t text_maxlen, const char *replace, size_t replaceLen, int flags = 0);
void ClearMatch();
const char *GetSubstring(int s, char buffer[], int max);
const char *GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen = NULL);
const char *GetSubstring(size_t start, size_t end, char buffer[], size_t max, size_t *outlen = NULL);
void MakeSubpatternsTable(int numSubpatterns);
public:
int mErrorOffset;
@ -63,9 +84,12 @@ public:
private:
pcre *re;
bool mFree;
int ovector[30];
int ovector[REGEX_MAX_SUBPATTERNS];
char *subject;
ke::Vector<RegExSub> mSubStrings;
ke::Vector<size_t> mMatchesSubs;
ke::Vector<NamedGroup> mSubsNameTable;
int mNumSubpatterns;
};
#endif //_INCLUDE_CREGEX_H

View File

@ -153,6 +153,8 @@ cell match(AMX *amx, cell *params, bool all)
else
{
*errorCode = x->Count();
if (all)
return x->Count();
}
return id + 1;
@ -272,6 +274,43 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
return 1;
}
// native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);
//
// Runs RegEx::Replace() on the plugin's string and writes the result back.
// Returns 0 on an invalid handle or when nothing was replaced, -2 when the
// replace call reports an error (errcode receives mErrorOffset), otherwise
// the value returned by RegEx::Replace().
static cell AMX_NATIVE_CALL regex_replace(AMX *amx, cell *params)
{
	const int handle = params[1] - 1;
	const bool usable = handle >= 0 && handle < (int)PEL.length() && !PEL[handle]->isFree();

	if (!usable)
	{
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", handle);
		return 0;
	}

	int textLen, replaceLen;
	char *text = MF_GetAmxString(amx, params[2], 0, &textLen);
	const char *replace = MF_GetAmxString(amx, params[4], 1, &replaceLen);
	cell *errCode = MF_GetAmxAddr(amx, params[6]);

	RegEx *regex = PEL[handle];
	const int replaced = regex->Replace(text, params[3] + 1, replace, replaceLen, params[5]);

	switch (replaced)
	{
		case -1:
		{
			// Matching failed: report the error and drop any partial match data.
			*errCode = regex->mErrorOffset;
			regex->ClearMatch();
			return -2;
		}
		case 0:
		{
			// Nothing matched: the plugin's string is left untouched.
			*errCode = 0;
			regex->ClearMatch();
			return 0;
		}
		default:
		{
			break;
		}
	}

	MF_SetAmxString(amx, params[2], text, params[3]);

	return replaced;
}
AMX_NATIVE_INFO regex_Natives[] = {
{"regex_compile", regex_compile},
{"regex_compile_ex", regex_compile_ex},
@ -280,6 +319,7 @@ AMX_NATIVE_INFO regex_Natives[] = {
{"regex_match_all", regex_match_all},
{"regex_match_all_c", regex_match_all_c},
{"regex_substr", regex_substr},
{"regex_replace", regex_replace},
{"regex_free", regex_free},
{NULL, NULL},
};

View File

@ -99,6 +99,7 @@
<ClCompile Include="..\CRegEx.cpp" />
<ClCompile Include="..\module.cpp" />
<ClCompile Include="..\sdk\amxxmodule.cpp" />
<ClCompile Include="..\utils.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\CRegEx.h" />
@ -107,6 +108,7 @@
<ClInclude Include="..\sdk\moduleconfig.h" />
<ClInclude Include="..\sdk\CVector.h" />
<ClInclude Include="..\sdk\amxxmodule.h" />
<ClInclude Include="..\utils.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\plugins\include\regex.inc" />

View File

@ -32,6 +32,9 @@
<ClCompile Include="..\sdk\amxxmodule.cpp">
<Filter>Module SDK\SDK Base</Filter>
</ClCompile>
<ClCompile Include="..\utils.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\CRegEx.h">
@ -52,6 +55,9 @@
<ClInclude Include="..\sdk\amxxmodule.h">
<Filter>Module SDK\SDK Base</Filter>
</ClInclude>
<ClInclude Include="..\utils.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\plugins\include\regex.inc">

236
dlls/regex/utils.cpp Normal file
View File

@ -0,0 +1,236 @@
#include "amxxmodule.h"
#include <string.h>
#include "utils.h"
/**
 * Checks whether the byte at 'c' is the final byte of a complete
 * multi-byte UTF-8 sequence.
 *
 * Walks backwards over continuation bytes (10xxxxxx) to the preceding byte,
 * then compares the number of bytes walked with the length announced by
 * that byte.
 *
 * @param c  Pointer to the byte to check. Bytes before it are read while
 *           they are continuation bytes.
 *
 * @return   0 when the lengths agree, otherwise the number of bytes walked
 *           (including the starting byte).
 */
int UTIL_CheckValidChar(char *c)
{
	int walked = 1;

	while ((*c & 0xC0) == 0x80)
	{
		--c;
		++walked;
	}

	const int lead = *c & 0xF0;
	int expected = 0;

	if (lead == 0xC0 || lead == 0xD0)
	{
		expected = 2;
	}
	else if (lead == 0xE0)
	{
		expected = 3;
	}
	else if (lead == 0xF0)
	{
		expected = 4;
	}

	return (expected == walked) ? 0 : walked;
}
/**
 * Copies 'src' into 'dest', writing at most 'count' bytes including the
 * terminator. The result is always terminated unless 'count' is 0, in which
 * case 'dest' is not touched.
 *
 * @param dest   Destination buffer.
 * @param src    Null-terminated source string.
 * @param count  Number of bytes that may be written to 'dest'.
 *
 * @return       Number of characters copied, terminator excluded.
 */
unsigned int strncopy(char *dest, const char *src, size_t count)
{
	if (count == 0)
	{
		return 0;
	}

	/* One byte is reserved for the terminator. */
	const size_t capacity = count - 1;
	size_t written = 0;

	for (; written < capacity && src[written] != '\0'; ++written)
	{
		dest[written] = src[written];
	}

	dest[written] = '\0';

	return (unsigned int)written;
}
/**
* NOTE: Do not edit this for the love of god unless you have
* read the test cases and understand the code behind each one.
* While I don't guarantee there aren't mistakes, I do guarantee
* that plugins will end up relying on tiny idiosyncrasies of this
* function, just like they did with AMX Mod X.
*
* There are explicitly more cases than the AMX Mod X version because
* we're not doing a blind copy. Each case is specifically optimized
* for what needs to be done. Even better, we don't have to error on
* bad buffer sizes. Instead, this function will smartly cut off the
* string in a way that pushes old data out.
*
* Replaces the FIRST occurrence of 'search' inside 'subject', in place.
*
* @param subject        Null-terminated string to modify.
* @param maxLen         Size of the subject buffer; one byte of it is
*                       reserved for the null terminator.
* @param search         String to look for.
* @param searchLen      Length of 'search'.
* @param replace        String to insert in place of 'search'.
* @param replaceLen     Length of 'replace'.
* @param caseSensitive  true to compare with strcmp/strncmp, false to
*                       compare with stricmp/strnicmp.
*
* @return               Pointer just past the inserted replacement, or NULL
*                       when nothing was replaced.
*/
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
{
char *ptr = subject;
size_t browsed = 0;
size_t textLen = strlen(subject);
/* It's not possible to search or replace */
if (searchLen > textLen)
{
return NULL;
}
/* Handle the case of one byte replacement.
* It's only valid in one case.
*/
if (maxLen == 1)
{
/* If the search matches and the replace length is 0,
* we can just terminate the string and be done.
*/
if ((caseSensitive ? strcmp(subject, search) : stricmp(subject, search)) == 0 && replaceLen == 0)
{
*subject = '\0';
return subject;
}
else
{
return NULL;
}
}
/* Subtract one off the maxlength so we can include the null terminator */
maxLen--;
/* Scan forward one byte at a time; stop once fewer than searchLen bytes remain. */
while (*ptr != '\0' && (browsed <= textLen - searchLen))
{
/* See if we get a comparison */
if ((caseSensitive ? strncmp(ptr, search, searchLen) : strnicmp(ptr, search, searchLen)) == 0)
{
if (replaceLen > searchLen)
{
/* First, see if we have enough space to do this operation */
/* NOTE(review): unsigned subtraction - this wraps if textLen exceeds maxLen; confirm callers never pass such input. */
if (maxLen - textLen < replaceLen - searchLen)
{
/* First, see if the replacement length goes out of bounds. */
if (browsed + replaceLen >= maxLen)
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: DDDDDDDDDD
* OUTPUT : AADDDDDDDDD
* POSITION: ^
*/
/* If it does, we'll just bound the length and do a strcpy. */
replaceLen = maxLen - browsed;
/* Note, we add one to the final result for the null terminator */
strncopy(ptr, replace, replaceLen + 1);
/* Don't truncate a multi-byte character */
/* The high bit of the last copied byte is set: it belongs to a multi-byte sequence. */
if (*(ptr + replaceLen - 1) & 1 << 7)
{
replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1);
*(ptr + replaceLen) = '\0';
}
}
else
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: DDDDDDD
* OUTPUT : AADDDDDDDCC
* POSITION: ^
*/
/* We're going to have some bytes left over... */
size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1;
size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1;
char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy);
char *moveTo = ptr + replaceLen;
/* First, move our old data out of the way. */
memmove(moveTo, moveFrom, realBytesToCopy);
/* Now, do our replacement. */
memcpy(ptr, replace, replaceLen);
}
}
else
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: DDDD
* OUTPUT : AADDDDCCC
* POSITION: ^
*/
/* Yes, we have enough space. Do a normal move operation. */
char *moveFrom = ptr + searchLen;
char *moveTo = ptr + replaceLen;
/* First move our old data out of the way. */
size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
memmove(moveTo, moveFrom, bytesToCopy);
/* Now do our replacement. */
memcpy(ptr, replace, replaceLen);
}
}
else if (replaceLen < searchLen)
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: D
* OUTPUT : AADCCC
* POSITION: ^
*/
/* If the replacement does not grow the string length, we do not
* need to do any fancy checking at all. Yay!
*/
char *moveFrom = ptr + searchLen; /* Start after the search pointer */
char *moveTo = ptr + replaceLen; /* Copy to where the replacement ends */
/* Copy our replacement in, if any */
if (replaceLen)
{
memcpy(ptr, replace, replaceLen);
}
/* Figure out how many bytes to move down, including null terminator */
size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
/* Move the rest of the string down */
memmove(moveTo, moveFrom, bytesToCopy);
}
else
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: DDD
* OUTPUT : AADDDCCC
* POSITION: ^
*/
/* We don't have to move anything around, just do a straight copy */
memcpy(ptr, replace, replaceLen);
}
/* Only the first occurrence is handled: return the position right after the replacement. */
return ptr + replaceLen;
}
ptr++;
browsed++;
}
/* No occurrence of 'search' was found. */
return NULL;
}

8
dlls/regex/utils.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef UTILS_H
#define UTILS_H
/* Returns 0 when the byte at 'c' ends a complete multi-byte UTF-8 sequence, otherwise the number of bytes walked back. */
int UTIL_CheckValidChar(char *c);
/* Replaces the first occurrence of 'search' in 'subject' in place; returns a pointer just past the replacement, or NULL. */
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive);
/* Copies 'src' into 'dest' writing at most 'count' bytes, terminator included; returns the number of characters copied. */
unsigned int strncopy(char *dest, const char *src, size_t count);
#endif // UTILS_H

View File

@ -44,10 +44,10 @@
enum Regex
{
REGEX_MATCH_FAIL = -2,
REGEX_PATTERN_FAIL,
REGEX_NO_MATCH,
REGEX_OK
REGEX_MATCH_FAIL = -2,
REGEX_PATTERN_FAIL = -1,
REGEX_NO_MATCH = 0,
REGEX_OK = 1
};
/**
@ -231,8 +231,7 @@ native regex_free(&Regex:id);
* @note Use this if you intend on using the same expression multiple times.
* Pass the regex handle returned here to regex_match_ex() to check for matches.
*
* @note Unlike regex_compile(), this allows you to use directly PCRE flags, and
* to get a more complete set of regular expression error codes.
* @note Unlike regex_compile(), this allows you to use directly PCRE flags.
*
* @param pattern The regular expression pattern.
* @param flags General flags for the regular expression, see PCRE_* defines.
@ -306,6 +305,7 @@ native Regex:regex_match_all(const string[], const pattern[], flags = 0, error[]
* @param flags General flags for the regular expression.
* @param error Error message, if applicable.
* @param maxLen Maximum length of the error buffer.
* @param errcode Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
*
* @return -2 = Matching error (error code is stored in ret)
* -1 = Pattern error (error code is stored in ret)
@ -327,3 +327,40 @@ stock regex_match_simple(const str[], const pattern[], flags = 0, error[]= "", m
return substrings;
}
/**
* Flags to use with regex_replace, to control the replacement behavior.
*/
#define REGEX_FORMAT_DEFAULT 0 /* Uses the standard formatting rules to replace matches. */
#define REGEX_FORMAT_NOCOPY (1<<0) /* The sections that do not match the regular expression are not copied when replacing matches. */
#define REGEX_FORMAT_FIRSTONLY (1<<1) /* Only the first occurrence of a regular expression is replaced. */
/**
* Perform a regular expression search and replace.
*
* The optional flags parameter controls how the replacement is performed.
* Supported format specifiers for the replace parameter:
* $number : Substitutes the substring matched by group number.
* n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
* ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters).
* $& : Substitutes a copy of the whole match.
* $` : Substitutes all the text of the input string before the match.
* $' : Substitutes all the text of the input string after the match.
* $+ : Substitutes the last group that was captured.
* $_ : Substitutes the entire input string.
* $$ : Substitutes a literal "$".
* Note that the character \ can also be used to introduce a format specifier; it behaves the same as $.
*
* @param pattern The handle of a compiled regular expression.
* @param string The string to search; it receives the result of the replacement.
* @param maxLen Maximum length of the string buffer.
* @param replace The string that will be used to replace any matches. See above for format specifiers.
* @param flags General flags to control how the string is replaced. See REGEX_FORMAT_* defines.
* @param errcode Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
*
* @return -2 = Matching error (error code is stored in errcode)
* 0 = No match.
* >=1 = Number of replacements.
*/
native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);

View File

@ -0,0 +1,451 @@
#include <amxmodx>
#include <regex>
/**
* Warning: To get expected result, file encoding must be UTF-8 without BOM.
*/
/**
 * Registers the plugin and the "regex_test" server command
 * which runs the test suite.
 */
public plugin_init() {
	register_plugin("UTF-8 Test", AMXX_VERSION_STR, "AMXX Dev Team");
	register_srvcmd("regex_test", "OnServerCommand");
}
// Number of test cases that failed so far (incremented by test()).
new FailedCount;
// Number of test cases that passed so far (incremented by test()).
new PassedCount;
/**
 * Runs one regex_replace() test case and updates the pass/fail counters.
 *
 * @param regex           Pattern to compile.
 * @param replace         Replacement string passed to regex_replace().
 * @param string          Input string.
 * @param expectedString  Expected content of the buffer after the call.
 * @param expectedCount   Expected return value, or -1 to skip the check.
 * @param regexFlags      Flags passed to regex_compile_ex().
 * @param formatFlags     Flags passed to regex_replace().
 * @param bufferlen       Maximum length passed to regex_replace(),
 *                        or -1 to use the full buffer.
 */
test(const regex[], const replace[], const string[], const expectedString[], expectedCount = -1, regexFlags = 0, formatFlags = 0, bufferlen = -1)
{
	new errorCode, error[128];
	new Regex:r = regex_compile_ex(regex, regexFlags, error, charsmax(error), errorCode);

	if (r == REGEX_PATTERN_FAIL || errorCode)
	{
		server_print("^t^t#%d. Pattern fail : ^"%s^"(%d)", ++FailedCount + PassedCount, error, errorCode);
	}
	else
	{
		new buffer[512];
		copy(buffer, charsmax(buffer), string);

		// Reuse the outer errorCode (0 at this point) instead of declaring
		// a second variable that shadows it.
		new count = regex_replace(r, buffer, bufferlen != -1 ? bufferlen : charsmax(buffer), replace, formatFlags, errorCode);

		if (expectedCount != -1 && count != expectedCount)
		{
			server_print("^t^t#%d. Failed - count = %d, expected count = %d", ++FailedCount + PassedCount, count, expectedCount);
		}
		else if (!equal(buffer, expectedString))
		{
			server_print("^t^t#%d. Failed - output = %s, expected output = %s", ++FailedCount + PassedCount, buffer, expectedString);
		}
		else
		{
			++PassedCount;
		}

		regex_free(r);
	}
}
/**
 * Prints the number of passed tests out of the total run.
 */
end() {
	server_print("Tests successful: %d/%d", PassedCount, PassedCount + FailedCount);
}
/**
 * Handler of the test command: runs every regex_replace() test group in turn
 * and prints the summary through end().
 *
 * Each group is wrapped in its own braces purely for readability. Every
 * test() call prints a line only when the case fails.
 *
 * Continuation lines of multi-line string literals are intentionally kept
 * flush-left so that the literal contents cannot be altered by indentation.
 */
public OnServerCommand()
{
    server_print("Testing regex_replace()");

    // Match counting with a variety of patterns.
    server_print("^tChecking count...");
    {
        test( .regex = "(([0-9a-z]+)-([0-9]+))-(([0-9]+)-([0-9]+))",
            .replace = "xxxx",
            .string = "1-2-3-4 a-2-3-4 1-a-3-4 1-2-a-4 1-2-3-a a-a-a-a 4-3-2-1 100-200-300-400-500-600-700-800",
            .expectedString = "xxxx xxxx 1-a-3-4 1-2-a-4 1-2-3-a a-a-a-a xxxx xxxx-xxxx",
            .expectedCount = 5
        );
        test( .regex = "([a-z]+)",
            .replace = "xxxx",
            .string = "Here must only number like 42 and 13 appear",
            .expectedString = "Hxxxx xxxx xxxx xxxx xxxx 42 xxxx 13 xxxx",
            .expectedCount = 7
        );
        test( .regex = "((V(I|1)(4|A)GR(4|A))|(V(I|1)C(0|O)D(I|1)(N|\/\\\/)))", .regexFlags = PCRE_CASELESS,
            .replace = "...",
            .string = "Viagra V14GR4 Vicodin V1C0D1/\/ v1c0d1/|/",
            .expectedString = "... ... ... ... v1c0d1/|/",
            .expectedCount = 4
        );
        test( .regex = "\[(right)\](((?R)|[^^[]+?|\[)*)\[/\\1\]", .regexFlags = PCRE_CASELESS | PCRE_UNGREEDY,
            .replace = "",
            .string = "[CODE]&lt;td align=&quot;$stylevar[right]&quot;&gt;[/CODE]",
            .expectedString = "[CODE]&lt;td align=&quot;$stylevar[right]&quot;&gt;[/CODE]",
            .expectedCount = 0
        );
        test( .regex = "- This is a string$",
            .replace = "This shouldn\'t work",
            .string = "123456789 - Hello, world - This is a string.",
            .expectedString = "123456789 - Hello, world - This is a string.",
            .expectedCount = 0
        );
        test( .regex = "[0-35-9]",
            .replace = "4",
            .string = "123456789 - Hello, world - This is a string.",
            .expectedString = "444444444 - Hello, world - This is a string.",
            .expectedCount = 8
        );
        test( .regex = "\b[hH]\w{2,4}",
            .replace = "Bonjour",
            .string = "123456789 - Hello, world - This is a string.",
            .expectedString = "123456789 - Bonjour, world - This is a string.",
            .expectedCount = 1
        );
        test( .regex = "(\w)\s*-\s*(\w)",
            .replace = "$1. $2",
            .string = "123456789 - Hello, world - This is a string.",
            .expectedString = "123456789. Hello, world. This is a string.",
            .expectedCount = 2
        );
        test( .regex = "([a-z]\w+)@(\w+)\.(\w+)\.([a-z]{2,})",
            .replace = "$1 at $2 dot $3 dot $4",
            .string = "josmessa@uk.ibm.com",
            .expectedString = "josmessa at uk dot ibm dot com",
            .expectedCount = 1
        );
        test( .regex = "\b\w{1}s",
            .replace = "test",
            .string = "This is a string. (0-9) as well as parentheses",
            .expectedString = "This test a string. (0-9) test well test parentheses",
            .expectedCount = 3
        );
        test( .regex = "(\d{1})-(\d{1})",
            .replace = "$1 to $2",
            .string = "This is a string. It contains numbers (0-9) as well as parentheses and some other things!",
            .expectedString = "This is a string. It contains numbers (0 to 9) as well as parentheses and some other things!",
            .expectedCount = 1
        );
        test( .regex = "[\(!\)]",
            .replace = "*",
            .string = "This is a string. It contains numbers (0-9) as well as parentheses and some other things!",
            .expectedString = "This is a string. It contains numbers *0-9* as well as parentheses and some other things*",
            .expectedCount = 3
        );
    }

    // Empty inputs, empty replacements and tiny output buffers.
    server_print("^tChecking edges cases...");
    {
        test(.regex = "[0-9]+", .replace = "*", .string = "", .expectedString = "", .expectedCount = 0);
        test(.regex = "([0-9]+)", .replace = "", .string = "123", .expectedString = "", .expectedCount = 1);
        test(.regex = "a", .replace = "\", .string = "a", .expectedString = "\", .expectedCount = 1);
        test(.regex = "^^", .replace = "x", .string = "a", .expectedString = "xa", .expectedCount = 1);
        test(.regex = "b", .replace = "\", .string = "b", .expectedString = "\", .expectedCount = 1, .bufferlen = 1);
        test(.regex = "b", .replace = "^^", .string = "b", .expectedString = "b", .expectedCount = 0, .bufferlen = 0);
        test(.regex = "\w+", .replace = "123", .string = "abc", .expectedString = "12", .expectedCount = 1, .bufferlen = 2);
    }

    // Multi-byte subjects and replacements, with and without PCRE_UCP | PCRE_UTF8.
    server_print("^tChecking UTF-8 support...");
    {
        test(.regex = "(\w+)", .replace = "*", .string = "éà@É", .expectedString = "éà@É", .expectedCount = 0);
        test(.regex = "(\w+)", .replace = "*", .string = "éà@É", .expectedString = "*@*", .expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8);
        test(.regex = "(\w+)", .replace = "字", .string = "éà@É", .expectedString = "字@字",.expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8);
        test(.regex = "(\w+)", .replace = "字", .string = "éà@É", .expectedString = "字", .expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8, .bufferlen = 3);
    }

    // Substitution specifiers in the replacement string ($n, ${n}, ${name}, $$, $+, $_, $`, $', $&).
    server_print("^tChecking substitutions...");
    {
        test(.regex = "x", .replace = "y", .string = "text", .expectedString = "teyt" );
        test(.regex = "x", .replace = "$", .string = "text", .expectedString = "te$t" );
        test(.regex = "x", .replace = "$1", .string = "text", .expectedString = "te$1t" );
        test(.regex = "x", .replace = "${1", .string = "text", .expectedString = "te${1t" );
        test(.regex = "x", .replace = "${", .string = "text", .expectedString = "te${t" );
        test(.regex = "x", .replace = "${$0", .string = "text", .expectedString = "te${xt" );
        test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" );
        test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" );
        test(.regex = "x", .replace = "$5", .string = "text", .expectedString = "te$5t" );
        test(.regex = "x", .replace = "$5", .string = "te(x)t", .expectedString = "te($5)t" );
        test(.regex = "x", .replace = "${foo", .string = "text", .expectedString = "te${foot" );
        test(.regex = "(x)", .replace = "$5", .string = "text", .expectedString = "te$5t" );
        test(.regex = "(x)", .replace = "$1", .string = "text", .expectedString = "text" );
        test(.regex = "e(x)", .replace = "$1", .string = "text", .expectedString = "txt" );
        test(.regex = "e(x)", .replace = "$5", .string = "text", .expectedString = "t$5t" );
        test(.regex = "e(x)", .replace = "$4", .string = "text", .expectedString = "t$4t" );
        test(.regex = "e(x)", .replace = "$3", .string = "text", .expectedString = "t$3t" );
        test(.regex = "e(x)", .replace = "${1}", .string = "text", .expectedString = "txt" );
        test(.regex = "e(x)", .replace = "${3}", .string = "text", .expectedString = "t${3}t" );
        test(.regex = "e(x)", .replace = "${1}${3}", .string = "text", .expectedString = "tx${3}t" );
        test(.regex = "e(x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t");
        test(.regex = "e(?<foo>x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t");
        test(.regex = "e(?<foo>x)", .replace = "${1}${foo}", .string = "text", .expectedString = "txxt" );
        test(.regex = "e(?<foo>x)", .replace = "${goll}${foo}", .string = "text", .expectedString = "t${goll}xt");
        test(.regex = "e(?<foo>x)", .replace = "${goll${foo}", .string = "text", .expectedString = "t${gollxt" );
        test(.regex = "e(?<foo>x)", .replace = "${goll${foo}}", .string = "text", .expectedString = "t${gollx}t");
        test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
        test(.regex = "e(?<foo>x)", .replace = "${${foo}}", .string = "text", .expectedString = "t${x}t" );
        test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
        test(.regex = "e(?<foo>x)", .replace = "$${bfoo}}", .string = "text", .expectedString = "t${bfoo}}t");
        test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
        test(.regex = "e(?<foo>x)", .replace = "$${foo}", .string = "text", .expectedString = "t${foo}t" );
        test(.regex = "e(?<foo>x)", .replace = "$$", .string = "text", .expectedString = "t$t" );
        test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "txext" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext" );
        test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "txexxt" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "teexxt" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$_", .string = "texts", .expectedString = "teextextsts");
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$`", .string = "texts", .expectedString = "teextts" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$'", .string = "texts", .expectedString = "teextsts" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$&", .string = "texts", .expectedString = "teexexts" );

        // NOTE(review): many of the cases below repeat cases from above; they are
        // kept as-is so that the reported totals stay the same.
        test(.regex = "x", .replace = "y", .string = "text", .expectedString = "teyt" );
        test(.regex = "x", .replace = "$", .string = "text", .expectedString = "te$t" );
        test(.regex = "x", .replace = "$1", .string = "text", .expectedString = "te$1t" );
        test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" );
        test(.regex = "x", .replace = "$5", .string = "text", .expectedString = "te$5t" );
        test(.regex = "x", .replace = "$5", .string = "te(x)t", .expectedString = "te($5)t" );
        test(.regex = "x", .replace = "${foo", .string = "text", .expectedString = "te${foot" );
        test(.regex = "(x)", .replace = "$5", .string = "text", .expectedString = "te$5t" );
        test(.regex = "(x)", .replace = "$1", .string = "text", .expectedString = "text" );
        test(.regex = "e(x)", .replace = "$1", .string = "text", .expectedString = "txt" );
        test(.regex = "e(x)", .replace = "$5", .string = "text", .expectedString = "t$5t" );
        test(.regex = "e(x)", .replace = "$4", .string = "text", .expectedString = "t$4t" );
        test(.regex = "e(x)", .replace = "$3", .string = "text", .expectedString = "t$3t" );
        test(.regex = "e(x)", .replace = "${1}", .string = "text", .expectedString = "txt" );
        test(.regex = "e(x)", .replace = "${3}", .string = "text", .expectedString = "t${3}t" );
        test(.regex = "e(x)", .replace = "${1}${3}", .string = "text", .expectedString = "tx${3}t" );
        test(.regex = "e(x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t");
        test(.regex = "e(?<foo>x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t");
        test(.regex = "e(?<foo>x)", .replace = "${1}${foo}", .string = "text", .expectedString = "txxt" );
        test(.regex = "e(?<foo>x)", .replace = "${goll}${foo}", .string = "text", .expectedString = "t${goll}xt");
        test(.regex = "e(?<foo>x)", .replace = "${goll${foo}", .string = "text", .expectedString = "t${gollxt" );
        test(.regex = "e(?<foo>x)", .replace = "${goll${foo}}", .string = "text", .expectedString = "t${gollx}t");
        test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
        test(.regex = "e(?<foo>x)", .replace = "${${foo}}", .string = "text", .expectedString = "t${x}t" );
        test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
        test(.regex = "e(?<foo>x)", .replace = "$${bfoo}}", .string = "text", .expectedString = "t${bfoo}}t");
        test(.regex = "e(?<foo>x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" );
        test(.regex = "e(?<foo>x)", .replace = "$${foo}", .string = "text", .expectedString = "t${foo}t" );
        test(.regex = "e(?<foo>x)", .replace = "$$", .string = "text", .expectedString = "t$t" );
        test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "txext" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext" );
        test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "txexxt" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "teexxt" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$_", .string = "texts", .expectedString = "teextextsts");
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$`", .string = "texts", .expectedString = "teextts" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$'", .string = "texts", .expectedString = "teextsts" );
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$&", .string = "texts", .expectedString = "teexexts" );
        test(.regex = "<(.+?)>", .replace = "[$0:$1]", .string = "<i>am not</i>", .expectedString = "[<i>:i]am not[</i>:/i]");
        test(.regex = "(?<foo>e)(?<foo>x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext", .regexFlags = PCRE_DUPNAMES);
        test(.regex = "\b(\w+)(\s)(\w+)\b", .replace = "$3$2$1", .string = "one two", .expectedString = "two one");
        test(.regex = "\b(\d+)\s?USD", .replace = "$$$1", .string = "103 USD", .expectedString = "$103" );
        test(.regex = "\b(?<w1>\w+)(\s)(?<w2>\w+)\b", .replace = "${w2} ${w1}", .string = "one two", .expectedString = "two one");
        test(.regex = "(\$*(\d*(\.+\d+)?){1})", .replace = "**$&", .string = "$1.30", .expectedString = "**$1.30**");
        test(.regex = "B+", .replace = "$`", .string = "AABBCC", .expectedString = "AAAACC");
        test(.regex = "B+", .replace = "$'", .string = "AABBCC", .expectedString = "AACCCC");
        test(.regex = "B+(C+)", .replace = "$+", .string = "AABBCCDD", .expectedString = "AACCDD");
        test(.regex = "B+", .replace = "$_", .string = "AABBCC", .expectedString = "AAAABBCCCC");
        test(.regex = "(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", .replace = "${S}$11$1", .string = "F2345678910L71", .expectedString = "F2345678910L71");
        test(.regex = "(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", .replace = "${S}$11$1", .string = "F2345678910LL1", .expectedString = "${S}LF1");
    }

    // Patterns that themselves operate on regex syntax (heavy escaping, look-arounds).
    server_print("^tChecking moar #1...");
    {
        test(.string = "(?(w)a|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(w)|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(w)a)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(w)a|)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(w)?|a|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(w)||o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(w)(a)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(w))\a|)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(2)a|o)" , .regex = "\(\?\([^^\)]+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?(|)a|o)" , .regex = "\(\?\([^^\)]+\).*\|?.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "a\3b" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "a\5b");
        test(.string = "\3b" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "\5b");
        test(.string = "\\\3b" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "\\\5b");
        test(.string = "\\\k<g>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w+)>" , .replace = "\5", .expectedString = "\\\5");
        test(.string = "\\\\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "\\\\k'g'");
        test(.string = "a\\\\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "a\\\\k'g'");
        test(.string = "\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "\5");
        test(.string = "(?<n1-n2>)" , .regex = "\(\?<[A-Za-z]\w*-[A-Za-z]\w*>.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?'n1-n2'a)" , .regex = "\(\?'[A-Za-z]\w*-[A-Za-z]\w*'.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "\p{Isa}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Ina}");
        test(.string = "\p{Is}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Is}");
        test(.string = "\p{Isa" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Isa");
        test(.string = "a(?#|)" , .regex = "\(\?#[^^\)]*\)" , .replace = "", .expectedString = "a");
        test(.string = "(?#|)" , .regex = "\(\?#[^^\)]*\)" , .replace = "", .expectedString = "");
        test(.string = "(?#|)" , .regex = "\#[^^\n\r]*" , .replace = "", .expectedString = "(?");
        test(.string = "(?inm-xs:\#)" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(r");
        test(.string = "(?ni:())" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(r)");
        test(.string = "(?x-i:)" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(?x-i:)");
        test(.string = "(?n:))" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(?n:))");
        test(.string = "(?<n1>)" , .regex = "\(\?<[A-Za-z]\w*>.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?'n1'y)" , .regex = "\(\?'[A-Za-z]\w*'.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?<45>y)" , .regex = "\(\?<\d+>.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?'7'o)" , .regex = "\(\?'\d+'.*\)" , .replace = "r", .expectedString = "r");
        test(.string = "\\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "\\r");
        test(.string = "a\\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "a\\r");
        test(.string = "\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "\r");
        test(.string = "a\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "a\r");
        test(.string = "\(" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "r");
        test(.string = "a\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "ar");
        test(.string = "?:" , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])" , .replace = "r", .expectedString = "r");
        test(.string = "?<!" , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])" , .replace = "r", .expectedString = "r");
        test(.string = "?-" , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])" , .replace = "r", .expectedString = "r");
        test(.string = "\(?<n>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?<[A-Za-z]\w*>", .replace = "r", .expectedString = "\(r");
        test(.string = "a\(?'n'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?'[A-Za-z]\w*'", .replace = "r", .expectedString = "a\(r");
        test(.string = "\\(?<2>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?<\d+>" , .replace = "r", .expectedString = "\\(r");
        test(.string = "(?'2'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?'\d+'" , .replace = "r", .expectedString = "(r");
        test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
        test(.string = "\[a\bb]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[a\u8b]");
        test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
        test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
        test(.string = "\[\\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\\u8]");
        test(.string = "[[]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\\[" , .expectedString = "[\[]");
        test(.string = "\[[]" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[[]");
        test(.string = "\[\[]" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[\[]");
        test(.string = "\[\[]" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[\[]");
        test(.string = "\{" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\\{");
        test(.string = "\{" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\\{");
        test(.string = "\{1,2}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1,2}");
        test(.string = "\{1}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1}");
        test(.string = "\{1,}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1,}");
        test(.string = "\{1" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\\{", .expectedString = "\{1");
        test(.string = "\\(?!{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "\5", .expectedString = "?!");
        test(.string = "{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
        test(.string = "({1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
        test(.string = "(?{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "(?{1}");
        test(.string = "(?:{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
        test(.string = "\({1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "\({1}");
        test(.string = "\p{Isa}" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = "\p{Ina}");
        test(.string = "\p{Is}" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = "\p{Is}");
        test(.string = "\p{Isa" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = "\p{Isa");
        test(.string = "\}" , .regex = "(?!(\\A|[^^\\])(\\{2})*\\{\\d\\d*(,(\\d\\d*)?)?)\\}", .replace = "\\}", .expectedString = "\}");
        test(.string = "{\}" , .regex = "(?!(\A|[^^\^^])(\^^{2})*\{\d\d*(,(\d\d*)?)?)\}", .replace = "\\}", .expectedString = "{\\}");
        test(.string = "{1,2}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "{1,2\}");
        test(.string = "\{1}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\}");
        test(.string = "\{1\}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\\}");
        test(.string = "\{1}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\}");
        test(.string = "{1,}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "{1,\}");
        test(.string = "a(?<!b*c)" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "ar");
        test(.string = "a(?<!b+c)" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "ar");
        test(.string = "(?<!b{1}c))" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "(?<!b{1}c))");
        test(.string = "(?<!b{1,}c)" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "r");
        test(.string = "(?<!b{1,4}c)" , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)" , .replace = "r", .expectedString = "(?<!b{1,4}c)");
        test(.string = "a\3b" , .regex = "\\(\d+)" , .replace = "\5", .expectedString = "a\5b");
        test(.string = "\3b" , .regex = "\\(\d+)" , .replace = "\5", .expectedString = "\5b");
        test(.string = "\\3b" , .regex = "(?!\\\\)\\(\d)" , .replace = "\5", . expectedString = "\\5b");
        test(.string = "a\\3b" , .regex = "(?:(\\){0,3})\\(\d)" , .replace = "\5", . expectedString = "a\5b");
        test(.string = "\\k<g>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w)>" , .replace = "\5", .expectedString = "\\5");
        test(.string = "a\\k<g>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w)>" , .replace = "\5", .expectedString = "a\\5");
        test(.string = "\\k'g'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "\\5");
        test(.string = "a\\k'g'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "a\\5");
        test(.string = "\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "\5");
    }

    // Larger, real-world style patterns and the REGEX_FORMAT_* flags.
    server_print("^tChecking moar #2...");
    {
        test(.regex = "^^((?>[a-zA-Z\d!#$%&'*+\-\/=?^^_`{|}~]+\x20*|^"((?=[\x01-\x7f])[^^^"\\]|\\[\x01-\x7f])*^"\x20*)*(?<angle><))?((?!\.)(?>\.?[a-zA-Z\d!#$%&'*+\-\/=?^^_`{|}~]+)+|^"((?=[\x01-\x7f])[^^^"\\]|\\[\x01-\x7f])*^")@(((?!-)[a-zA-Z\d\-]+(?<!-)\.)+[a-zA-Z]{2,}|\[(((?(?<!\[)\.)(25[0-5]|2[0-4]\d|[01]?\d?\d)){4}|[a-zA-Z\d\-]*[a-zA-Z\d]:((?=[\x01-\x7f])[^^\\\[\]]|\\[\x01-\x7f])+)\])(?(angle)>)$" ,
            .replace = "$1$4@$7net>",
            .string = "Name Surname <name.surname@blah.com>",
            .expectedString = "Name Surname <name.surname@blah.net>"
        );
        test(.regex = "([A-Z])\w+",
            .replace = "*snip*",
            .string = "Welcome to RegExr v2.0 by gskinner.com!\
\
Edit the Expression & Text to see matches. Roll over matches or the expression for details. Undo mistakes with ctrl-z. Save & Share expressions with friends or the Community. A full Reference & Help is available in the Library, or watch the video Tutorial.\
\
Sample text for testing:\
abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ\
:0123456789 +-.,!@#$%^^&*();\/|<>^"'\
12345 -98.7 3.141 .6180 9,000 +42\
555.123.4567 +1-(800)-555-2468\
foo@demo.net bar.ba@test.co.uk\
www.demo.com http://foo.co.uk/\
http://regexr.com/foo.html?q=bar",
            .expectedString = "*snip* to *snip* v2.0 by gskinner.com!\
\
*snip* the *snip* & *snip* to see matches. *snip* over matches or the expression for details. *snip* mistakes with ctrl-z. *snip* & *snip* expressions with friends or the *snip*. A full *snip* & *snip* is available in the *snip*, or watch the video *snip*.\
\
*snip* text for testing:\
abcdefghijklmnopqrstuvwxyz *snip*\
:0123456789 +-.,!@#$%^^&*();\/|<>^"'\
12345 -98.7 3.141 .6180 9,000 +42\
555.123.4567 +1-(800)-555-2468\
foo@demo.net bar.ba@test.co.uk\
www.de",
            .regexFlags = PCRE_EXTENDED
        );
        test(.regex = "/\*(?>[^^*/]+|\*[^^/]|/[^^*]|/\*(?>[^^*/]+|\*[^^/]|/[^^*])*\*/)*\*/",
            .replace = "",
            .string = "/* comment */\
no comment\
/* comment\
spanning\
multiple\
lines */\
/* comment /* nesting */ of /* two */ levels supported */\
/* comment /* nesting */ of /* /* more than */ two levels */ not supported */",
            .expectedString = "no comment\
/* comment of not supported */"
        );
        test(.regex = "\b(?<protocol>https?|ftp)://(?<domain>[A-Z0-9.-]+)(?<file>/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(?<parameters>\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
            .replace = "${protocol}s://site.com${file}^n",
            .string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
            .expectedString = "https://site.com^nhttps://site.com/^nhttps://site.com/test.php^nhttps://site.com/index.php^nhttps://site.com^n",
            .regexFlags = PCRE_CASELESS | PCRE_EXTENDED,
            .formatFlags = REGEX_FORMAT_NOCOPY
        );
        test(.regex = "\b(https?|ftp)://([A-Z0-9.-]+)(/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
            .replace = "$1s://site.com$3^n",
            .string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
            .expectedString = "https://site.com^nhttps://site.com/^nhttps://site.com/test.php^nhttps://site.com/index.php^nhttps://site.com^n",
            .regexFlags = PCRE_CASELESS | PCRE_EXTENDED,
            .formatFlags = REGEX_FORMAT_NOCOPY
        );
        test(.regex = "\b(https?|ftp)://([A-Z0-9.-]+)(/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
            .replace = "$1s://site.com$3^n",
            .string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
            .expectedString = "https://site.com^n",
            .regexFlags = PCRE_CASELESS | PCRE_EXTENDED,
            .formatFlags = REGEX_FORMAT_NOCOPY | REGEX_FORMAT_FIRSTONLY
        );
        test(.regex = "^^(.++)\r?\n(?=(?:^^(?!\1$).*+\r?\n)*+\1$)",
            .replace = "",
            .string = "one^n\
two^n\
three^n\
four^n\
two^n\
three^n\
four^n\
three^n\
four^n\
four",
            .expectedString = "one^n\
two^n\
three^n\
four",
            .regexFlags = PCRE_EXTENDED | PCRE_MULTILINE
        );
    }

    end();
}

View File

@ -225,6 +225,7 @@ scripting_files = [
'testsuite/menutest.sma',
'testsuite/native_test.sma',
'testsuite/nvault_test.sma',
'testsuite/regex_test.sma',
'testsuite/sorttest.sma',
'testsuite/strbreak.sma',
'testsuite/sqlxtest.sma',