diff --git a/dlls/regex/AMBuilder b/dlls/regex/AMBuilder index 9c79e1c7..ae88213a 100644 --- a/dlls/regex/AMBuilder +++ b/dlls/regex/AMBuilder @@ -18,6 +18,7 @@ binary.sources = [ 'sdk/amxxmodule.cpp', 'module.cpp', 'CRegEx.cpp', + 'utils.cpp', ] AMXX.modules += [builder.Add(binary)] diff --git a/dlls/regex/CRegEx.cpp b/dlls/regex/CRegEx.cpp index 4073621f..1a1cfdef 100755 --- a/dlls/regex/CRegEx.cpp +++ b/dlls/regex/CRegEx.cpp @@ -30,10 +30,12 @@ * you do not wish to do so, delete this exception statement from your * version. */ +#include "amxxmodule.h" #include "pcre.h" #include "CRegEx.h" #include -#include "amxxmodule.h" +#include +#include "utils.h" RegEx::RegEx() { @@ -43,6 +45,9 @@ RegEx::RegEx() mFree = true; subject = NULL; mSubStrings.clear(); + mMatchesSubs.clear(); + mSubsNameTable.clear(); + mNumSubpatterns = 0; } void RegEx::Clear() @@ -57,6 +62,9 @@ void RegEx::Clear() delete[] subject; subject = NULL; mSubStrings.clear(); + mMatchesSubs.clear(); + mSubsNameTable.clear(); + mNumSubpatterns = 0; } RegEx::~RegEx() @@ -143,6 +151,19 @@ int RegEx::Compile(const char *pattern, int iFlags) mFree = false; + /** + * Retrieve the number of captured groups + * including the full match. + */ + pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns); + ++mNumSubpatterns; + + /** + * Build the table with the named groups, + * which contain an index and a name per group. 
+ */ + MakeSubpatternsTable(mNumSubpatterns); + return 1; } @@ -153,13 +174,13 @@ int RegEx::Match(const char *str) if (mFree || re == NULL) return -1; - this->ClearMatch(); + ClearMatch(); //save str subject = new char[strlen(str) + 1]; strcpy(subject, str); - rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30); + rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, REGEX_MAX_SUBPATTERNS); if (rc < 0) { @@ -188,61 +209,103 @@ int RegEx::Match(const char *str) int RegEx::MatchAll(const char *str) { - int rc = 0; int rr = 0; - int offset = 0; + int rc = 0; + int startOffset = 0; + int exoptions = 0; + int notEmpty = 0; + int sizeOffsets = mNumSubpatterns * 3; + int subjectLen = strlen(str); if (mFree || re == NULL) + { return -1; + } - this->ClearMatch(); - - //save str - subject = new char[strlen(str) + 1]; + ClearMatch(); + + subject = new char[subjectLen + 1]; strcpy(subject, str); - RegExSub sub, whole; - while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30))) + RegExSub sub; + + while (1) { - if (rr < 0) + rr = pcre_exec(re, NULL, subject, (int)subjectLen, startOffset, exoptions | notEmpty, ovector, REGEX_MAX_SUBPATTERNS); + + /** + * The string was already proved to be valid UTF-8 + */ + exoptions |= PCRE_NO_UTF8_CHECK; + + /** + * Too many substrings + */ + if (rr == 0) { - if (rr == PCRE_ERROR_NOMATCH) + rr = sizeOffsets / 3; + } + + if (rr > 0) + { + mMatchesSubs.append(rr); + + for (int s = 0; s < rr; ++s) { - break; + sub.start = ovector[2 * s]; + sub.end = ovector[2 * s + 1]; + + mSubStrings.append(sub); + } + } + else if (rr == PCRE_ERROR_NOMATCH) + { + /** + * If we previously set PCRE_NOTEMPTY after a null match, + * this is not necessarily the end. We need to advance + * the start offset, and continue. Fudge the offset values + * to achieve this, unless we're already at the end of the string. 
+ */ + if (notEmpty && startOffset < (int)subjectLen) + { + ovector[0] = startOffset; + ovector[1] = startOffset + 1; } else { - mErrorOffset = rr; - - if (rc) - this->ClearMatch(); - - return -1; + break; } } - - rc += rr; - mSubStrings.ensure(rc); - - for (int s = 1; s < rr; ++s) + else { - sub.start = ovector[2 * s]; - sub.end = ovector[2 * s + 1]; - mSubStrings.append(sub); + mErrorOffset = rr; + + if (mMatchesSubs.length()) + { + ClearMatch(); + } + + return -1; } - offset = ovector[1]; + /** + * If we have matched an empty string, mimic what Perl's /g options does. + * This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try + * the match again at the same point. If this fails (picked up above) we + * advance to the next character. + */ + notEmpty = (ovector[1] == ovector[0]) ? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; + + /** + * Advance to the next piece. + */ + startOffset = ovector[1]; } - if (!rc) + if (!mMatchesSubs.length()) + { return 0; - - sub = mSubStrings.at(0); - whole.start = sub.start; - sub = mSubStrings.back(); - whole.end = sub.end; - - mSubStrings.insert(0, whole); + } return 1; } @@ -256,18 +319,14 @@ void RegEx::ClearMatch() delete[] subject; subject = NULL; mSubStrings.clear(); + mMatchesSubs.clear(); } -const char *RegEx::GetSubstring(int s, char buffer[], int max) +const char *getSubstring(char *subject, size_t start, size_t end, char buffer[], size_t max, size_t *outlen) { - int i = 0; - if ((size_t)s >= mSubStrings.length() || s < 0) - return NULL; - - RegExSub sub = mSubStrings.at(s); - - char *substr_a = subject + sub.start; - int substr_l = sub.end - sub.start; + size_t i; + char * substr_a = subject + start; + size_t substr_l = end - start; for (i = 0; i < substr_l; i++) { @@ -278,5 +337,516 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max) buffer[i] = '\0'; + if (outlen) + { + *outlen = i; + } + return buffer; -} \ No newline at end of file +} + +const char *RegEx::GetSubstring(size_t start, char 
buffer[], size_t max, size_t *outlen) +{ + if (start < 0 || start >= mSubStrings.length()) + { + return NULL; + } + + RegExSub sub = mSubStrings.at(start); + + return getSubstring(subject, sub.start, sub.end, buffer, max, outlen); +} + +const char *RegEx::GetSubstring(size_t startOffset, size_t endOffset, char buffer[], size_t max, size_t *outlen) +{ + if (startOffset < 0 || endOffset < 0) + { + return NULL; + } + + return getSubstring(subject, startOffset, endOffset, buffer, max, outlen); +} + +void RegEx::MakeSubpatternsTable(int numSubpatterns) +{ + int nameCount = 0; + int rc = pcre_fullinfo(re, NULL, PCRE_INFO_NAMECOUNT, &nameCount); + + if (rc < 0) + { + return; + } + + if (nameCount > 0) + { + const char *nameTable; + int nameSize = 0; + int i = 0; + + int rc1 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMETABLE, &nameTable); + int rc2 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameSize); + + rc = rc2 ? rc2 : rc1; + + if (rc < 0) + { + mSubsNameTable.clear(); + return; + } + + NamedGroup data; + + while (i++ < nameCount) + { + data.index = 0xff * (unsigned char)nameTable[0] + (unsigned char)nameTable[1]; + data.name = nameTable + 2; + + mSubsNameTable.append(ke::Move(data)); + nameTable += nameSize; + } + } +} + +int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags) +{ + char *output = text; + + /** + * Retrieve all matches and store them in + * mSubStrings list. + */ + if (MatchAll(output) == -1) + { + return -1; + } + + size_t subjectLen = strlen(subject); + size_t total = 0; + size_t baseIndex = 0; + size_t diffLength = 0; + + char *toReplace = new char[textMaxLen + 1]; + char *toSearch = NULL; + + /** + * All characters which is not matched are not copied when replacing matches. + * Then original text (output buffer) should be considerated as empty. + */ + if (flags & REGEX_FORMAT_NOCOPY) + { + *output = '\0'; + } + else + { + /** + * This is used only when we do replace matches. 
+ */ + toSearch = new char[textMaxLen + 1]; + } + + /** + * Loop over all matches found. + */ + for (size_t i = 0; i < mMatchesSubs.length(); ++i) + { + char *ptr = toReplace; + + size_t browsed = 0; + size_t searchLen = 0; + size_t length = 0; + + /** + * Build the replace string as it can contain backreference + * and this needs to be parsed. + */ + for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed) + { + unsigned int c = *s; + + /** + * Supported format specifiers: + * + * $number : Substitutes the substring matched by group number. + * n must be an integer value designating a valid backreference, greater than 0, and of two digits at most. + * ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters). + * $& : Substitutes a copy of the whole match. + * $` : Substitutes all the text of the input string before the match. + * $' : Substitutes all the text of the input string after the match. + * $+ : Substitutes the last group that was captured. + * $_ : Substitutes the entire input string. + * $$ : Substitutes a literal "$". + */ + if (c == '$' || c == '\\') + { + switch (*++s) + { + case '\0': + { + /** + * End of string. + * Copy one character. + */ + *(ptr + browsed) = c; + break; + } + case '&': + { + /** + * Concatenate retrieved full match sub-string. + * length - 1 to overwrite EOS. + */ + GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length); + browsed += length - 1; + break; + } + case '`': + { + /** + * Concatenate part of original text up to + * first sub-string position. + */ + length = mSubStrings.at(baseIndex).start; + memcpy(ptr + browsed, subject, length); + browsed += length - 1; + break; + } + case '\'': + { + /** + * Concatenate part of original text from + * last sub-string end position to EOS. 
+ */ + length = mSubStrings.at(baseIndex).end; + memcpy(ptr + browsed, subject + length, subjectLen - length); + browsed += (subjectLen - length) - 1; + break; + } + case '+': + { + /** + * Copy the last group that was captured. + */ + GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length); + browsed += length - 1; + break; + } + case '_': + { + /** + * Copy the entire input string. + */ + memcpy(ptr + browsed, subject, subjectLen); + browsed += (subjectLen - 1); + break; + } + case '$': + case '\\': + { + /** + * Copy the single character $ or \. + */ + *(ptr + browsed) = c; + break; + } + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '{': + { + /** + * Checking backreference. + * Which can be either $n, ${n} or ${name}. + */ + size_t backref = -1; + const char *walk = s; + bool inBrace = false; + bool nameCheck = false; + + /** + * ${nn}. + * ^ + */ + if (*walk == '{') + { + inBrace = true; + ++walk; + } + + /** + * Valid number. + * $nn or ${nn} + * ^ ^ + */ + if (*walk >= '0' && *walk <= '9') + { + backref = *walk - '0'; + ++walk; + } + else if (inBrace) + { + nameCheck = true; + + /** + * Not a valid number. + * Checking as string. + * ${name} + * ^ + */ + if (*walk) + { + const char *pch = strchr(walk, '}'); + + if (pch != NULL) + { + /** + * A named group maximum character is 32 (PCRE). + */ + char name[32]; + size_t nameLength = strncopy(name, walk, pch - walk + 1); + + int flags, num = 0; + pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &flags); + + /** + * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used + * as pcre_get_stringnumber output order is not defined. + */ + if (flags & PCRE_DUPNAMES) + { + memset(ovector, 0, REGEX_MAX_SUBPATTERNS); + + /** + * pcre_copy_named_substring needs a vector containing sub-patterns ranges + * for a given match. 
+ */ + for (size_t j = 0; j < mMatchesSubs.at(i); ++j) + { + ovector[2 * j] = mSubStrings.at(baseIndex + j).start; + ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end; + } + + num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen); + + if (num != PCRE_ERROR_NOSUBSTRING) + { + browsed += num - 1; + s = pch; + break; + } + ++pch; + } + else + { + /** + * Retrieve sub-pattern index from a give name. + */ + num = pcre_get_stringnumber(re, name); + if (num != PCRE_ERROR_NOSUBSTRING) + { + backref = num; + walk = ++pch; + } + } + + if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i)) + { + /** + * If a sub-string for a given match is not found, or if > to + * number of sub-patterns we still need to check if this + * group name is a valid one because if so we want to escape it. + * Looking at the name table. + */ + bool found = false; + for (size_t i = 0; i < mSubsNameTable.length(); ++i) + { + if (!mSubsNameTable.at(i).name.compare(name)) + { + --browsed; + s = --pch; + found = true; + break; + } + } + + if (found) + { + continue; + } + } + } + } + } + + if (!nameCheck) + { + /** + * Valid second number. + * $nn or ${nn} + * ^ ^ + */ + if (*walk && *walk >= '0' && *walk <= '9') + { + backref = backref * 10 + *walk - '0'; + ++walk; + } + + if (inBrace) + { + /** + * Invalid specifier + * Either hit EOS or missing }. + * ${n or ${nn or ${nx or ${nnx + * ^ ^ ^ ^ + */ + if (*walk == '\0' || *walk != '}') + { + backref = -1; + } + else + { + ++walk; + } + } + } + + length = walk - s; + s = --walk; + + /** + * We can't provide a capture number >= to total that pcre_exec has found. + * 0 is implicitly accepted, same behavior as $&. + */ + if (backref >= 0 && (int)backref < mNumSubpatterns) + { + /** + * Valid available index for a given match. + */ + if (backref < mMatchesSubs.at(i)) + { + /** + * Concatenate retrieved sub-string. + * length - 1 to overwrite EOS. 
+ */ + GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length); + browsed += length - 1; + } + else + { + /** + * Valid unavailable index for a given match. + */ + --browsed; + } + } + else + { + /** + * If we here it means the syntax is valid but sub-pattern doesn't exist. + * So, copy as it is, including $. + */ + memcpy(ptr + browsed, s - length, length + 1); + browsed += length; + } + + break; + } + default: + { + /** + * Not a valid format modifier. + * So we copy characters as it is. + */ + *(ptr + browsed) = *s; + break; + } + } + } + else + { + /** + * At this point, direct copy. + */ + *(ptr + browsed) = c; + } + } + + *(ptr + browsed) = '\0'; + + /** + * Concatenate only replace string of each match, + * as we don't want to copy unmatched characters. + */ + if (flags & REGEX_FORMAT_NOCOPY) + { + /** + * We want just the first occurrence. + */ + if (total++ && (flags & REGEX_FORMAT_FIRSTONLY)) + { + break; + } + + strncat(output, toReplace, textMaxLen + 1); + } + else + { + /** + * Retrieves full string of a given match. + */ + const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen); + + /** + * We get something to replace, but the sub-pattern to search is empty. + * We insert replacement either a the start end or string. + */ + if (*toReplace && !searchLen) + { + if (output - text > 0) + { + strncat(output, toReplace, textMaxLen); + } + else + { + strncat(toReplace, text, textMaxLen); + strncopy(text, toReplace, strlen(toReplace) + 1); + } + + ++total; + } + else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL) + { + /** + * Then we simply do a replace. + * Probably not the most efficient, but this should be at least safe. + * To avoid issue where the function could find a string which is not at the expected index, + * We force the input string to start from index of the full match. 
+ */ + ++total; + } + + if (total && (flags & REGEX_FORMAT_FIRSTONLY)) + { + break; + } + } + + /** + * mMatchesSubs is a flat list containing all sub-patterns of all matches. + * A number of sub-patterns can vary per match. So we calculate the position in the list, + * from where the first sub-pattern result of current match starts. + */ + baseIndex += mMatchesSubs.at(i); + diffLength += browsed - searchLen; + } + + delete[] toReplace; + + if (toSearch != NULL) + { + delete[] toSearch; + } + + /** + * Return the number of successful replacements. + */ + return total; +} diff --git a/dlls/regex/CRegEx.h b/dlls/regex/CRegEx.h index ed88824f..d1f19912 100755 --- a/dlls/regex/CRegEx.h +++ b/dlls/regex/CRegEx.h @@ -34,6 +34,19 @@ #define _INCLUDE_CREGEX_H #include +#include + +/** + * Maximum number of sub-patterns, here 50 (this should be a multiple of 3). + */ +#define REGEX_MAX_SUBPATTERNS 150 + +/** + * Flags to used with regex_replace, to control the replacement behavior. + */ +#define REGEX_FORMAT_DEFAULT 0 // Uses the standard formatting rules to replace matches. +#define REGEX_FORMAT_NOCOPY 1 // The sections that do not match the regular expression are not copied when replacing matches. +#define REGEX_FORMAT_FIRSTONLY 2 // Only the first occurrence of a regular expression is replaced. 
class RegEx { @@ -42,6 +55,11 @@ public: int start, end; }; + struct NamedGroup { + ke::AString name; + size_t index; + }; + RegEx(); ~RegEx(); @@ -52,8 +70,11 @@ public: int Compile(const char *pattern, int iFlags); int Match(const char *str); int MatchAll(const char *str); + int Replace(char *text, size_t text_maxlen, const char *replace, size_t replaceLen, int flags = 0); void ClearMatch(); - const char *GetSubstring(int s, char buffer[], int max); + const char *GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen = NULL); + const char *GetSubstring(size_t start, size_t end, char buffer[], size_t max, size_t *outlen = NULL); + void MakeSubpatternsTable(int numSubpatterns); public: int mErrorOffset; @@ -63,9 +84,12 @@ public: private: pcre *re; bool mFree; - int ovector[30]; + int ovector[REGEX_MAX_SUBPATTERNS]; char *subject; ke::Vector mSubStrings; + ke::Vector mMatchesSubs; + ke::Vector mSubsNameTable; + int mNumSubpatterns; }; #endif //_INCLUDE_CREGEX_H diff --git a/dlls/regex/module.cpp b/dlls/regex/module.cpp index 9bb08737..70fbe8b1 100755 --- a/dlls/regex/module.cpp +++ b/dlls/regex/module.cpp @@ -153,6 +153,8 @@ cell match(AMX *amx, cell *params, bool all) else { *errorCode = x->Count(); + if (all) + return x->Count(); } return id + 1; @@ -272,6 +274,43 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params) return 1; } +//native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0); +static cell AMX_NATIVE_CALL regex_replace(AMX *amx, cell *params) +{ + int id = params[1] - 1; + if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree()) + { + MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id); + return 0; + } + + int textLen, replaceLen; + char *text = MF_GetAmxString(amx, params[2], 0, &textLen); + const char *replace = MF_GetAmxString(amx, params[4], 1, &replaceLen); + + cell *erroCode = MF_GetAmxAddr(amx, params[6]); + + RegEx *x = PEL[id]; + int e = 
/**
 * Checks whether the byte at c ends a complete UTF-8 sequence.
 *
 * Starting at c, the function steps backwards over continuation bytes
 * (10xxxxxx) until it reaches a byte that is not one, counting every byte
 * visited. The length announced by that leading byte (2, 3 or 4) is then
 * compared with the count.
 *
 * Note there is no lower bound on the backwards walk: the caller must make
 * sure a non-continuation byte exists at or before c.
 *
 * @param c		Pointer to the last byte to inspect.
 *
 * @return		0 if the bytes visited form exactly one complete multi-byte
 *				sequence, otherwise the number of bytes visited (including
 *				for a plain single byte, which yields 1).
 */
int UTIL_CheckValidChar(char *c)
{
	int visited = 1;

	while ((*c & 0xC0) == 0x80)
	{
		--c;
		++visited;
	}

	const int leadBits = *c & 0xF0;
	int announced = 0;

	if (leadBits == 0xC0 || leadBits == 0xD0)
	{
		announced = 2;
	}
	else if (leadBits == 0xE0)
	{
		announced = 3;
	}
	else if (leadBits == 0xF0)
	{
		announced = 4;
	}

	return (announced == visited) ? 0 : visited;
}
/**
* NOTE: Do not edit this for the love of god unless you have
* read the test cases and understand the code behind each one.
* While I don't guarantee there aren't mistakes, I do guarantee
* that plugins will end up relying on tiny idiosyncrasies of this
* function, just like they did with AMX Mod X.
*
* There are explicitly more cases than the AMX Mod X version because
* we're not doing a blind copy. Each case is specifically optimized
* for what needs to be done. Even better, we don't have to error on
* bad buffer sizes. Instead, this function will smartly cut off the
* string in a way that pushes old data out.
*
* Replaces the FIRST occurrence of search inside subject with replace,
* editing subject in place.
*
* @param subject		Null-terminated buffer to search and modify.
* @param maxLen			Size of the buffer including the null terminator
*						(it is decremented by one before use).
*						NOTE(review): the space checks compute
*						maxLen - strlen(subject) in size_t, so this presumably
*						must be larger than the current string length - confirm
*						against callers.
* @param search			Text to look for.
* @param searchLen		Number of bytes of search to compare.
* @param replace		Text to put in place of the match.
* @param replaceLen		Number of bytes of replace to copy.
* @param caseSensitive	true compares with strcmp/strncmp, false with
*						stricmp/strnicmp (not declared in this file; presumably
*						supplied by amxxmodule.h or the platform - confirm).
*
* @return				Pointer just past the inserted replacement text, or
*						NULL if search is longer than subject, no occurrence
*						was found, or the maxLen == 1 special case does not apply.
*/
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
{
	char *ptr = subject;
	size_t browsed = 0;						/* Offset of ptr from the start of subject */
	size_t textLen = strlen(subject);		/* Measured once; not updated afterwards */

	/* It's not possible to search or replace */
	if (searchLen > textLen)
	{
		return NULL;
	}

	/* Handle the case of one byte replacement.
	 * It's only valid in one case.
	 */
	if (maxLen == 1)
	{
		/* If the search matches and the replace length is 0,
		 * we can just terminate the string and be done.
		 */
		if ((caseSensitive ? strcmp(subject, search) : stricmp(subject, search)) == 0 && replaceLen == 0)
		{
			*subject = '\0';
			return subject;
		}
		else
		{
			return NULL;
		}
	}

	/* Subtract one off the maxlength so we can include the null terminator */
	maxLen--;

	/* Stop once fewer than searchLen bytes remain after ptr */
	while (*ptr != '\0' && (browsed <= textLen - searchLen))
	{
		/* See if we get a comparison */
		if ((caseSensitive ? strncmp(ptr, search, searchLen) : strnicmp(ptr, search, searchLen)) == 0)
		{
			if (replaceLen > searchLen)
			{
				/* First, see if we have enough space to do this operation */
				if (maxLen - textLen < replaceLen - searchLen)
				{
					/* First, see if the replacement length goes out of bounds. */
					if (browsed + replaceLen >= maxLen)
					{
						/* EXAMPLE CASE:
						 * Subject: AABBBCCC
						 * Buffer : 12 bytes
						 * Search : BBB
						 * Replace: DDDDDDDDDD
						 * OUTPUT : AADDDDDDDDD
						 * POSITION:           ^
						 */
						/* If it does, we'll just bound the length and do a strcpy. */
						replaceLen = maxLen - browsed;

						/* Note, we add one to the final result for the null terminator */
						strncopy(ptr, replace, replaceLen + 1);

						/* Don't truncate a multi-byte character */
						/* (the test below is: last copied byte has its high bit set) */
						if (*(ptr + replaceLen - 1) & 1 << 7)
						{
							/* Drop the bytes of an incomplete trailing sequence, if any */
							replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1);
							*(ptr + replaceLen) = '\0';
						}
					}
					else
					{
						/* EXAMPLE CASE:
						 * Subject: AABBBCCC
						 * Buffer : 12 bytes
						 * Search : BBB
						 * Replace: DDDDDDD
						 * OUTPUT : AADDDDDDDCC
						 * POSITION:         ^
						 */
						/* We're going to have some bytes left over... */
						size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1;
						size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1;
						/* Skip the leading bytes of the old tail that no longer fit,
						 * so the END of the old data (and its terminator) is kept.
						 */
						char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy);
						char *moveTo = ptr + replaceLen;

						/* First, move our old data out of the way. */
						memmove(moveTo, moveFrom, realBytesToCopy);

						/* Now, do our replacement. */
						memcpy(ptr, replace, replaceLen);
					}
				}
				else
				{
					/* EXAMPLE CASE:
					 * Subject: AABBBCCC
					 * Buffer : 12 bytes
					 * Search : BBB
					 * Replace: DDDD
					 * OUTPUT : AADDDDCCC
					 * POSITION:      ^
					 */
					/* Yes, we have enough space. Do a normal move operation. */
					char *moveFrom = ptr + searchLen;
					char *moveTo = ptr + replaceLen;

					/* First move our old data out of the way. */
					size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
					memmove(moveTo, moveFrom, bytesToCopy);

					/* Now do our replacement. */
					memcpy(ptr, replace, replaceLen);
				}
			}
			else if (replaceLen < searchLen)
			{
				/* EXAMPLE CASE:
				 * Subject: AABBBCCC
				 * Buffer : 12 bytes
				 * Search : BBB
				 * Replace: D
				 * OUTPUT : AADCCC
				 * POSITION:   ^
				 */
				/* If the replacement does not grow the string length, we do not
				 * need to do any fancy checking at all. Yay!
				 */
				char *moveFrom = ptr + searchLen; /* Start after the search pointer */
				char *moveTo = ptr + replaceLen; /* Copy to where the replacement ends */

				/* Copy our replacement in, if any */
				if (replaceLen)
				{
					memcpy(ptr, replace, replaceLen);
				}

				/* Figure out how many bytes to move down, including null terminator */
				size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;

				/* Move the rest of the string down */
				memmove(moveTo, moveFrom, bytesToCopy);
			}
			else
			{
				/* EXAMPLE CASE:
				 * Subject: AABBBCCC
				 * Buffer : 12 bytes
				 * Search : BBB
				 * Replace: DDD
				 * OUTPUT : AADDDCCC
				 * POSITION:     ^
				 */
				/* We don't have to move anything around, just do a straight copy */
				memcpy(ptr, replace, replaceLen);
			}

			/* Only the first occurrence is handled; report where the replacement ends */
			return ptr + replaceLen;
		}
		ptr++;
		browsed++;
	}

	return NULL;
}
/**
 * Perform a regular expression search and replace.
 *
 * An optional parameter, flags, allows you to specify options on how to format the expression.
 * Supported format specifiers for the replace parameter:
 *   $number  : Substitutes the substring matched by group number.
 *              n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
 *   ${name}  : Substitutes the substring matched by the named group name (a maximum of 32 characters).
 *   $&       : Substitutes a copy of the whole match.
 *   $`       : Substitutes all the text of the input string before the match.
 *   $'       : Substitutes all the text of the input string after the match.
 *   $+       : Substitutes the last group that was captured.
 *   $_       : Substitutes the entire input string.
 *   $$       : Substitutes a literal "$".
 * As a note, the character \ can also be used as the format specifier prefix; it behaves the same as $.
 *
 * @param pattern       The regular expression handle (a compiled pattern, not a pattern string).
 * @param string        The string to search. It is modified in place and receives the result.
 * @param maxLen        Maximum length of the string buffer.
 * @param replace       The string that will be used to replace any matches. See above for format specifiers.
 * @param flags         General flags to control how the string is replaced. See REGEX_FORMAT_* defines.
 * @param errcode       Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
 *
 * @return              -2 = Matching error (error code is stored in errcode)
 *                       0 = No match.
 *                     >=1 = Number of matches replaced.
 */
native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);
/**
 * Runs one regex_replace() test case and updates PassedCount / FailedCount.
 *
 * The pattern is compiled with regex_compile_ex(), the input is copied into a
 * scratch buffer, regex_replace() is applied to it, and both the returned count
 * and the resulting buffer are compared with the expected values. A failure is
 * reported with server_print() together with the running test number.
 *
 * @param regex             Pattern to compile.
 * @param replace           Replacement string (may contain format specifiers).
 * @param string            Input text.
 * @param expectedString    Expected content of the buffer after replacing.
 * @param expectedCount     Expected return value of regex_replace(), or -1 to skip the check.
 * @param regexFlags        Flags passed to regex_compile_ex().
 * @param formatFlags       Flags passed to regex_replace().
 * @param bufferlen         Maximum length passed to regex_replace(), or -1 to use the full buffer.
 */
test(const regex[], const replace[], const string[], const expectedString[], expectedCount = -1, regexFlags = 0, formatFlags = 0, bufferlen = -1)
{
	new errorCode, error[128];
	new Regex:r = regex_compile_ex(regex, regexFlags, error, charsmax(error), errorCode);

	if (r == REGEX_PATTERN_FAIL || errorCode)
	{
		server_print("^t^t#%d. Pattern fail : ^"%s^"(%d)", ++FailedCount + PassedCount, error, errorCode);
		return;
	}

	new buffer[512];
	copy(buffer, charsmax(buffer), string);

	// errorCode is known to be 0 here, so it is reused for regex_replace()
	// instead of declaring a second variable that would shadow it.
	new maxLen = (bufferlen != -1) ? bufferlen : charsmax(buffer);
	new count = regex_replace(r, buffer, maxLen, replace, formatFlags, errorCode);

	if (expectedCount != -1 && count != expectedCount)
	{
		server_print("^t^t#%d. Failed - count = %d, expected count = %d", ++FailedCount + PassedCount, count, expectedCount);
	}
	else if (!equal(buffer, expectedString))
	{
		server_print("^t^t#%d. Failed - output = %s, expected output = %s", ++FailedCount + PassedCount, buffer, expectedString);
	}
	else
	{
		++PassedCount;
	}

	regex_free(r);
}
v1c0d1/|/", + .expectedCount = 4 + ); + + test( .regex = "\[(right)\](((?R)|[^^[]+?|\[)*)\[/\\1\]", .regexFlags = PCRE_CASELESS | PCRE_UNGREEDY, + .replace = "", + .string = "[CODE]<td align="$stylevar[right]">[/CODE]", + .expectedString = "[CODE]<td align="$stylevar[right]">[/CODE]", + .expectedCount = 0 + ); + + test( .regex = "- This is a string$", + .replace = "This shouldn\'t work", + .string = "123456789 - Hello, world - This is a string.", + .expectedString = "123456789 - Hello, world - This is a string.", + .expectedCount = 0 + ); + + test( .regex = "[0-35-9]", + .replace = "4", + .string = "123456789 - Hello, world - This is a string.", + .expectedString = "444444444 - Hello, world - This is a string.", + .expectedCount = 8 + ); + + test( .regex = "\b[hH]\w{2,4}", + .replace = "Bonjour", + .string = "123456789 - Hello, world - This is a string.", + .expectedString = "123456789 - Bonjour, world - This is a string.", + .expectedCount = 1 + ); + + test( .regex = "(\w)\s*-\s*(\w)", + .replace = "$1. $2", + .string = "123456789 - Hello, world - This is a string.", + .expectedString = "123456789. Hello, world. This is a string.", + .expectedCount = 2 + ); + + test( .regex = "([a-z]\w+)@(\w+)\.(\w+)\.([a-z]{2,})", + .replace = "$1 at $2 dot $3 dot $4", + .string = "josmessa@uk.ibm.com", + .expectedString = "josmessa at uk dot ibm dot com", + .expectedCount = 1 + ); + + test( .regex = "\b\w{1}s", + .replace = "test", + .string = "This is a string. (0-9) as well as parentheses", + .expectedString = "This test a string. (0-9) test well test parentheses", + .expectedCount = 3 + ); + + + test( .regex = "(\d{1})-(\d{1})", + .replace = "$1 to $2", + .string = "This is a string. It contains numbers (0-9) as well as parentheses and some other things!", + .expectedString = "This is a string. It contains numbers (0 to 9) as well as parentheses and some other things!", + .expectedCount = 1 + ); + + test( .regex = "[\(!\)]", + .replace = "*", + .string = "This is a string. 
It contains numbers (0-9) as well as parentheses and some other things!", + .expectedString = "This is a string. It contains numbers *0-9* as well as parentheses and some other things*", + .expectedCount = 3 + ); + } + + server_print("^tChecking edges cases..."); + { + test(.regex = "[0-9]+", .replace = "*", .string = "", .expectedString = "", .expectedCount = 0); + test(.regex = "([0-9]+)", .replace = "", .string = "123", .expectedString = "", .expectedCount = 1); + test(.regex = "a", .replace = "\", .string = "a", .expectedString = "\", .expectedCount = 1); + test(.regex = "^^", .replace = "x", .string = "a", .expectedString = "xa", .expectedCount = 1); + test(.regex = "b", .replace = "\", .string = "b", .expectedString = "\", .expectedCount = 1, .bufferlen = 1); + test(.regex = "b", .replace = "^^", .string = "b", .expectedString = "b", .expectedCount = 0, .bufferlen = 0); + test(.regex = "\w+", .replace = "123", .string = "abc", .expectedString = "12", .expectedCount = 1, .bufferlen = 2); + } + + server_print("^tChecking UTF-8 support..."); + { + test(.regex = "(\w+)", .replace = "*", .string = "éà@É", .expectedString = "éà@É", .expectedCount = 0); + test(.regex = "(\w+)", .replace = "*", .string = "éà@É", .expectedString = "*@*", .expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8); + test(.regex = "(\w+)", .replace = "字", .string = "éà@É", .expectedString = "字@字",.expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8); + test(.regex = "(\w+)", .replace = "字", .string = "éà@É", .expectedString = "字", .expectedCount = 2, .regexFlags = PCRE_UCP | PCRE_UTF8, .bufferlen = 3); + } + + server_print("^tChecking substitutions..."); + { + test(.regex = "x", .replace = "y", .string = "text", .expectedString = "teyt" ); + test(.regex = "x", .replace = "$", .string = "text", .expectedString = "te$t" ); + test(.regex = "x", .replace = "$1", .string = "text", .expectedString = "te$1t" ); + test(.regex = "x", .replace = "${1", .string = "text", .expectedString = "te${1t" 
); + test(.regex = "x", .replace = "${", .string = "text", .expectedString = "te${t" ); + test(.regex = "x", .replace = "${$0", .string = "text", .expectedString = "te${xt" ); + test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" ); + test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" ); + test(.regex = "x", .replace = "$5", .string = "text", .expectedString = "te$5t" ); + test(.regex = "x", .replace = "$5", .string = "te(x)t", .expectedString = "te($5)t" ); + test(.regex = "x", .replace = "${foo", .string = "text", .expectedString = "te${foot" ); + test(.regex = "(x)", .replace = "$5", .string = "text", .expectedString = "te$5t" ); + test(.regex = "(x)", .replace = "$1", .string = "text", .expectedString = "text" ); + test(.regex = "e(x)", .replace = "$1", .string = "text", .expectedString = "txt" ); + test(.regex = "e(x)", .replace = "$5", .string = "text", .expectedString = "t$5t" ); + test(.regex = "e(x)", .replace = "$4", .string = "text", .expectedString = "t$4t" ); + test(.regex = "e(x)", .replace = "$3", .string = "text", .expectedString = "t$3t" ); + test(.regex = "e(x)", .replace = "${1}", .string = "text", .expectedString = "txt" ); + test(.regex = "e(x)", .replace = "${3}", .string = "text", .expectedString = "t${3}t" ); + test(.regex = "e(x)", .replace = "${1}${3}", .string = "text", .expectedString = "tx${3}t" ); + test(.regex = "e(x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t"); + test(.regex = "e(?x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t"); + test(.regex = "e(?x)", .replace = "${1}${foo}", .string = "text", .expectedString = "txxt" ); + test(.regex = "e(?x)", .replace = "${goll}${foo}", .string = "text", .expectedString = "t${goll}xt"); + test(.regex = "e(?x)", .replace = "${goll${foo}", .string = "text", .expectedString = "t${gollxt" ); + test(.regex = "e(?x)", .replace = "${goll${foo}}", .string = "text", 
.expectedString = "t${gollx}t"); + test(.regex = "e(?x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" ); + test(.regex = "e(?x)", .replace = "${${foo}}", .string = "text", .expectedString = "t${x}t" ); + test(.regex = "e(?x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" ); + test(.regex = "e(?x)", .replace = "$${bfoo}}", .string = "text", .expectedString = "t${bfoo}}t"); + test(.regex = "e(?x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" ); + test(.regex = "e(?x)", .replace = "$${foo}", .string = "text", .expectedString = "t${foo}t" ); + test(.regex = "e(?x)", .replace = "$$", .string = "text", .expectedString = "t$t" ); + test(.regex = "(e)(?x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "txext" ); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext" ); + test(.regex = "(e)(?x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "txexxt" ); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "teexxt" ); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$_", .string = "texts", .expectedString = "teextextsts"); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$`", .string = "texts", .expectedString = "teextts" ), + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$'", .string = "texts", .expectedString = "teextsts" ), + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$&", .string = "texts", .expectedString = "teexexts" ), + test(.regex = "x", .replace = "y", .string = "text", .expectedString = "teyt" ); + test(.regex = "x", .replace = "$", .string = "text", .expectedString = "te$t" ); + test(.regex = "x", .replace = "$1", .string = "text", .expectedString = "te$1t" ); + test(.regex = "x", .replace = "${1}", .string = "text", .expectedString = "te${1}t" ); + test(.regex = "x", .replace = "$5", .string = "text", .expectedString = "te$5t" ); + test(.regex = "x", .replace = "$5", 
.string = "te(x)t", .expectedString = "te($5)t" ); + test(.regex = "x", .replace = "${foo", .string = "text", .expectedString = "te${foot" ); + test(.regex = "(x)", .replace = "$5", .string = "text", .expectedString = "te$5t" ); + test(.regex = "(x)", .replace = "$1", .string = "text", .expectedString = "text" ); + test(.regex = "e(x)", .replace = "$1", .string = "text", .expectedString = "txt" ); + test(.regex = "e(x)", .replace = "$5", .string = "text", .expectedString = "t$5t" ); + test(.regex = "e(x)", .replace = "$4", .string = "text", .expectedString = "t$4t" ); + test(.regex = "e(x)", .replace = "$3", .string = "text", .expectedString = "t$3t" ); + test(.regex = "e(x)", .replace = "${1}", .string = "text", .expectedString = "txt" ); + test(.regex = "e(x)", .replace = "${3}", .string = "text", .expectedString = "t${3}t" ); + test(.regex = "e(x)", .replace = "${1}${3}", .string = "text", .expectedString = "tx${3}t" ); + test(.regex = "e(x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t"); + test(.regex = "e(?x)", .replace = "${1}${name}", .string = "text", .expectedString = "tx${name}t"); + test(.regex = "e(?x)", .replace = "${1}${foo}", .string = "text", .expectedString = "txxt" ); + test(.regex = "e(?x)", .replace = "${goll}${foo}", .string = "text", .expectedString = "t${goll}xt"); + test(.regex = "e(?x)", .replace = "${goll${foo}", .string = "text", .expectedString = "t${gollxt" ); + test(.regex = "e(?x)", .replace = "${goll${foo}}", .string = "text", .expectedString = "t${gollx}t"); + test(.regex = "e(?x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" ); + test(.regex = "e(?x)", .replace = "${${foo}}", .string = "text", .expectedString = "t${x}t" ); + test(.regex = "e(?x)", .replace = "$${foo}}", .string = "text", .expectedString = "t${foo}}t" ); + test(.regex = "e(?x)", .replace = "$${bfoo}}", .string = "text", .expectedString = "t${bfoo}}t"); + test(.regex = "e(?x)", .replace = "$${foo}}", .string = 
"text", .expectedString = "t${foo}}t" ); + test(.regex = "e(?x)", .replace = "$${foo}", .string = "text", .expectedString = "t${foo}t" ); + test(.regex = "e(?x)", .replace = "$$", .string = "text", .expectedString = "t$t" ); + test(.regex = "(e)(?x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "txext" ); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext" ); + test(.regex = "(e)(?x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "txexxt" ); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$+", .string = "text", .expectedString = "teexxt" ); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$_", .string = "texts", .expectedString = "teextextsts"); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$`", .string = "texts", .expectedString = "teextts" ); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$'", .string = "texts", .expectedString = "teextsts" ); + test(.regex = "(?e)(x)", .replace = "${foo}$1$2$&", .string = "texts", .expectedString = "teexexts" ); + test(.regex = "<(.+?)>", .replace = "[$0:$1]", .string = "am not", .expectedString = "[:i]am not[:/i]"); + test(.regex = "(?e)(?x)", .replace = "${foo}$1$2", .string = "text", .expectedString = "teext", .regexFlags = PCRE_DUPNAMES); + test(.regex = "\b(\w+)(\s)(\w+)\b", .replace = "$3$2$1", .string = "one two", .expectedString = "two one"); + test(.regex = "\b(\d+)\s?USD", .replace = "$$$1", .string = "103 USD", .expectedString = "$103" ); + test(.regex = "\b(?\w+)(\s)(?\w+)\b", .replace = "${w2} ${w1}", .string = "one two", .expectedString = "two one"); + test(.regex = "(\$*(\d*(\.+\d+)?){1})", .replace = "**$&", .string = "$1.30", .expectedString = "**$1.30**"); + test(.regex = "B+", .replace = "$`", .string = "AABBCC", .expectedString = "AAAACC"); + test(.regex = "B+", .replace = "$'", .string = "AABBCC", .expectedString = "AACCCC"); + test(.regex = "B+(C+)", .replace = "$+", .string = "AABBCCDD", .expectedString = "AACCDD"); + 
test(.regex = "B+", .replace = "$_", .string = "AABBCC", .expectedString = "AAAABBCCCC"); + test(.regex = "(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", .replace = "${S}$11$1", .string = "F2345678910L71", .expectedString = "F2345678910L71"), + test(.regex = "(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11", .replace = "${S}$11$1", .string = "F2345678910LL1", .expectedString = "${S}LF1"); + } + + server_print("^tChecking moar #1..."); + { + test(.string = "(?(w)a|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(w)|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(w)a)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(w)a|)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(w)?|a|o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(w)||o)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(w)(a)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(w))\a|)" , .regex = "\(\?\(\w+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(2)a|o)" , .regex = "\(\?\([^^\)]+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?(|)a|o)" , .regex = "\(\?\([^^\)]+\).*\|?.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "a\3b" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "a\5b"); + test(.string = "\3b" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "\5b"); + test(.string = "\\\3b" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)" , .replace = "\5", .expectedString = "\\\5b"); + test(.string = "\\\k" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w+)>" , .replace = "\5", .expectedString = "\\\5"); + test(.string = 
"\\\\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "\\\\k'g'"); + test(.string = "a\\\\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "a\\\\k'g'"); + test(.string = "\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'" , .replace = "\5", .expectedString = "\5"); + test(.string = "(?)" , .regex = "\(\?<[A-Za-z]\w*-[A-Za-z]\w*>.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?'n1-n2'a)" , .regex = "\(\?'[A-Za-z]\w*-[A-Za-z]\w*'.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "\p{Isa}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Ina}"); + test(.string = "\p{Is}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Is}"); + test(.string = "\p{Isa" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Isa"); + test(.string = "a(?#|)" , .regex = "\(\?#[^^\)]*\)" , .replace = "", .expectedString = "a"); + test(.string = "(?#|)" , .regex = "\(\?#[^^\)]*\)" , .replace = "", .expectedString = ""); + test(.string = "(?#|)" , .regex = "\#[^^\n\r]*" , .replace = "", .expectedString = "(?"); + test(.string = "(?inm-xs:\#)" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(r"); + test(.string = "(?ni:())" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(r)"); + test(.string = "(?x-i:)" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(?x-i:)"); + test(.string = "(?n:))" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(?n:))"); + test(.string = "(?)" , .regex = "\(\?<[A-Za-z]\w*>.*\)" , .replace = "r", 
.expectedString = "r"); + test(.string = "(?'n1'y)" , .regex = "\(\?'[A-Za-z]\w*'.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?<45>y)" , .regex = "\(\?<\d+>.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "(?'7'o)" , .regex = "\(\?'\d+'.*\)" , .replace = "r", .expectedString = "r"); + test(.string = "\\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "\\r"); + test(.string = "a\\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "a\\r"); + test(.string = "\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "\r"); + test(.string = "a\\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "a\r"); + test(.string = "\(" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "r"); + test(.string = "a\(" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\(" , .replace = "r", .expectedString = "ar"); + test(.string = "?:" , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])" , .replace = "r", .expectedString = "r"); + test(.string = "?-]|^^\?<[!=])" , .replace = "r", .expectedString = "r"); + test(.string = "?-" , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])" , .replace = "r", .expectedString = "r"); + test(.string = "\(?" 
, .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?<[A-Za-z]\w*>", .replace = "r", .expectedString = "\(r"); + test(.string = "a\(?'n'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?'[A-Za-z]\w*'", .replace = "r", .expectedString = "a\(r"); + test(.string = "\\(?<2>" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?<\d+>" , .replace = "r", .expectedString = "\\(r"); + test(.string = "(?'2'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?'\d+'" , .replace = "r", .expectedString = "(r"); + test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]"); + test(.string = "\[a\bb]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[a\u8b]"); + test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]"); + test(.string = "\[\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]"); + test(.string = "\[\\b]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\\u8]"); + test(.string = "[[]" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\\[" , .expectedString = "[\[]"); + test(.string = "\[[]" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[[]"); + test(.string = "\[\[]" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[\[]"); + test(.string = "\[\[]" , .regex = 
"(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\[" , .expectedString = "\[\[]"); + test(.string = "\{" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\\{"); + test(.string = "\{" , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\\{"); + test(.string = "\{1,2}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1,2}"); + test(.string = "\{1}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1}"); + test(.string = "\{1,}" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1,}"); + test(.string = "\{1" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\\{", .expectedString = "\{1"); + test(.string = "\\(?!{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "\5", .expectedString = "?!"); + test(.string = "{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r"); + test(.string = "({1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r"); + test(.string = "(?{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "(?{1}"); + test(.string = "(?:{1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r"); + test(.string = "\({1}" , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "\({1}"); + test(.string = "\p{Isa}" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = 
"\p{Ina}"); + test(.string = "\p{Is}" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = "\p{Is}"); + test(.string = "\p{Isa" , .regex = "(?!\\[pP]\{)Is(?=\w+\})" , .replace = "In", .expectedString = "\p{Isa"); + test(.string = "\}" , .regex = "(?!(\\A|[^^\\])(\\{2})*\\{\\d\\d*(,(\\d\\d*)?)?)\\}", .replace = "\\}", .expectedString = "\}"); + test(.string = "{\}" , .regex = "(?!(\A|[^^\^^])(\^^{2})*\{\d\d*(,(\d\d*)?)?)\}", .replace = "\\}", .expectedString = "{\\}"); + test(.string = "{1,2}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "{1,2\}"); + test(.string = "\{1}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\}"); + test(.string = "\{1\}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\\}"); + test(.string = "\{1}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "\{1\}"); + test(.string = "{1,}" , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}" , .replace = "\\}", .expectedString = "{1,\}"); + test(.string = "a(?" 
, .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w)>" , .replace = "\5", .expectedString = "\\5"); + test(.string = "a\\k" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w)>" , .replace = "\5", .expectedString = "a\\5"); + test(.string = "\\k'g'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "\\5"); + test(.string = "a\\k'g'" , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "a\\5"); + test(.string = "\k'g'" , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'" , .replace = "\5", .expectedString = "\5"); + } + + server_print("^tChecking moar #2..."); + { + test(.regex = "^^((?>[a-zA-Z\d!#$%&'*+\-\/=?^^_`{|}~]+\x20*|^"((?=[\x01-\x7f])[^^^"\\]|\\[\x01-\x7f])*^"\x20*)*(?<))?((?!\.)(?>\.?[a-zA-Z\d!#$%&'*+\-\/=?^^_`{|}~]+)+|^"((?=[\x01-\x7f])[^^^"\\]|\\[\x01-\x7f])*^")@(((?!-)[a-zA-Z\d\-]+(?)$" , + .replace = "$1$4@$7net>", + .string = "Name Surname ", + .expectedString = "Name Surname " + ); + + test(.regex = "([A-Z])\w+", + .replace = "*snip*", + .string = "Welcome to RegExr v2.0 by gskinner.com!\ + \ + Edit the Expression & Text to see matches. Roll over matches or the expression for details. Undo mistakes with ctrl-z. Save & Share expressions with friends or the Community. A full Reference & Help is available in the Library, or watch the video Tutorial.\ + \ + Sample text for testing:\ + abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ\ + :0123456789 +-.,!@#$%^^&*();\/|<>^"'\ + 12345 -98.7 3.141 .6180 9,000 +42\ + 555.123.4567 +1-(800)-555-2468\ + foo@demo.net bar.ba@test.co.uk\ + www.demo.com http://foo.co.uk/\ + http://regexr.com/foo.html?q=bar", + + .expectedString = "*snip* to *snip* v2.0 by gskinner.com!\ + \ + *snip* the *snip* & *snip* to see matches. *snip* over matches or the expression for details. *snip* mistakes with ctrl-z. *snip* & *snip* expressions with friends or the *snip*. 
A full *snip* & *snip* is available in the *snip*, or watch the video *snip*.\ + \ + *snip* text for testing:\ + abcdefghijklmnopqrstuvwxyz *snip*\ + :0123456789 +-.,!@#$%^^&*();\/|<>^"'\ + 12345 -98.7 3.141 .6180 9,000 +42\ + 555.123.4567 +1-(800)-555-2468\ + foo@demo.net bar.ba@test.co.uk\ + www.de", + .regexFlags = PCRE_EXTENDED + ); + + test(.regex = "/\*(?>[^^*/]+|\*[^^/]|/[^^*]|/\*(?>[^^*/]+|\*[^^/]|/[^^*])*\*/)*\*/", + .replace = "", + .string = "/* comment */\ + no comment\ + /* comment\ + spanning\ + multiple\ + lines */\ + /* comment /* nesting */ of /* two */ levels supported */\ + /* comment /* nesting */ of /* /* more than */ two levels */ not supported */", + .expectedString = "no comment\ + /* comment of not supported */" + ); + + test(.regex = "\b(?https?|ftp)://(?[A-Z0-9.-]+)(?/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(?\?[A-Z0-9+&@#/%=~_|!:,.;]*)?", + .replace = "${protocol}s://site.com${file}^n", + .string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.", + .expectedString = "https://site.com^nhttps://site.com/^nhttps://site.com/test.php^nhttps://site.com/index.php^nhttps://site.com^n", + .regexFlags = PCRE_CASELESS | PCRE_EXTENDED, + .formatFlags = REGEX_FORMAT_NOCOPY + ); + + test(.regex = "\b(https?|ftp)://([A-Z0-9.-]+)(/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(\?[A-Z0-9+&@#/%=~_|!:,.;]*)?", + .replace = "$1s://site.com$3^n", + .string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.", + .expectedString = "https://site.com^nhttps://site.com/^nhttps://site.com/test.php^nhttps://site.com/index.php^nhttps://site.com^n", + .regexFlags = PCRE_CASELESS | PCRE_EXTENDED, + .formatFlags = REGEX_FORMAT_NOCOPY + ); + + test(.regex = 
"\b(https?|ftp)://([A-Z0-9.-]+)(/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(\?[A-Z0-9+&@#/%=~_|!:,.;]*)?", + .replace = "$1s://site.com$3^n", + .string = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.", + .expectedString = "https://site.com^n", + .regexFlags = PCRE_CASELESS | PCRE_EXTENDED, + .formatFlags = REGEX_FORMAT_NOCOPY | REGEX_FORMAT_FIRSTONLY + ); + + test(.regex = "^^(.++)\r?\n(?=(?:^^(?!\1$).*+\r?\n)*+\1$)", + .replace = "", + .string = "one^n\ + two^n\ + three^n\ + four^n\ + two^n\ + three^n\ + four^n\ + three^n\ + four^n\ + four", + .expectedString = "one^n\ + two^n\ + three^n\ + four", + .regexFlags = PCRE_EXTENDED | PCRE_MULTILINE + ); + } + + end(); +} \ No newline at end of file diff --git a/support/PackageScript b/support/PackageScript index 265ad942..ff5fd354 100644 --- a/support/PackageScript +++ b/support/PackageScript @@ -225,6 +225,7 @@ scripting_files = [ 'testsuite/menutest.sma', 'testsuite/native_test.sma', 'testsuite/nvault_test.sma', + 'testsuite/regex_test.sma', 'testsuite/sorttest.sma', 'testsuite/strbreak.sma', 'testsuite/sqlxtest.sma',