Regex: Add regex_replace native.

This commit is contained in:
Arkshine
2014-07-17 11:20:52 +02:00
parent 287f471ac4
commit 939a724b1a
11 changed files with 1432 additions and 56 deletions

View File

@ -18,6 +18,7 @@ binary.sources = [
'sdk/amxxmodule.cpp',
'module.cpp',
'CRegEx.cpp',
'utils.cpp',
]
AMXX.modules += [builder.Add(binary)]

View File

@ -30,10 +30,12 @@
* you do not wish to do so, delete this exception statement from your
* version.
*/
#include "amxxmodule.h"
#include "pcre.h"
#include "CRegEx.h"
#include <string.h>
#include "amxxmodule.h"
#include <ctype.h>
#include "utils.h"
RegEx::RegEx()
{
@ -43,6 +45,9 @@ RegEx::RegEx()
mFree = true;
subject = NULL;
mSubStrings.clear();
mMatchesSubs.clear();
mSubsNameTable.clear();
mNumSubpatterns = 0;
}
void RegEx::Clear()
@ -57,6 +62,9 @@ void RegEx::Clear()
delete[] subject;
subject = NULL;
mSubStrings.clear();
mMatchesSubs.clear();
mSubsNameTable.clear();
mNumSubpatterns = 0;
}
RegEx::~RegEx()
@ -143,6 +151,19 @@ int RegEx::Compile(const char *pattern, int iFlags)
mFree = false;
/**
* Retrieve the number of captured groups
* including the full match.
*/
pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns);
++mNumSubpatterns;
/**
* Build the table with the named groups,
* which contain an index and a name per group.
*/
MakeSubpatternsTable(mNumSubpatterns);
return 1;
}
@ -153,13 +174,13 @@ int RegEx::Match(const char *str)
if (mFree || re == NULL)
return -1;
this->ClearMatch();
ClearMatch();
//save str
subject = new char[strlen(str) + 1];
strcpy(subject, str);
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, REGEX_MAX_SUBPATTERNS);
if (rc < 0)
{
@ -188,61 +209,103 @@ int RegEx::Match(const char *str)
int RegEx::MatchAll(const char *str)
{
int rc = 0;
int rr = 0;
int offset = 0;
int rc = 0;
int startOffset = 0;
int exoptions = 0;
int notEmpty = 0;
int sizeOffsets = mNumSubpatterns * 3;
int subjectLen = strlen(str);
if (mFree || re == NULL)
{
return -1;
}
this->ClearMatch();
//save str
subject = new char[strlen(str) + 1];
ClearMatch();
subject = new char[subjectLen + 1];
strcpy(subject, str);
RegExSub sub, whole;
while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30)))
RegExSub sub;
while (1)
{
if (rr < 0)
rr = pcre_exec(re, NULL, subject, (int)subjectLen, startOffset, exoptions | notEmpty, ovector, REGEX_MAX_SUBPATTERNS);
/**
* The string was already proved to be valid UTF-8
*/
exoptions |= PCRE_NO_UTF8_CHECK;
/**
* Too many substrings
*/
if (rr == 0)
{
if (rr == PCRE_ERROR_NOMATCH)
rr = sizeOffsets / 3;
}
if (rr > 0)
{
mMatchesSubs.append(rr);
for (int s = 0; s < rr; ++s)
{
break;
sub.start = ovector[2 * s];
sub.end = ovector[2 * s + 1];
mSubStrings.append(sub);
}
}
else if (rr == PCRE_ERROR_NOMATCH)
{
/**
* If we previously set PCRE_NOTEMPTY after a null match,
* this is not necessarily the end. We need to advance
* the start offset, and continue. Fudge the offset values
* to achieve this, unless we're already at the end of the string.
*/
if (notEmpty && startOffset < (int)subjectLen)
{
ovector[0] = startOffset;
ovector[1] = startOffset + 1;
}
else
{
mErrorOffset = rr;
if (rc)
this->ClearMatch();
return -1;
break;
}
}
rc += rr;
mSubStrings.ensure(rc);
for (int s = 1; s < rr; ++s)
else
{
sub.start = ovector[2 * s];
sub.end = ovector[2 * s + 1];
mSubStrings.append(sub);
mErrorOffset = rr;
if (mMatchesSubs.length())
{
ClearMatch();
}
return -1;
}
offset = ovector[1];
/**
* If we have matched an empty string, mimic what Perl's /g options does.
* This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
* the match again at the same point. If this fails (picked up above) we
* advance to the next character.
*/
notEmpty = (ovector[1] == ovector[0]) ? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
/**
* Advance to the next piece.
*/
startOffset = ovector[1];
}
if (!rc)
if (!mMatchesSubs.length())
{
return 0;
sub = mSubStrings.at(0);
whole.start = sub.start;
sub = mSubStrings.back();
whole.end = sub.end;
mSubStrings.insert(0, whole);
}
return 1;
}
@ -256,18 +319,14 @@ void RegEx::ClearMatch()
delete[] subject;
subject = NULL;
mSubStrings.clear();
mMatchesSubs.clear();
}
const char *RegEx::GetSubstring(int s, char buffer[], int max)
const char *getSubstring(char *subject, size_t start, size_t end, char buffer[], size_t max, size_t *outlen)
{
int i = 0;
if ((size_t)s >= mSubStrings.length() || s < 0)
return NULL;
RegExSub sub = mSubStrings.at(s);
char *substr_a = subject + sub.start;
int substr_l = sub.end - sub.start;
size_t i;
char * substr_a = subject + start;
size_t substr_l = end - start;
for (i = 0; i < substr_l; i++)
{
@ -278,5 +337,516 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
buffer[i] = '\0';
if (outlen)
{
*outlen = i;
}
return buffer;
}
}
const char *RegEx::GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen)
{
if (start < 0 || start >= mSubStrings.length())
{
return NULL;
}
RegExSub sub = mSubStrings.at(start);
return getSubstring(subject, sub.start, sub.end, buffer, max, outlen);
}
/**
 * Copies the [startOffset, endOffset) slice of the saved subject string into
 * a caller supplied buffer.
 *
 * Both offsets are unsigned, so a "negative" test can never fire; what has to
 * be rejected instead is a missing subject and an inverted range, because the
 * length is computed as endOffset - startOffset and would wrap around.
 *
 * @param startOffset   Offset of the first character to copy.
 * @param endOffset     Offset one past the last character to copy.
 * @param buffer        Destination buffer.
 * @param max           Size limit forwarded to getSubstring().
 * @param outlen        Optional; forwarded to getSubstring().
 * @return              Whatever getSubstring() returns, or NULL when no
 *                      subject is stored or the range is inverted.
 */
const char *RegEx::GetSubstring(size_t startOffset, size_t endOffset, char buffer[], size_t max, size_t *outlen)
{
	if (subject == NULL || endOffset < startOffset)
	{
		return NULL;
	}

	return getSubstring(subject, startOffset, endOffset, buffer, max, outlen);
}
void RegEx::MakeSubpatternsTable(int numSubpatterns)
{
int nameCount = 0;
int rc = pcre_fullinfo(re, NULL, PCRE_INFO_NAMECOUNT, &nameCount);
if (rc < 0)
{
return;
}
if (nameCount > 0)
{
const char *nameTable;
int nameSize = 0;
int i = 0;
int rc1 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMETABLE, &nameTable);
int rc2 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameSize);
rc = rc2 ? rc2 : rc1;
if (rc < 0)
{
mSubsNameTable.clear();
return;
}
NamedGroup data;
while (i++ < nameCount)
{
data.index = 0xff * (unsigned char)nameTable[0] + (unsigned char)nameTable[1];
data.name = nameTable + 2;
mSubsNameTable.append(ke::Move(data));
nameTable += nameSize;
}
}
}
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
{
char *output = text;
/**
* Retrieve all matches and store them in
* mSubStrings list.
*/
if (MatchAll(output) == -1)
{
return -1;
}
size_t subjectLen = strlen(subject);
size_t total = 0;
size_t baseIndex = 0;
size_t diffLength = 0;
char *toReplace = new char[textMaxLen + 1];
char *toSearch = NULL;
/**
* All characters which is not matched are not copied when replacing matches.
* Then original text (output buffer) should be considerated as empty.
*/
if (flags & REGEX_FORMAT_NOCOPY)
{
*output = '\0';
}
else
{
/**
* This is used only when we do replace matches.
*/
toSearch = new char[textMaxLen + 1];
}
/**
* Loop over all matches found.
*/
for (size_t i = 0; i < mMatchesSubs.length(); ++i)
{
char *ptr = toReplace;
size_t browsed = 0;
size_t searchLen = 0;
size_t length = 0;
/**
* Build the replace string as it can contain backreference
* and this needs to be parsed.
*/
for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed)
{
unsigned int c = *s;
/**
* Supported format specifiers:
*
* $number : Substitutes the substring matched by group number.
* n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
* ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters).
* $& : Substitutes a copy of the whole match.
* $` : Substitutes all the text of the input string before the match.
* $' : Substitutes all the text of the input string after the match.
* $+ : Substitutes the last group that was captured.
* $_ : Substitutes the entire input string.
* $$ : Substitutes a literal "$".
*/
if (c == '$' || c == '\\')
{
switch (*++s)
{
case '\0':
{
/**
* End of string.
* Copy one character.
*/
*(ptr + browsed) = c;
break;
}
case '&':
{
/**
* Concatenate retrieved full match sub-string.
* length - 1 to overwrite EOS.
*/
GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length);
browsed += length - 1;
break;
}
case '`':
{
/**
* Concatenate part of original text up to
* first sub-string position.
*/
length = mSubStrings.at(baseIndex).start;
memcpy(ptr + browsed, subject, length);
browsed += length - 1;
break;
}
case '\'':
{
/**
* Concatenate part of original text from
* last sub-string end position to EOS.
*/
length = mSubStrings.at(baseIndex).end;
memcpy(ptr + browsed, subject + length, subjectLen - length);
browsed += (subjectLen - length) - 1;
break;
}
case '+':
{
/**
* Copy the last group that was captured.
*/
GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length);
browsed += length - 1;
break;
}
case '_':
{
/**
* Copy the entire input string.
*/
memcpy(ptr + browsed, subject, subjectLen);
browsed += (subjectLen - 1);
break;
}
case '$':
case '\\':
{
/**
* Copy the single character $ or \.
*/
*(ptr + browsed) = c;
break;
}
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '{':
{
/**
* Checking backreference.
* Which can be either $n, ${n} or ${name}.
*/
size_t backref = -1;
const char *walk = s;
bool inBrace = false;
bool nameCheck = false;
/**
* ${nn}.
* ^
*/
if (*walk == '{')
{
inBrace = true;
++walk;
}
/**
* Valid number.
* $nn or ${nn}
* ^ ^
*/
if (*walk >= '0' && *walk <= '9')
{
backref = *walk - '0';
++walk;
}
else if (inBrace)
{
nameCheck = true;
/**
* Not a valid number.
* Checking as string.
* ${name}
* ^
*/
if (*walk)
{
const char *pch = strchr(walk, '}');
if (pch != NULL)
{
/**
* A named group maximum character is 32 (PCRE).
*/
char name[32];
size_t nameLength = strncopy(name, walk, pch - walk + 1);
int flags, num = 0;
pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &flags);
/**
* If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
* as pcre_get_stringnumber output order is not defined.
*/
if (flags & PCRE_DUPNAMES)
{
memset(ovector, 0, REGEX_MAX_SUBPATTERNS);
/**
* pcre_copy_named_substring needs a vector containing sub-patterns ranges
* for a given match.
*/
for (size_t j = 0; j < mMatchesSubs.at(i); ++j)
{
ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
}
num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen);
if (num != PCRE_ERROR_NOSUBSTRING)
{
browsed += num - 1;
s = pch;
break;
}
++pch;
}
else
{
/**
* Retrieve sub-pattern index from a give name.
*/
num = pcre_get_stringnumber(re, name);
if (num != PCRE_ERROR_NOSUBSTRING)
{
backref = num;
walk = ++pch;
}
}
if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
{
/**
* If a sub-string for a given match is not found, or if > to
* number of sub-patterns we still need to check if this
* group name is a valid one because if so we want to escape it.
* Looking at the name table.
*/
bool found = false;
for (size_t i = 0; i < mSubsNameTable.length(); ++i)
{
if (!mSubsNameTable.at(i).name.compare(name))
{
--browsed;
s = --pch;
found = true;
break;
}
}
if (found)
{
continue;
}
}
}
}
}
if (!nameCheck)
{
/**
* Valid second number.
* $nn or ${nn}
* ^ ^
*/
if (*walk && *walk >= '0' && *walk <= '9')
{
backref = backref * 10 + *walk - '0';
++walk;
}
if (inBrace)
{
/**
* Invalid specifier
* Either hit EOS or missing }.
* ${n or ${nn or ${nx or ${nnx
* ^ ^ ^ ^
*/
if (*walk == '\0' || *walk != '}')
{
backref = -1;
}
else
{
++walk;
}
}
}
length = walk - s;
s = --walk;
/**
* We can't provide a capture number >= to total that pcre_exec has found.
* 0 is implicitly accepted, same behavior as $&.
*/
if (backref >= 0 && (int)backref < mNumSubpatterns)
{
/**
* Valid available index for a given match.
*/
if (backref < mMatchesSubs.at(i))
{
/**
* Concatenate retrieved sub-string.
* length - 1 to overwrite EOS.
*/
GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length);
browsed += length - 1;
}
else
{
/**
* Valid unavailable index for a given match.
*/
--browsed;
}
}
else
{
/**
* If we here it means the syntax is valid but sub-pattern doesn't exist.
* So, copy as it is, including $.
*/
memcpy(ptr + browsed, s - length, length + 1);
browsed += length;
}
break;
}
default:
{
/**
* Not a valid format modifier.
* So we copy characters as it is.
*/
*(ptr + browsed) = *s;
break;
}
}
}
else
{
/**
* At this point, direct copy.
*/
*(ptr + browsed) = c;
}
}
*(ptr + browsed) = '\0';
/**
* Concatenate only replace string of each match,
* as we don't want to copy unmatched characters.
*/
if (flags & REGEX_FORMAT_NOCOPY)
{
/**
* We want just the first occurrence.
*/
if (total++ && (flags & REGEX_FORMAT_FIRSTONLY))
{
break;
}
strncat(output, toReplace, textMaxLen + 1);
}
else
{
/**
* Retrieves full string of a given match.
*/
const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);
/**
* We get something to replace, but the sub-pattern to search is empty.
* We insert replacement either a the start end or string.
*/
if (*toReplace && !searchLen)
{
if (output - text > 0)
{
strncat(output, toReplace, textMaxLen);
}
else
{
strncat(toReplace, text, textMaxLen);
strncopy(text, toReplace, strlen(toReplace) + 1);
}
++total;
}
else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
{
/**
* Then we simply do a replace.
* Probably not the most efficient, but this should be at least safe.
* To avoid issue where the function could find a string which is not at the expected index,
* We force the input string to start from index of the full match.
*/
++total;
}
if (total && (flags & REGEX_FORMAT_FIRSTONLY))
{
break;
}
}
/**
* mMatchesSubs is a flat list containing all sub-patterns of all matches.
* A number of sub-patterns can vary per match. So we calculate the position in the list,
* from where the first sub-pattern result of current match starts.
*/
baseIndex += mMatchesSubs.at(i);
diffLength += browsed - searchLen;
}
delete[] toReplace;
if (toSearch != NULL)
{
delete[] toSearch;
}
/**
* Return the number of successful replacements.
*/
return total;
}

View File

@ -34,6 +34,19 @@
#define _INCLUDE_CREGEX_H
#include <am-vector.h>
#include <am-string.h>
/**
* Size of the PCRE offsets vector: room for 50 sub-patterns, 3 slots each (the value must be a multiple of 3).
*/
#define REGEX_MAX_SUBPATTERNS 150
/**
* Flags used with regex_replace to control the replacement behavior.
*/
#define REGEX_FORMAT_DEFAULT 0 // Uses the standard formatting rules to replace matches.
#define REGEX_FORMAT_NOCOPY 1 // The sections that do not match the regular expression are not copied when replacing matches.
#define REGEX_FORMAT_FIRSTONLY 2 // Only the first occurrence of a regular expression is replaced.
class RegEx
{
@ -42,6 +55,11 @@ public:
int start, end;
};
struct NamedGroup {
ke::AString name;
size_t index;
};
RegEx();
~RegEx();
@ -52,8 +70,11 @@ public:
int Compile(const char *pattern, int iFlags);
int Match(const char *str);
int MatchAll(const char *str);
int Replace(char *text, size_t text_maxlen, const char *replace, size_t replaceLen, int flags = 0);
void ClearMatch();
const char *GetSubstring(int s, char buffer[], int max);
const char *GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen = NULL);
const char *GetSubstring(size_t start, size_t end, char buffer[], size_t max, size_t *outlen = NULL);
void MakeSubpatternsTable(int numSubpatterns);
public:
int mErrorOffset;
@ -63,9 +84,12 @@ public:
private:
pcre *re;
bool mFree;
int ovector[30];
int ovector[REGEX_MAX_SUBPATTERNS];
char *subject;
ke::Vector<RegExSub> mSubStrings;
ke::Vector<size_t> mMatchesSubs;
ke::Vector<NamedGroup> mSubsNameTable;
int mNumSubpatterns;
};
#endif //_INCLUDE_CREGEX_H

View File

@ -153,6 +153,8 @@ cell match(AMX *amx, cell *params, bool all)
else
{
*errorCode = x->Count();
if (all)
return x->Count();
}
return id + 1;
@ -272,6 +274,43 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
return 1;
}
//native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);
/**
 * Native entry point: runs RegEx::Replace() on the string passed by the
 * plugin and writes the result back into the plugin's buffer.
 *
 * Returns 0 on an invalid handle or when nothing was replaced, -2 when the
 * replace call reports an error (the error offset is stored in errcode),
 * otherwise the number of replacements.
 */
static cell AMX_NATIVE_CALL regex_replace(AMX *amx, cell *params)
{
	const int handle = params[1] - 1;
	const bool usable = handle >= 0 && handle < (int)PEL.length() && !PEL[handle]->isFree();

	if (!usable)
	{
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", handle);
		return 0;
	}

	int textLen, replaceLen;

	/* Subject goes to string buffer 0, replacement to string buffer 1. */
	char *text = MF_GetAmxString(amx, params[2], 0, &textLen);
	const char *replace = MF_GetAmxString(amx, params[4], 1, &replaceLen);

	cell *errorCode = MF_GetAmxAddr(amx, params[6]);

	RegEx *regex = PEL[handle];
	const int replaced = regex->Replace(text, params[3] + 1, replace, replaceLen, params[5]);

	switch (replaced)
	{
		case -1:
		{
			/* Matching failed: report the stored error value. */
			*errorCode = regex->mErrorOffset;
			regex->ClearMatch();
			return -2;
		}
		case 0:
		{
			/* Nothing to replace, the plugin buffer is left untouched. */
			*errorCode = 0;
			regex->ClearMatch();
			return 0;
		}
	}

	MF_SetAmxString(amx, params[2], text, params[3]);

	return replaced;
}
AMX_NATIVE_INFO regex_Natives[] = {
{"regex_compile", regex_compile},
{"regex_compile_ex", regex_compile_ex},
@ -280,6 +319,7 @@ AMX_NATIVE_INFO regex_Natives[] = {
{"regex_match_all", regex_match_all},
{"regex_match_all_c", regex_match_all_c},
{"regex_substr", regex_substr},
{"regex_replace", regex_replace},
{"regex_free", regex_free},
{NULL, NULL},
};

View File

@ -99,6 +99,7 @@
<ClCompile Include="..\CRegEx.cpp" />
<ClCompile Include="..\module.cpp" />
<ClCompile Include="..\sdk\amxxmodule.cpp" />
<ClCompile Include="..\utils.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\CRegEx.h" />
@ -107,6 +108,7 @@
<ClInclude Include="..\sdk\moduleconfig.h" />
<ClInclude Include="..\sdk\CVector.h" />
<ClInclude Include="..\sdk\amxxmodule.h" />
<ClInclude Include="..\utils.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\plugins\include\regex.inc" />

View File

@ -32,6 +32,9 @@
<ClCompile Include="..\sdk\amxxmodule.cpp">
<Filter>Module SDK\SDK Base</Filter>
</ClCompile>
<ClCompile Include="..\utils.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\CRegEx.h">
@ -52,6 +55,9 @@
<ClInclude Include="..\sdk\amxxmodule.h">
<Filter>Module SDK\SDK Base</Filter>
</ClInclude>
<ClInclude Include="..\utils.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\plugins\include\regex.inc">

236
dlls/regex/utils.cpp Normal file
View File

@ -0,0 +1,236 @@
#include "amxxmodule.h"
#include <string.h>
#include "utils.h"
/**
 * Checks whether the byte pointed to by c ends a complete UTF-8 sequence.
 *
 * The pointer is moved backwards over continuation bytes (10xxxxxx) until
 * another kind of byte is reached; the number of bytes visited is then
 * compared with the sequence length announced by that byte.
 *
 * @param c     Pointer to the last byte to inspect. The bytes before it must
 *              be readable, as the function walks backwards.
 * @return      0 when the visited byte count equals the announced length,
 *              otherwise the number of bytes visited.
 */
int UTIL_CheckValidChar(char *c)
{
	int visited = 1;

	while ((*c & 0xC0) == 0x80)
	{
		--c;
		++visited;
	}

	/* The high nibble of the byte we stopped on gives the sequence length. */
	const int highNibble = *c & 0xF0;
	int expected = 0;

	if (highNibble == 0xC0 || highNibble == 0xD0)
	{
		expected = 2;
	}
	else if (highNibble == 0xE0)
	{
		expected = 3;
	}
	else if (highNibble == 0xF0)
	{
		expected = 4;
	}

	return (expected == visited) ? 0 : visited;
}
/**
 * Bounded string copy that always terminates the destination.
 *
 * @param dest      Destination buffer.
 * @param src       Zero terminated source string.
 * @param count     Size of dest, terminator included. When 0, dest is left
 *                  untouched.
 * @return          Number of characters copied, terminator excluded.
 */
unsigned int strncopy(char *dest, const char *src, size_t count)
{
	if (count == 0)
	{
		return 0;
	}

	/* One slot is reserved for the terminator. */
	const size_t room = count - 1;
	size_t copied = 0;

	for (; copied < room && src[copied] != '\0'; ++copied)
	{
		dest[copied] = src[copied];
	}

	dest[copied] = '\0';

	return (unsigned int)copied;
}
/**
* NOTE: Do not edit this for the love of god unless you have
* read the test cases and understand the code behind each one.
* While I don't guarantee there aren't mistakes, I do guarantee
* that plugins will end up relying on tiny idiosyncrasies of this
* function, just like they did with AMX Mod X.
*
* There are explicitly more cases than the AMX Mod X version because
* we're not doing a blind copy. Each case is specifically optimized
* for what needs to be done. Even better, we don't have to error on
* bad buffer sizes. Instead, this function will smartly cut off the
* string in a way that pushes old data out.
*
* Replaces the first occurrence of search found in subject, in place.
*
* @param subject        Zero terminated string to modify.
* @param maxLen         Buffer size of subject, terminator included.
* @param search         String to look for.
* @param searchLen      Length of search.
* @param replace        String to put in place of search.
* @param replaceLen     Length of replace.
* @param caseSensitive  Selects strcmp/strncmp (true) or stricmp/strnicmp (false).
* @return               Pointer just past the inserted replacement, or NULL
*                       when nothing was replaced.
*/
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
{
char *ptr = subject;
size_t browsed = 0;
size_t textLen = strlen(subject);
/* It's not possible to search or replace */
if (searchLen > textLen)
{
return NULL;
}
/* Handle the case of one byte replacement.
* It's only valid in one case.
*/
if (maxLen == 1)
{
/* If the search matches and the replace length is 0,
* we can just terminate the string and be done.
*/
if ((caseSensitive ? strcmp(subject, search) : stricmp(subject, search)) == 0 && replaceLen == 0)
{
*subject = '\0';
return subject;
}
else
{
return NULL;
}
}
/* Subtract one off the maxlength so we can include the null terminator */
maxLen--;
/* Scan until fewer than searchLen characters remain after the cursor. */
while (*ptr != '\0' && (browsed <= textLen - searchLen))
{
/* See if we get a comparison */
if ((caseSensitive ? strncmp(ptr, search, searchLen) : strnicmp(ptr, search, searchLen)) == 0)
{
if (replaceLen > searchLen)
{
/* First, see if we have enough space to do this operation */
/* maxLen - textLen is the free room, replaceLen - searchLen the growth. */
if (maxLen - textLen < replaceLen - searchLen)
{
/* First, see if the replacement length goes out of bounds. */
if (browsed + replaceLen >= maxLen)
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: DDDDDDDDDD
* OUTPUT : AADDDDDDDDD
* POSITION: ^
*/
/* If it does, we'll just bound the length and do a strcpy. */
replaceLen = maxLen - browsed;
/* Note, we add one to the final result for the null terminator */
strncopy(ptr, replace, replaceLen + 1);
/* Don't truncate a multi-byte character */
/* High bit set on the last copied byte: it belongs to a multi-byte sequence. */
if (*(ptr + replaceLen - 1) & 1 << 7)
{
replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1);
*(ptr + replaceLen) = '\0';
}
}
else
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: DDDDDDD
* OUTPUT : AADDDDDDDCC
* POSITION: ^
*/
/* We're going to have some bytes left over... */
size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1;
size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1;
char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy);
char *moveTo = ptr + replaceLen;
/* First, move our old data out of the way. */
memmove(moveTo, moveFrom, realBytesToCopy);
/* Now, do our replacement. */
memcpy(ptr, replace, replaceLen);
}
}
else
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: DDDD
* OUTPUT : AADDDDCCC
* POSITION: ^
*/
/* Yes, we have enough space. Do a normal move operation. */
char *moveFrom = ptr + searchLen;
char *moveTo = ptr + replaceLen;
/* First move our old data out of the way. */
size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
memmove(moveTo, moveFrom, bytesToCopy);
/* Now do our replacement. */
memcpy(ptr, replace, replaceLen);
}
}
else if (replaceLen < searchLen)
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: D
* OUTPUT : AADCCC
* POSITION: ^
*/
/* If the replacement does not grow the string length, we do not
* need to do any fancy checking at all. Yay!
*/
char *moveFrom = ptr + searchLen; /* Start after the search pointer */
char *moveTo = ptr + replaceLen; /* Copy to where the replacement ends */
/* Copy our replacement in, if any */
if (replaceLen)
{
memcpy(ptr, replace, replaceLen);
}
/* Figure out how many bytes to move down, including null terminator */
size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
/* Move the rest of the string down */
memmove(moveTo, moveFrom, bytesToCopy);
}
else
{
/* EXAMPLE CASE:
* Subject: AABBBCCC
* Buffer : 12 bytes
* Search : BBB
* Replace: DDD
* OUTPUT : AADDDCCC
* POSITION: ^
*/
/* We don't have to move anything around, just do a straight copy */
memcpy(ptr, replace, replaceLen);
}
/* Only the first occurrence is handled: return right after the replacement. */
return ptr + replaceLen;
}
ptr++;
browsed++;
}
/* No occurrence of search was found. */
return NULL;
}

8
dlls/regex/utils.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef UTILS_H
#define UTILS_H
/* Walks back over UTF-8 continuation bytes from c; returns 0 when the sequence ending at c is complete, else the number of bytes visited. */
int UTIL_CheckValidChar(char *c);
/* Replaces the first occurrence of search in subject, in place; returns a pointer just past the replacement, or NULL when nothing was replaced. */
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive);
/* Copies at most count - 1 characters of src into dest and terminates it; returns the number of characters copied. */
unsigned int strncopy(char *dest, const char *src, size_t count);
#endif // UTILS_H