Regex: Add regex_replace native.

2014-07-17 11:20:52 +02:00
parent 287f471ac4
commit 939a724b1a
11 changed files with 1432 additions and 56 deletions
--- a/dlls/regex/AMBuilder
+++ b/dlls/regex/AMBuilder
@@ -18,6 +18,7 @@ binary.sources = [
  'sdk/amxxmodule.cpp',
  'module.cpp',
  'CRegEx.cpp',
+  'utils.cpp',
 ]

 AMXX.modules += [builder.Add(binary)]
--- a/dlls/regex/CRegEx.cpp
+++ b/dlls/regex/CRegEx.cpp
@@ -30,10 +30,12 @@
 *  you do not wish to do so, delete this exception statement from your
 *  version.
 */
+#include "amxxmodule.h"
 #include "pcre.h"
 #include "CRegEx.h"
 #include <string.h>
-#include "amxxmodule.h"
+#include <ctype.h>
+#include "utils.h"

 RegEx::RegEx()
 {
@@ -43,6 +45,9 @@ RegEx::RegEx()
 	mFree = true;
 	subject = NULL;
 	mSubStrings.clear();
+	mMatchesSubs.clear();
+	mSubsNameTable.clear();
+	mNumSubpatterns = 0;
 }

 void RegEx::Clear()
@@ -57,6 +62,9 @@ void RegEx::Clear()
 		delete[] subject;
 	subject = NULL;
 	mSubStrings.clear();
+	mMatchesSubs.clear();
+	mSubsNameTable.clear();
+	mNumSubpatterns = 0;
 }

 RegEx::~RegEx()
@@ -143,6 +151,19 @@ int RegEx::Compile(const char *pattern, int iFlags)

 	mFree = false;

+	/**
+	 * Retrieve the number of captured groups
+	 * including the full match.
+	 */
+	pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns);
+	++mNumSubpatterns;
+
+	/**
+	 * Build the table with the named groups,
+	 * which contain an index and a name per group.
+	 */
+	MakeSubpatternsTable(mNumSubpatterns);
+
 	return 1;
 }

@@ -153,13 +174,13 @@ int RegEx::Match(const char *str)
 	if (mFree || re == NULL)
 		return -1;

-	this->ClearMatch();
+	ClearMatch();

 	//save str
 	subject = new char[strlen(str) + 1];
 	strcpy(subject, str);

-	rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
+	rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, REGEX_MAX_SUBPATTERNS);

 	if (rc < 0)
 	{
@@ -188,61 +209,103 @@ int RegEx::Match(const char *str)

 int RegEx::MatchAll(const char *str)
 {
-	int rc = 0;
 	int rr = 0;
-	int offset = 0;
+	int rc = 0;
+	int startOffset = 0;
+	int exoptions = 0;
+	int notEmpty = 0;
+	int sizeOffsets = REGEX_MAX_SUBPATTERNS; // the ovecsize really handed to pcre_exec below
+	int subjectLen = strlen(str);

 	if (mFree || re == NULL)
+	{
 		return -1;
+	}

-	this->ClearMatch();
-
-	//save str
-	subject = new char[strlen(str) + 1];
+	ClearMatch();
+	// Keep a private copy of the subject: the offsets stored in mSubStrings index into it.
+	subject = new char[subjectLen + 1];
 	strcpy(subject, str);

-	RegExSub sub, whole;
-	while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30)))
+	RegExSub sub;
+
+	while (1)
 	{
-		if (rr < 0)
+		rr = pcre_exec(re, NULL, subject, (int)subjectLen, startOffset, exoptions | notEmpty, ovector, REGEX_MAX_SUBPATTERNS);
+
+		/**
+		 * The string was already proved to be valid UTF-8
+		 */
+		exoptions |= PCRE_NO_UTF8_CHECK;
+
+		/**
+		 * rr == 0: ovector was too small, so only the first ovecsize / 3 pairs were filled in.
+		 */
+		if (rr == 0)
 		{
-			if (rr == PCRE_ERROR_NOMATCH)
+			rr = sizeOffsets / 3;
+		}
+
+		if (rr > 0)
+		{
+			mMatchesSubs.append(rr);
+
+			for (int s = 0; s < rr; ++s)
 			{
-				break;
+				sub.start = ovector[2 * s];
+				sub.end = ovector[2 * s + 1];
+
+				mSubStrings.append(sub);
+			}
+		}
+		else if (rr == PCRE_ERROR_NOMATCH)
+		{
+			/**
+			 * If we previously set PCRE_NOTEMPTY after a null match,
+			 * this is not necessarily the end. We need to advance
+			 * the start offset, and continue. Fudge the offset values
+			 * to achieve this, unless we're already at the end of the string.
+			 */
+			if (notEmpty && startOffset < (int)subjectLen)
+			{
+				ovector[0] = startOffset;
+				ovector[1] = startOffset + 1;
 			}
 			else
 			{
-				mErrorOffset = rr;
-
-				if (rc)
-					this->ClearMatch();
-
-				return -1;
+				break;
 			}
 		}
-
-		rc += rr;
-		mSubStrings.ensure(rc);
-
-		for (int s = 1; s < rr; ++s)
+		else
 		{
-			sub.start = ovector[2 * s];
-			sub.end = ovector[2 * s + 1];
-			mSubStrings.append(sub);
+			mErrorOffset = rr;
+
+			if (mMatchesSubs.length())
+			{
+				ClearMatch();
+			}
+
+			return -1;
 		}

-		offset = ovector[1];
+		/**
+		 * If we have matched an empty string, mimic what Perl's /g option does.
+		 * This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
+		 * the match again at the same point. If this fails (picked up above) we
+		 * advance to the next character.
+		 */
+		notEmpty = (ovector[1] == ovector[0]) ? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
+
+		/**
+		 * Advance to the next piece.
+		 */
+		startOffset = ovector[1];
 	}

-	if (!rc)
+	if (!mMatchesSubs.length())
+	{
 		return 0;
-
-	sub = mSubStrings.at(0);
-	whole.start = sub.start;
-	sub = mSubStrings.back();
-	whole.end = sub.end;
-
-	mSubStrings.insert(0, whole);
+	}

 	return 1;
 }
@@ -256,18 +319,14 @@ void RegEx::ClearMatch()
 		delete[] subject;
 	subject = NULL;
 	mSubStrings.clear();
+	mMatchesSubs.clear();
 }

-const char *RegEx::GetSubstring(int s, char buffer[], int max)
+const char *getSubstring(char *subject, size_t start, size_t end, char buffer[], size_t max, size_t *outlen)
 {
-	int i = 0;
-	if ((size_t)s >= mSubStrings.length() || s < 0)
-		return NULL;
-
-	RegExSub sub = mSubStrings.at(s);
-
-	char *substr_a = subject + sub.start;
-	int substr_l = sub.end - sub.start;
+	size_t i;
+	char * substr_a = subject + start;
+	size_t substr_l = end - start;

 	for (i = 0; i < substr_l; i++)
 	{
@@ -278,5 +337,516 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)

 	buffer[i] = '\0';

+	if (outlen)
+	{
+		*outlen = i;
+	}
+
 	return buffer;
-}
+}
+
+/** Copies the range stored at index start of mSubStrings into buffer; returns NULL if start is out of range. */
+const char *RegEx::GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen)
+{
+	// start is unsigned, so only the upper bound can be violated.
+	if (start >= mSubStrings.length())
+	{
+		return NULL;
+	}
+	const RegExSub &sub = mSubStrings.at(start);
+	return getSubstring(subject, sub.start, sub.end, buffer, max, outlen);
+}
+
+/** Copies subject[startOffset, endOffset) into buffer; returns NULL when no subject is stored or the range is inverted. */
+const char *RegEx::GetSubstring(size_t startOffset, size_t endOffset, char buffer[], size_t max, size_t *outlen)
+{
+	if (subject == NULL || endOffset < startOffset)
+	{
+		return NULL;
+	}
+	return getSubstring(subject, startOffset, endOffset, buffer, max, outlen);
+}
+
+/** Caches every named group of the compiled pattern as a (name, group number) pair in mSubsNameTable. */
+void RegEx::MakeSubpatternsTable(int numSubpatterns)
+{
+	int nameCount = 0;
+	int rc = pcre_fullinfo(re, NULL, PCRE_INFO_NAMECOUNT, &nameCount);
+
+	if (rc < 0)
+	{
+		return;
+	}
+
+	if (nameCount > 0)
+	{
+		const char *nameTable = NULL;
+		int nameSize = 0;
+
+		int rc1 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMETABLE, &nameTable);
+		int rc2 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameSize);
+
+		rc = rc2 ? rc2 : rc1;
+
+		if (rc < 0)
+		{
+			mSubsNameTable.clear();
+			return;
+		}
+
+		NamedGroup data;
+
+		for (int i = 0; i < nameCount; ++i)
+		{
+			// Entry layout: group number as 2 bytes, most significant first, then the NUL-terminated name.
+			data.index = ((unsigned char)nameTable[0] << 8) | (unsigned char)nameTable[1];
+			data.name = nameTable + 2;
+			mSubsNameTable.append(ke::Move(data));
+			nameTable += nameSize;
+		}
+	}
+}
+
+/**
+ * Replaces the matches of the compiled pattern inside text, in place.
+ * replace (replaceLen bytes) may hold the $-specifiers listed below; flags is a REGEX_FORMAT_* mask.
+ * @return  Number of replacements made (0 when nothing matched), or -1 if matching failed.
+ */
+int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
+{
+	char *output = text;
+
+	// Collect every match and its sub-patterns into mSubStrings / mMatchesSubs.
+	if (MatchAll(output) == -1)
+	{
+		return -1;
+	}
+
+	size_t subjectLen = strlen(subject);
+	size_t total = 0;
+	size_t baseIndex = 0;
+	size_t diffLength = 0;
+
+	char *toReplace = new char[textMaxLen + 1];
+	char *toSearch = NULL;
+
+	/**
+	 * Characters which are not matched are not copied when replacing matches.
+	 * The original text (output buffer) is therefore treated as empty.
+	 */
+	if (flags & REGEX_FORMAT_NOCOPY)
+	{
+		*output = '\0';
+	}
+	else
+	{
+		// The matched text is only needed when replacing inside the original string.
+		toSearch  = new char[textMaxLen + 1];
+	}
+
+	/** 
+	 * Loop over all matches found.
+	 */
+	for (size_t i = 0; i < mMatchesSubs.length(); ++i)
+	{
+		char *ptr = toReplace;
+
+		size_t browsed = 0;
+		size_t searchLen = 0;
+		size_t length = 0;
+	
+		/**
+		 * Build the replace string as it can contain backreference
+		 * and this needs to be parsed.
+		 */
+		for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed)
+		{
+			unsigned int c = *s;
+
+			/**
+			 * Supported format specifiers:
+			 *
+			 *   $number  : Substitutes the substring matched by group number.
+			 *              n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
+			 *   ${name}  : Substitutes the substring matched by the named group name (a maximum of 32 characters).
+			 *   $&       : Substitutes a copy of the whole match.
+			 *   $`       : Substitutes all the text of the input string before the match.
+			 *   $'       : Substitutes all the text of the input string after the match.
+			 *   $+       : Substitutes the last group that was captured.
+			 *   $_       : Substitutes the entire input string.
+			 *   $$       : Substitutes a literal "$".
+			 */
+			if (c == '$' || c == '\\')
+			{
+				switch (*++s)
+				{
+					case '\0':
+					{
+						/**
+						 * End of string.
+						 * Copy one character.
+						 */
+						 *(ptr + browsed) = c;
+						 break;
+					}
+					case '&':
+					{
+						/**
+						 * Concatenate retrieved full match sub-string.
+						 * length - 1 to overwrite EOS.
+						 */
+						GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length);
+						browsed += length - 1;
+						break;
+					}
+					case '`':
+					{
+						/**
+						 * Concatenate part of original text up to
+						 * first sub-string position.
+						 */
+						length = mSubStrings.at(baseIndex).start;
+						memcpy(ptr + browsed, subject, length);
+						browsed += length - 1;
+						break;
+					}
+					case '\'':
+					{
+						/**
+						 * Concatenate part of original text from
+						 * last sub-string end position to EOS.
+						 */
+						length = mSubStrings.at(baseIndex).end;
+						memcpy(ptr + browsed, subject + length, subjectLen - length);
+						browsed += (subjectLen - length) - 1;
+						break;
+					}
+					case '+':
+					{
+						/**
+						 * Copy the last group that was captured.
+						 */
+						GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length);
+						browsed += length - 1;
+						break;
+					}
+					case '_':
+					{
+						/**
+						 * Copy the entire input string.
+						 */
+						memcpy(ptr + browsed, subject, subjectLen);
+						browsed += (subjectLen - 1);
+						break;
+					}
+					case '$':
+					case '\\':
+					{
+						/**
+						 * Copy the single character $ or \.
+						 */
+						*(ptr + browsed) = c;
+						break;
+					}
+					case '0': case '1':	case '2': case '3':	case '4': 
+					case '5': case '6': case '7': case '8': case '9':
+					case '{':
+					{
+						/**
+						 * Checking backreference.
+						 * Which can be either $n, ${n} or ${name}.
+						 */
+						size_t backref = -1;
+						const char *walk = s;
+						bool inBrace = false;
+						bool nameCheck = false;
+
+						/**
+						 * ${nn}.
+						 *  ^
+						 */
+						if (*walk == '{') 
+						{
+							inBrace = true;
+							++walk;
+						}
+
+						/**
+						 * Valid number.
+						 * $nn or ${nn}
+						 *  ^       ^
+						 */
+						if (*walk >= '0' && *walk <= '9')
+						{
+							backref = *walk - '0';
+							++walk;
+						}
+						else if (inBrace)
+						{
+							nameCheck = true;
+
+							/**
+							 * Not a valid number.
+							 * Checking as string.
+							 * ${name}
+							 *   ^
+							 */
+							if (*walk)
+							{
+								const char *pch = strchr(walk, '}');
+
+								if (pch != NULL)
+								{
+									/**
+									 * PCRE names are at most 32 chars; keeping 33 + NUL lets a longer name stay unmatchable.
+									 */
+									char name[34];
+									strncopy(name, walk, ((size_t)(pch - walk) < sizeof(name)) ? (size_t)(pch - walk) + 1 : sizeof(name));
+
+									int options, num = 0;
+									pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &options);
+
+									/**
+									 * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
+									 * as pcre_get_stringnumber output order is not defined.
+									 */
+									if (options & PCRE_DUPNAMES)
+									{
+										memset(ovector, 0, sizeof(ovector));
+
+										/**
+										 * pcre_copy_named_substring needs a vector containing sub-patterns ranges
+										 * for a given match.
+										 */
+										for (size_t j = 0; j < mMatchesSubs.at(i); ++j)
+										{
+											ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
+											ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
+										}
+
+										num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen);
+
+										if (num != PCRE_ERROR_NOSUBSTRING)
+										{
+											browsed += num - 1;
+											s = pch;
+											break;
+										}
+										++pch;
+									}
+									else
+									{
+										/**
+										 * Retrieve sub-pattern index from a given name.
+										 */
+										num = pcre_get_stringnumber(re, name);
+										if (num != PCRE_ERROR_NOSUBSTRING)
+										{
+											backref = num;
+											walk = ++pch;
+										}
+									}
+
+									if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
+									{
+										/**
+										 * If a sub-string for a given match is not found,  or if > to
+										 * number of sub-patterns we still need to check if this 
+										 * group name is a valid one because if so we want to escape it. 
+										 * Looking at the name table.
+										 */
+										bool found = false;
+										for (size_t n = 0; n < mSubsNameTable.length(); ++n)
+										{
+											if (!mSubsNameTable.at(n).name.compare(name))
+											{
+												--browsed;
+												s = --pch;
+												found = true;
+												break;
+											}
+										}
+
+										if (found)
+										{
+											continue;
+										}
+									}
+								}
+							}
+						}
+
+						if (!nameCheck)
+						{
+							/**
+							 * Valid second number.
+							 * $nn or ${nn}
+							 *   ^       ^
+							 */
+							if (*walk && *walk >= '0' && *walk <= '9')
+							{
+								backref = backref * 10 + *walk - '0';
+								++walk;
+							}
+
+							if (inBrace)
+							{
+								/**
+								 * Invalid specifier
+								 * Either hit EOS or missing }.
+								 * ${n  or ${nn  or ${nx or ${nnx
+								 *    ^        ^       ^        ^
+								 */
+								if (*walk == '\0' || *walk != '}')
+								{
+									backref = -1;
+								}
+								else
+								{
+									++walk;
+								}
+							}
+						}
+
+						length = walk - s;
+						s = --walk;
+
+						/**
+						 * We can't provide a capture number >= to total that pcre_exec has found.
+						 * 0 is implicitly accepted, same behavior as $&.
+						 */
+						if (backref >= 0 && (int)backref < mNumSubpatterns)
+						{
+							/**
+							 * Valid available index for a given match.
+							 */
+							if (backref < mMatchesSubs.at(i))
+							{
+								/**
+								 * Concatenate retrieved sub-string.
+								 * length - 1 to overwrite EOS.
+								 */
+								GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length);
+								browsed += length - 1;
+							}
+							else
+							{
+								/**
+								 * Valid unavailable index for a given match.
+								 */
+								--browsed;
+							}
+						}
+						else
+						{
+							/**
+							 * If we are here it means the syntax is valid but the sub-pattern doesn't exist. 
+							 * So, copy as it is, including $.
+							 */
+							memcpy(ptr + browsed, s - length, length + 1);
+							browsed += length;
+						}
+
+						break;
+					}
+					default:
+					{
+						/**
+						 * Not a valid format modifier.
+						 * So we copy characters as it is.
+						 */
+						*(ptr + browsed) = *s;
+						break;
+					}
+				}
+			}
+			else
+			{
+				/**
+				 * At this point, direct copy.
+				 */
+				*(ptr + browsed) = c;
+			}
+		}
+
+		*(ptr + browsed) = '\0';
+
+		/**
+		 * Concatenate only replace string of each match, 
+		 * as we don't want to copy unmatched characters.
+		 */
+		if (flags & REGEX_FORMAT_NOCOPY)
+		{
+			/**
+			 * We want just the first occurrence; count a match only once it has been appended.
+			 */
+			if (total && (flags & REGEX_FORMAT_FIRSTONLY))
+			{
+				break;
+			}
+			strncat(output, toReplace, textMaxLen + 1);
+			++total;
+		}
+		else
+		{
+			/**
+			 * Retrieves full string of a given match.
+			 */
+			const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);
+
+			/**
+			 * We get something to replace, but the sub-pattern to search is empty.
+			 * We insert the replacement either at the start or at the end of the string.
+			 */
+			if (*toReplace && !searchLen)
+			{
+				if (output - text > 0)
+				{
+					strncat(output, toReplace, textMaxLen);
+				}
+				else
+				{
+					strncat(toReplace, text, textMaxLen);
+					strncopy(text, toReplace, strlen(toReplace) + 1);
+				}
+
+				++total;
+			}
+			else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
+			{
+				/**
+				 * Then we simply do a replace.
+				 * Probably not the most efficient, but this should be at least safe.
+				 * To avoid issue where the function could find a string which is not at the expected index,
+				 * We force the input string to start from index of the full match.
+				 */
+				++total;
+			}
+
+			if (total && (flags & REGEX_FORMAT_FIRSTONLY))
+			{
+				break;
+			}
+		}
+
+		/**
+		 * mMatchesSubs is a flat list containing all sub-patterns of all matches.
+		 * A number of sub-patterns can vary per match. So we calculate the position in the list, 
+		 * from where the first sub-pattern result of current match starts.
+		 */
+		baseIndex  += mMatchesSubs.at(i);
+		diffLength += browsed - searchLen;
+	}
+
+	delete[] toReplace;
+	
+	if (toSearch != NULL)
+	{
+		delete[] toSearch;
+	}
+
+	/**
+	 * Return the number of successful replacements.
+	 */
+	return total;
+}
--- a/dlls/regex/CRegEx.h
+++ b/dlls/regex/CRegEx.h
@@ -34,6 +34,19 @@
 #define _INCLUDE_CREGEX_H
 
 #include <am-vector.h>
+#include <am-string.h>
+
+/**
+ * Size, in ints, of the offsets vector given to pcre_exec: room for 50 sub-patterns (must be a multiple of 3).
+ */
+#define REGEX_MAX_SUBPATTERNS 150
+
+/**
+ * Flags to use with regex_replace, to control the replacement behavior.
+ */
+#define REGEX_FORMAT_DEFAULT   0  // Uses the standard formatting rules to replace matches.
+#define REGEX_FORMAT_NOCOPY    1  // The sections that do not match the regular expression are not copied when replacing matches.
+#define REGEX_FORMAT_FIRSTONLY 2  // Only the first occurrence of a regular expression is replaced.

 class RegEx
 {
@@ -42,6 +55,11 @@ public:
 		int start, end;
 	};

+	struct NamedGroup { // One entry of the pattern's named-group table.
+		ke::AString name; // Name of the group.
+		size_t index;     // Group number the name refers to.
+	};
+
 	RegEx();
 	~RegEx();

@@ -52,8 +70,11 @@ public:
 	int Compile(const char *pattern, int iFlags);
 	int Match(const char *str);
 	int MatchAll(const char *str);
+	int Replace(char *text, size_t text_maxlen, const char *replace, size_t replaceLen, int flags = 0);
 	void ClearMatch();
-	const char *GetSubstring(int s, char buffer[], int max);
+	const char *GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen = NULL);
+	const char *GetSubstring(size_t start, size_t end, char buffer[], size_t max, size_t *outlen = NULL);
+	void MakeSubpatternsTable(int numSubpatterns);

 public:
 	int mErrorOffset;
@@ -63,9 +84,12 @@ public:
 private:
 	pcre *re;
 	bool mFree;
-	int ovector[30];
+	int ovector[REGEX_MAX_SUBPATTERNS];
 	char *subject;
 	ke::Vector<RegExSub> mSubStrings;
+	ke::Vector<size_t> mMatchesSubs;
+	ke::Vector<NamedGroup> mSubsNameTable;
+	int mNumSubpatterns;
 };

 #endif //_INCLUDE_CREGEX_H
--- a/dlls/regex/module.cpp
+++ b/dlls/regex/module.cpp
@@ -153,6 +153,8 @@ cell match(AMX *amx, cell *params, bool all)
 	else 
 	{
 		*errorCode = x->Count();
+		if (all)
+			return x->Count();
 	}

 	return id + 1;
@@ -272,6 +274,43 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
 	return 1;
 }

+//native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0); returns the replacement count, 0 if nothing was replaced or the handle is invalid, -2 on a match error.
+static cell AMX_NATIVE_CALL regex_replace(AMX *amx, cell *params)
+{
+	int id = params[1] - 1; // the handle argument is 1-based; PEL is indexed from 0
+	if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
+	{
+		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
+		return 0;
+	}
+
+	int textLen, replaceLen;
+	char *text = MF_GetAmxString(amx, params[2], 0, &textLen);
+	const char *replace = MF_GetAmxString(amx, params[4], 1, &replaceLen); // NOTE(review): ids 0 / 1 presumably select distinct temp buffers — confirm
+
+	cell *erroCode = MF_GetAmxAddr(amx, params[6]); // address of the &errcode argument
+
+	RegEx *x = PEL[id]; 
+	int e = x->Replace(text, params[3] + 1, replace, replaceLen, params[5]); // rewrites text in place
+
+	if (e == -1) // matching failed: hand mErrorOffset back through errcode
+	{
+		*erroCode = x->mErrorOffset;
+		x->ClearMatch();
+		return -2;
+	}
+	else if (e == 0) // no replacement was made
+	{
+		*erroCode = 0;
+		x->ClearMatch();
+		return 0;
+	}
+	// Write the rewritten text back into the plugin's string[], using maxLen as the limit.
+	MF_SetAmxString(amx, params[2], text, params[3]);
+
+	return e; // number of replacements reported by Replace
+}
+
 AMX_NATIVE_INFO regex_Natives[] = {
 	{"regex_compile",			regex_compile},
 	{"regex_compile_ex",		regex_compile_ex},
@@ -280,6 +319,7 @@ AMX_NATIVE_INFO regex_Natives[] = {
 	{"regex_match_all",			regex_match_all},
 	{"regex_match_all_c",		regex_match_all_c},
 	{"regex_substr",			regex_substr},
+	{"regex_replace",			regex_replace},
 	{"regex_free",				regex_free},
 	{NULL,						NULL},
 };
--- a/dlls/regex/msvc10/regex.vcxproj
+++ b/dlls/regex/msvc10/regex.vcxproj
@@ -99,6 +99,7 @@
    <ClCompile Include="..\CRegEx.cpp" />
    <ClCompile Include="..\module.cpp" />
    <ClCompile Include="..\sdk\amxxmodule.cpp" />
+    <ClCompile Include="..\utils.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\CRegEx.h" />
@@ -107,6 +108,7 @@
    <ClInclude Include="..\sdk\moduleconfig.h" />
    <ClInclude Include="..\sdk\CVector.h" />
    <ClInclude Include="..\sdk\amxxmodule.h" />
+    <ClInclude Include="..\utils.h" />
  </ItemGroup>
  <ItemGroup>
    <None Include="..\..\..\plugins\include\regex.inc" />
--- a/dlls/regex/msvc10/regex.vcxproj.filters
+++ b/dlls/regex/msvc10/regex.vcxproj.filters
@@ -32,6 +32,9 @@
    <ClCompile Include="..\sdk\amxxmodule.cpp">
      <Filter>Module SDK\SDK Base</Filter>
    </ClCompile>
+    <ClCompile Include="..\utils.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\CRegEx.h">
@@ -52,6 +55,9 @@
    <ClInclude Include="..\sdk\amxxmodule.h">
      <Filter>Module SDK\SDK Base</Filter>
    </ClInclude>
+    <ClInclude Include="..\utils.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <None Include="..\..\..\plugins\include\regex.inc">
--- a/dlls/regex/utils.cpp
+++ b/dlls/regex/utils.cpp
@@ -0,0 +1,236 @@
+
+#include "amxxmodule.h"
+#include <string.h>
+#include "utils.h"
+
+/**
+ * Checks whether the UTF-8 sequence that ends at c is complete.
+ * Walks backwards over continuation bytes (10xxxxxx) to the lead byte, then
+ * compares the number of bytes walked with the length the lead byte announces.
+ * NOTE(review): nothing bounds the backward walk; callers presumably
+ * guarantee that a non-continuation byte precedes c in the same buffer.
+ * @param c  Pointer to the last byte to examine.
+ * @return   0 if the sequence is complete, otherwise the number of bytes
+ *           (counting back from c) that belong to the broken sequence.
+ */
+int UTIL_CheckValidChar(char *c)
+{
+	int walked = 1;
+	while ((*c & 0xC0) == 0x80)
+	{
+		--c;
+		++walked;
+	}
+
+	/* The high nibble of the lead byte encodes the sequence length. */
+	const int lead = *c & 0xF0;
+	int expected = 0;
+	if (lead == 0xC0 || lead == 0xD0)
+	{
+		expected = 2;
+	}
+	else if (lead == 0xE0)
+	{
+		expected = 3;
+	}
+	else if (lead == 0xF0)
+	{
+		expected = 4;
+	}
+
+	return (expected == walked) ? 0 : walked;
+}
+
+/** Bounded copy: writes at most count - 1 chars of src plus a NUL; returns the chars written (0, dest untouched, if count is 0). */
+unsigned int strncopy(char *dest, const char *src, size_t count)
+{
+	if (count == 0)
+	{
+		return 0;
+	}
+	size_t copied = 0;
+	for (size_t room = count - 1; room > 0 && src[copied] != '\0'; --room)
+	{
+		dest[copied] = src[copied];
+		++copied;
+	}
+	dest[copied] = '\0';
+	return (unsigned int)copied;
+}
+
+/**
+* Replaces the first occurrence of search inside subject, in place.
+*
+* NOTE: Do not edit this for the love of god unless you have read the test
+* cases and understand the code behind each one; plugins will end up relying
+* on tiny idiosyncrasies of this function. When the result does not fit in
+* maxLen bytes the string is cut off instead of raising an error.
+*
+* @param subject  NUL-terminated buffer to modify; maxLen is its size in bytes,
+*                 terminator included. searchLen / replaceLen are byte lengths.
+* @param caseSensitive  Selects strncmp (true) or strnicmp (false) matching.
+* @return  Pointer just past the inserted replacement, or NULL if none was made.
+*/
+char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
+{
+	char *ptr = subject;
+	size_t browsed = 0;
+	size_t textLen = strlen(subject);
+
+	/* It's not possible to search or replace */
+	if (searchLen > textLen)
+	{
+		return NULL;
+	}
+
+	/* Handle the case of one byte replacement.
+	* It's only valid in one case.
+	*/
+	if (maxLen == 1)
+	{
+		/* If the search matches and the replace length is 0,
+		* we can just terminate the string and be done.
+		*/
+		if ((caseSensitive ? strcmp(subject, search) : stricmp(subject, search)) == 0 && replaceLen == 0)
+		{
+			*subject = '\0';
+			return subject;
+		}
+		else
+		{
+			return NULL;
+		}
+	}
+
+	/* Subtract one off the maxlength so we can include the null terminator */
+	maxLen--;
+
+	while (*ptr != '\0' && (browsed <= textLen - searchLen))
+	{
+		/* See if we get a comparison */
+		if ((caseSensitive ? strncmp(ptr, search, searchLen) : strnicmp(ptr, search, searchLen)) == 0)
+		{
+			if (replaceLen > searchLen)
+			{
+				/* First, see if we have enough space to do this operation */
+				if (maxLen - textLen < replaceLen - searchLen)
+				{
+					/* First, see if the replacement length goes out of bounds. */
+					if (browsed + replaceLen >= maxLen)
+					{
+						/* EXAMPLE CASE:
+						* Subject: AABBBCCC
+						* Buffer : 12 bytes
+						* Search : BBB
+						* Replace: DDDDDDDDDD
+						* OUTPUT : AADDDDDDDDD
+						* POSITION:           ^
+						*/
+						/* If it does, we'll just bound the length and do a strcpy. */
+						replaceLen = maxLen - browsed;
+
+						/* Note, we add one to the final result for the null terminator */
+						strncopy(ptr, replace, replaceLen + 1);
+
+						/* Don't truncate a multi-byte character */
+						if (*(ptr + replaceLen - 1) & 1 << 7)
+						{
+							replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1);
+							*(ptr + replaceLen) = '\0';
+						}
+					}
+					else
+					{
+						/* EXAMPLE CASE:
+						* Subject: AABBBCCC
+						* Buffer : 12 bytes
+						* Search : BBB
+						* Replace: DDDDDDD
+						* OUTPUT : AADDDDDDDCC
+						* POSITION:         ^
+						*/
+						/* We're going to have some bytes left over... */
+						size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1;
+						size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1;
+						char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy);
+						char *moveTo = ptr + replaceLen;
+
+						/* First, move our old data out of the way. */
+						memmove(moveTo, moveFrom, realBytesToCopy);
+
+						/* Now, do our replacement. */
+						memcpy(ptr, replace, replaceLen);
+					}
+				}
+				else
+				{
+					/* EXAMPLE CASE:
+					* Subject: AABBBCCC
+					* Buffer : 12 bytes
+					* Search : BBB
+					* Replace: DDDD
+					* OUTPUT : AADDDDCCC
+					* POSITION:      ^
+					*/
+					/* Yes, we have enough space.  Do a normal move operation. */
+					char *moveFrom = ptr + searchLen;
+					char *moveTo = ptr + replaceLen;
+
+					/* First move our old data out of the way. */
+					size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
+					memmove(moveTo, moveFrom, bytesToCopy);
+
+					/* Now do our replacement. */
+					memcpy(ptr, replace, replaceLen);
+				}
+			}
+			else if (replaceLen < searchLen)
+			{
+				/* EXAMPLE CASE:
+				* Subject: AABBBCCC
+				* Buffer : 12 bytes
+				* Search : BBB
+				* Replace: D
+				* OUTPUT : AADCCC
+				* POSITION:   ^
+				*/
+				/* If the replacement does not grow the string length, we do not
+				* need to do any fancy checking at all.  Yay!
+				*/
+				char *moveFrom = ptr + searchLen;		/* Start after the search pointer */
+				char *moveTo = ptr + replaceLen;		/* Copy to where the replacement ends */
+
+				/* Copy our replacement in, if any */
+				if (replaceLen)
+				{
+					memcpy(ptr, replace, replaceLen);
+				}
+
+				/* Figure out how many bytes to move down, including null terminator */
+				size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
+
+				/* Move the rest of the string down */
+				memmove(moveTo, moveFrom, bytesToCopy);
+			}
+			else
+			{
+				/* EXAMPLE CASE:
+				* Subject: AABBBCCC
+				* Buffer : 12 bytes
+				* Search : BBB
+				* Replace: DDD
+				* OUTPUT : AADDDCCC
+				* POSITION:     ^
+				*/
+				/* We don't have to move anything around, just do a straight copy */
+				memcpy(ptr, replace, replaceLen);
+			}
+
+			return ptr + replaceLen;
+		}
+		ptr++;
+		browsed++;
+	}
+
+	return NULL;
+}
--- a/dlls/regex/utils.h
+++ b/dlls/regex/utils.h
@@ -0,0 +1,8 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+int UTIL_CheckValidChar(char *c); // 0 if the UTF-8 sequence ending at c is complete, else the byte count of the broken sequence
+char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive); // in-place replace of the first match; returns the position after the replacement, or NULL
+unsigned int strncopy(char *dest, const char *src, size_t count); // bounded copy that NUL-terminates when count > 0; returns the chars copied
+
+#endif // UTILS_H