Add new string natives/stocks, make some UTF-8 safe (bug 6110, r=ds)

2014-04-30 09:33:03 +02:00
parent c99a518ba4
commit a86ca1491f
12 changed files with 1560 additions and 161 deletions
--- a/amxmodx/amxmodx.cpp
+++ b/amxmodx/amxmodx.cpp
@@ -234,8 +234,9 @@ static cell AMX_NATIVE_CALL console_print(AMX *amx, cell *params) /* 2 param */
 		if (len > 254)
 		{
 			len = 254;
-			if (((message[len - 1] & 0xFF) >= 0xC2) && ((message[len - 1] & 0xFF) <= 0xEF)) {       // Don't truncate a double-byte character
-				len--;
+			if ((message[len - 1] & 1 << 7))
+			{
+				len -= UTIL_CheckValidChar(message + len - 1); // Don't truncate a multi-byte character
 			}
 		}
 		message[len++] = '\n';
@@ -252,9 +253,9 @@ static cell AMX_NATIVE_CALL console_print(AMX *amx, cell *params) /* 2 param */
 			if (len > 126)	// Client console truncates after byte 127. (126 + \n = 127)
 			{
 				len = 126;
-				if (((message[len - 1] & 0xFF) >= 0xC2) && ((message[len - 1] & 0xFF) <= 0xEF))	// Don't truncate a double-byte character
+				if ((message[len - 1] & 1 << 7))
 				{
-					len--;
+					len -= UTIL_CheckValidChar(message + len - 1); // Don't truncate a multi-byte character
 				}
 			}
 			message[len++] = '\n';      // Client expects newline from the server
@@ -289,9 +290,9 @@ static cell AMX_NATIVE_CALL client_print(AMX *amx, cell *params) /* 3 param */
 				if (((params[2] == 1) || (params[2] == 2)) && (len > 126))	// Client console truncates after byte 127. (126 + \n = 127)
 				{
 					len = 126;
-					if (((msg[len - 1] & 0xFF) >= 0xC2) && ((msg[len - 1] & 0xFF) <= 0xEF))	// Don't truncate a double-byte character
+					if ((msg[len - 1] & 1 << 7))
 					{
-						len--;
+						len -= UTIL_CheckValidChar(msg + len - 1); // Don't truncate a multi-byte character
 					}
 				}
 				msg[len++] = '\n';	// Client expects newline from the server
@@ -323,9 +324,9 @@ static cell AMX_NATIVE_CALL client_print(AMX *amx, cell *params) /* 3 param */
 			if (((params[2] == 1) || (params[2] == 2)) && (len > 126))	// Client console truncates after byte 127. (126 + \n = 127)
 			{
 				len = 126;
-				if (((msg[len - 1] & 0xFF) >= 0xC2) && ((msg[len - 1] & 0xFF) <= 0xEF))       // Don't truncate a double-byte character
+				if ((msg[len - 1] & 1 << 7))
 				{
-					len--;
+					len -= UTIL_CheckValidChar(msg + len - 1); // Don't truncate a multi-byte character
 				}
 			}
 			msg[len++] = '\n';	// Client expects newline from the server
@@ -370,6 +371,16 @@ static cell AMX_NATIVE_CALL client_print_color(AMX *amx, cell *params) /* 3 para
 			{
 				g_langMngr.SetDefLang(i);
 				msg = format_amxstring(amx, params, 3, len);
+
+				if (len > 190)	// Server crashes after byte 190. (190 + \n = 191)
+				{
+					len = 190;
+					if ((msg[len - 1] & 1 << 7))
+					{
+						len -= UTIL_CheckValidChar(msg + len - 1); // Don't truncate a multi-byte character
+					}
+				}
+
 				msg[len++] = '\n';
 				msg[len] = 0;

@@ -392,6 +403,16 @@ static cell AMX_NATIVE_CALL client_print_color(AMX *amx, cell *params) /* 3 para
 			g_langMngr.SetDefLang(index);

 			msg = format_amxstring(amx, params, 3, len);
+
+			if (len > 190)	// Server crashes after byte 190. (190 + \n = 191)
+			{
+				len = 190;
+				if ((msg[len - 1] & 1 << 7))
+				{
+					len -= UTIL_CheckValidChar(msg + len - 1); // Don't truncate a multi-byte character
+				}
+			}
+
 			msg[len++] = '\n';
 			msg[len] = 0;
 		
@@ -663,7 +684,9 @@ static cell AMX_NATIVE_CALL get_user_name(AMX *amx, cell *params) /* 3 param */
 {
 	int index = params[1];
 	
-	return set_amxstring(amx, params[2], (index < 1 || index > gpGlobals->maxClients) ? hostname->string : g_players[index].name.c_str(), params[3]);
+	// Size the copy from the string actually returned: out-of-range indices yield the hostname and must not touch g_players[index].
+	const char *name = (index < 1 || index > gpGlobals->maxClients) ? hostname->string : g_players[index].name.c_str();
+	return set_amxstring_utf8(amx, params[2], name, strlen(name), params[3] + 1); // + EOS
 }

 static cell AMX_NATIVE_CALL get_user_index(AMX *amx, cell *params) /* 1 param */
@@ -1597,8 +1620,8 @@ static cell AMX_NATIVE_CALL get_concmd(AMX *amx, cell *params) /* 7 param */
 	if (cmd == 0)
 		return 0;
 	
-	set_amxstring(amx, params[2], cmd->getCmdLine(), params[3]);
-	set_amxstring(amx, params[5], cmd->getCmdInfo(), params[6]);
+	set_amxstring_utf8(amx, params[2], cmd->getCmdLine(), strlen(cmd->getCmdLine()), params[3] + 1); // + EOS
+	set_amxstring_utf8(amx, params[5], cmd->getCmdInfo(), strlen(cmd->getCmdInfo()), params[6] + 1); // + EOS
 	cell *cpFlags = get_amxaddr(amx, params[4]);
 	*cpFlags = cmd->getFlags();
 	
@@ -1634,8 +1657,8 @@ static cell AMX_NATIVE_CALL get_clcmd(AMX *amx, cell *params) /* 7 param */
 	if (cmd == 0)
 		return 0;

-	set_amxstring(amx, params[2], cmd->getCmdLine(), params[3]);
-	set_amxstring(amx, params[5], cmd->getCmdInfo(), params[6]);
+	set_amxstring_utf8(amx, params[2], cmd->getCmdLine(), strlen(cmd->getCmdLine()), params[3] + 1); // + EOS
+	set_amxstring_utf8(amx, params[5], cmd->getCmdInfo(), strlen(cmd->getCmdInfo()), params[6] + 1); // + EOS
 	cell *cpFlags = get_amxaddr(amx, params[4]);
 	*cpFlags = cmd->getFlags();

@@ -1649,8 +1672,8 @@ static cell AMX_NATIVE_CALL get_srvcmd(AMX *amx, cell *params)
 	if (cmd == 0)
 		return 0;
 	
-	set_amxstring(amx, params[2], cmd->getCmdLine(), params[3]);
-	set_amxstring(amx, params[5], cmd->getCmdInfo(), params[6]);
+	set_amxstring_utf8(amx, params[2], cmd->getCmdLine(), strlen(cmd->getCmdLine()), params[3] + 1); // + EOS
+	set_amxstring_utf8(amx, params[5], cmd->getCmdInfo(), strlen(cmd->getCmdInfo()), params[6] + 1); // + EOS
 	cell *cpFlags = get_amxaddr(amx, params[4]);
 	*cpFlags = cmd->getFlags();
 	
@@ -1891,7 +1914,7 @@ static cell AMX_NATIVE_CALL get_pcvar_string(AMX *amx, cell *params)
 		return 0;
 	}

-	return set_amxstring(amx, params[2], ptr->string ? ptr->string : "", params[3]);
+	return set_amxstring_utf8(amx, params[2], ptr->string ? ptr->string : "", ptr->string ? strlen(ptr->string) : 0, params[3] + 1); // EOS
 }

 static cell AMX_NATIVE_CALL get_cvar_string(AMX *amx, cell *params) /* 3 param */
@@ -1912,7 +1935,8 @@ static cell AMX_NATIVE_CALL get_cvar_string(AMX *amx, cell *params) /* 3 param *
 		}
 	}
 	
-	return set_amxstring(amx, params[2], CVAR_GET_STRING(sptemp), params[3]);
+	const char *value = CVAR_GET_STRING(sptemp);
+	return set_amxstring_utf8(amx, params[2], value, strlen(value), params[3] + 1); // + EOS
 }

 static cell AMX_NATIVE_CALL get_pcvar_float(AMX *amx, cell *params)
@@ -2168,9 +2192,9 @@ static cell AMX_NATIVE_CALL format_time(AMX *amx, cell *params) /* 3 param */
 	}
 	
 	char szDate[512];
-	strftime(szDate, 511, sptemp, lt);
+	ilen = strftime(szDate, 511, sptemp, lt); // Bytes written, NOT counting the terminator; 0 if the result did not fit.
 	
-	return set_amxstring(amx, params[1], szDate, params[2]);
+	return set_amxstring_utf8(amx, params[1], szDate, ilen, params[2] + 1); // + EOS

 }

@@ -2235,7 +2259,8 @@ static cell AMX_NATIVE_CALL read_data(AMX *amx, cell *params) /* 3 param */
 		case 1:
 			return g_events.getArgInteger(params[1]);
 		case 3:
-			return set_amxstring(amx, params[2], g_events.getArgString(params[1]), *get_amxaddr(amx, params[3]));
+			return set_amxstring_utf8(amx, params[2], g_events.getArgString(params[1]), 
+				strlen(g_events.getArgString(params[1])),*get_amxaddr(amx, params[3]) + 1); // + EOS
 		default:
 			cell *fCell = get_amxaddr(amx, params[2]);
 			REAL fparam = (REAL)g_events.getArgFloat(params[1]);
@@ -2438,7 +2463,8 @@ static cell AMX_NATIVE_CALL get_localinfo(AMX *amx, cell *params) /* 3 param */
 	int ilen;
 	char* sptemp = get_amxstring(amx, params[1], 0, ilen);
 	
-	return set_amxstring(amx, params[2], LOCALINFO(sptemp), params[3]);
+	char *value = LOCALINFO(sptemp);
+	return set_amxstring_utf8(amx, params[2], value, strlen(value), params[3] + 1); //  + EOS
 }

 static cell AMX_NATIVE_CALL set_localinfo(AMX *amx, cell *params) /* 2 param */
@@ -2511,14 +2537,15 @@ static cell AMX_NATIVE_CALL read_argc(AMX *amx, cell *params)

 static cell AMX_NATIVE_CALL read_argv(AMX *amx, cell *params) /* 3 param */
 {
-	return set_amxstring(amx, params[2], /*(params[1] < 0 ||
-	params[1] >= CMD_ARGC()) ? "" : */CMD_ARGV(params[1]), params[3]);
+	const char *value = CMD_ARGV(params[1]);
+	return set_amxstring_utf8(amx, params[2], /*(params[1] < 0 ||
+	params[1] >= CMD_ARGC()) ? "" : */value, strlen(value), params[3] + 1); // + EOS
 }

 static cell AMX_NATIVE_CALL read_args(AMX *amx, cell *params) /* 2 param */
 {
 	const char* sValue = CMD_ARGS();
-	return set_amxstring(amx, params[1], sValue ? sValue : "", params[2]);
+	return set_amxstring_utf8(amx, params[1], sValue ? sValue : "", sValue ? strlen(sValue) : 0, params[2] + 1); // +EOS
 }

 static cell AMX_NATIVE_CALL get_user_msgid(AMX *amx, cell *params) /* 1 param */
@@ -3210,7 +3237,8 @@ static cell AMX_NATIVE_CALL force_unmodified(AMX *amx, cell *params)

 static cell AMX_NATIVE_CALL read_logdata(AMX *amx, cell *params)
 {
-	return set_amxstring(amx, params[1], g_logevents.getLogString(), params[2]);
+	const char *value = g_logevents.getLogString();
+	return set_amxstring_utf8(amx, params[1], value, strlen(value), params[2] + 1); // + EOS
 }

 static cell AMX_NATIVE_CALL read_logargc(AMX *amx, cell *params)
@@ -3220,7 +3248,8 @@ static cell AMX_NATIVE_CALL read_logargc(AMX *amx, cell *params)

 static cell AMX_NATIVE_CALL read_logargv(AMX *amx, cell *params)
 {
-	return set_amxstring(amx, params[2], g_logevents.getLogArg(params[1]), params[3]);
+	const char *value = g_logevents.getLogArg(params[1]);
+	return set_amxstring_utf8(amx, params[2], value, strlen(value), params[3] + 1); // + EOS
 }

 static cell AMX_NATIVE_CALL parse_loguser(AMX *amx, cell *params)
@@ -3504,9 +3533,13 @@ static cell AMX_NATIVE_CALL get_module(AMX *amx, cell *params)
 	if ((*moduleIter).isAmxx()) 	 
 	{ 	 
 		const amxx_module_info_s *info = (*moduleIter).getInfoNew();
-		set_amxstring(amx, params[2], info && info->name ? info->name : "unk", params[3]); 	 
-		set_amxstring(amx, params[4], info && info->author ? info->author : "unk", params[5]); 	 
-		set_amxstring(amx, params[6], info && info->version ? info->version : "unk", params[7]); 	 
+		const char *name = info && info->name ? info->name : "unk";
+		const char *author = info && info->author ? info->author : "unk";
+		const char *version = info && info->version ? info->version : "unk";
+
+		set_amxstring_utf8(amx, params[2], name, strlen(name), params[3]  + 1); // + EOS
+		set_amxstring_utf8(amx, params[4], author, strlen(author), params[5] + 1); // + EOS
+		set_amxstring_utf8(amx, params[6], version, strlen(version), params[7] + 1); // + EOS
 	}

 	// compatibility problem possible
--- a/amxmodx/amxmodx.h
+++ b/amxmodx/amxmodx.h
@@ -147,6 +147,11 @@ void UTIL_ShowMenu(edict_t* pEntity, int slots, int time, char *menu, int mlen);
 void UTIL_ClientSayText(edict_t *pEntity, int sender, char *msg);
 void UTIL_TeamInfo(edict_t *pEntity, int playerIndex, const char *pszTeamName);

+template <typename D>
+int UTIL_CheckValidChar(D *c);
+unsigned int UTIL_GetUTF8CharBytes(const char *stream);
+unsigned int UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search, const char *replace, bool caseSensitive);
+char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive);
 char *UTIL_VarArgs(const char *fmt, ...);


@@ -294,6 +299,7 @@ int amxstring_len(cell* cstr);
 int load_amxscript(AMX* amx, void** program, const char* path, char error[64], int debug);
 int set_amxnatives(AMX* amx, char error[64]);
 int set_amxstring(AMX *amx, cell amx_addr, const char *source, int max);
+int set_amxstring_utf8(AMX *amx, cell amx_addr, const char *source, size_t sourcelen, size_t maxlen);
 int unload_amxscript(AMX* amx, void** program);

 void copy_amxmemory(cell* dest, cell* src, int len);
--- a/amxmodx/datapacks.cpp
+++ b/amxmodx/datapacks.cpp
@@ -137,13 +137,14 @@ static cell AMX_NATIVE_CALL ReadPackString(AMX* amx, cell* params)
 	}

 	const char *str;
-	if (!(str = d->ReadString(NULL)))
+	size_t len;
+	if (!(str = d->ReadString(&len)))
 	{
 		LogError(amx, AMX_ERR_NATIVE, "DataPack operation is out of bounds.");
 		return 0;
 	}

-	return set_amxstring(amx, params[2], str, params[3]);
+	return set_amxstring_utf8(amx, params[2], str, len, params[3] + 1); // + EOS
 }

 static cell AMX_NATIVE_CALL ResetPack(AMX* amx, cell* params)
--- a/amxmodx/datastructs.h
+++ b/amxmodx/datastructs.h
@@ -163,6 +163,13 @@ public:
 			*out='\0';
 		}

+		/* Don't truncate a multi-byte character */
+		if (*(output - 1) & 1 << 7)
+		{
+			size = UTIL_CheckValidChar(output - 1);
+			*(output - size) = '\0';
+		}
+
 		return 1;
 	}
 	// Returns 1 on success
--- a/amxmodx/file.cpp
+++ b/amxmodx/file.cpp
@@ -169,7 +169,7 @@ static cell AMX_NATIVE_CALL read_file(AMX *amx, cell *params) /* 5 param */
 			buffor[--len] = 0;
 		
 		cell *length = get_amxaddr(amx, params[5]);
-		*length = set_amxstring(amx, params[3], buffor, params[4]);
+		*length = set_amxstring_utf8(amx, params[3], buffor, len, params[4] + 1); // + EOS
 		
 		return i;
 	}
@@ -583,7 +583,7 @@ static cell AMX_NATIVE_CALL amx_fgets(AMX *amx, cell *params)
 	static char buffer[4096];
 	buffer[0] = '\0';
 	fgets(buffer, sizeof(buffer)-1, fp);
-	return set_amxstring(amx, params[2], buffer, params[3]);
+	return set_amxstring_utf8(amx, params[2], buffer, strlen(buffer), params[3] + 1); // + EOS
 }

 static cell AMX_NATIVE_CALL amx_fseek(AMX *amx, cell *params)
--- a/amxmodx/format.cpp
+++ b/amxmodx/format.cpp
@@ -149,6 +149,12 @@ void AddString(U **buf_p, size_t &maxlen, const cell *string, int width, int pre
 	if (size > (int)maxlen)
 		size = maxlen;

+	/* If precision is provided, make sure we don't truncate a multi-byte character */
+	if (prec >= size && (string[size - 1] & 1 << 7))
+	{
+		size -= UTIL_CheckValidChar((cell *)string + size - 1);
+	}
+
 	maxlen -= size;
 	width -= size;

@@ -286,6 +292,58 @@ void AddFloat(U **buf_p, size_t &maxlen, double fval, int width, int prec, int f
 	*buf_p = buf;
 }

+/**
+ * Writes val to *buf_p as binary digits (the 'b' format specifier), padded to width.
+ * Padding goes before the digits unless LADJUST is set; ZEROPAD selects '0' over ' '.
+ * Every cell written decrements maxlen; output stops silently when maxlen reaches 0.
+ */
+template <typename U>
+void AddBinary(U **buf_p, size_t &maxlen, unsigned int val, int width, int flags)
+{
+	char text[32];
+	int digits = 0;
+	U *buf = *buf_p;
+
+	/* Collect the bits least-significant first; they are emitted in reverse below. */
+	do
+	{
+		text[digits++] = (val & 1) ? '1' : '0';
+		val >>= 1;
+	} while (val);
+
+	/* Right-aligned output: the padding comes first. */
+	if (!(flags & LADJUST))
+	{
+		while (digits < width && maxlen)
+		{
+			*buf++ = (flags & ZEROPAD) ? '0' : ' ';
+			width--;
+			maxlen--;
+		}
+	}
+
+	/* Most significant digit first. */
+	while (digits-- && maxlen)
+	{
+		*buf++ = text[digits];
+		width--;
+		maxlen--;
+	}
+
+	if (flags & LADJUST)
+	{
+		/* width is negative here when more digits were written than columns requested;
+		 * compare against 0 so the padding cannot run until maxlen is exhausted. */
+		while (width-- > 0 && maxlen)
+		{
+			*buf++ = (flags & ZEROPAD) ? '0' : ' ';
+			maxlen--;
+		}
+	}
+
+	*buf_p = buf;
+}
+
 template <typename U>
 void AddUInt(U **buf_p, size_t &maxlen, unsigned int val, int width, int flags)
 {
@@ -527,6 +585,11 @@ reswitch:
 			llen--;
 			arg++;
 			break;
+		case 'b':
+			CHECK_ARGS(0);
+			AddBinary(&buf_p, llen, *get_amxaddr(amx, params[arg]), width, flags);
+			arg++;
+			break;
 		case 'd':
 		case 'i':
 			CHECK_ARGS(0);
@@ -635,6 +698,14 @@ break_to_normal_string:
 done:
 	*buf_p = static_cast<D>(0);
 	*param = arg;
+
+	/* if max buffer length consumed, make sure we don't truncate a multi-byte character */
+	if (llen <= 0 && *(buf_p - 1) & 1 << 7)
+	{
+		llen += UTIL_CheckValidChar(buf_p - 1);
+		*(buf_p - llen) = static_cast<D>(0);
+	}
+
 	return maxlen-llen;
 }

--- a/amxmodx/string.cpp
+++ b/amxmodx/string.cpp
@@ -103,13 +103,44 @@ int set_amxstring(AMX *amx, cell amx_addr, const char *source, int max)
 #endif
 	
 	while (max-- && *source)
-		*dest++ = (cell)*source++;
+		*dest++ = (unsigned char)*source++;
 	
 	*dest = 0;
 	
 	return dest - start;
 }

+/* Copies source into the plugin string at amx_addr without splitting a multi-byte UTF-8 character.
+ * sourcelen is the byte length of source; maxlen is the destination size in cells, terminator
+ * included. Returns the number of cells written in front of the terminator. */
+int set_amxstring_utf8(AMX *amx, cell amx_addr, const char *source, size_t sourcelen, size_t maxlen)
+{
+	register cell* dest = (cell *)(amx->base + (int)(((AMX_HEADER *)amx->base)->dat + amx_addr));
+	register cell* start = dest;
+
+	if (maxlen == 0)	// no room even for the terminator; maxlen - 1 below would wrap around
+	{
+		return 0;
+	}
+
+	bool needtocheck = (sourcelen >= maxlen);	// true when the source has to be cut
+	size_t len = needtocheck ? maxlen - 1 : sourcelen;
+	for (size_t room = len; room-- && *source; )
+	{
+		*dest++ = *(unsigned char*)source++;
+	}
+
+	len = dest - start;	// what was really copied: source may end before sourcelen
+	if (needtocheck && len > 0 && (start[len - 1] & 1 << 7))
+	{
+		size_t cut = UTIL_CheckValidChar(start + len - 1);
+		len -= (cut < len) ? cut : len;	// never step back past the start of the buffer
+	}
+
+	start[len] = '\0';
+	return len;
+}
+
 extern "C" size_t get_amxstring_r(AMX *amx, cell amx_addr, char *destination, int maxlen)
 {
 	register cell *source = (cell *)(amx->base + (int)(((AMX_HEADER *)amx->base)->dat + amx_addr));
@@ -287,6 +318,62 @@ static cell AMX_NATIVE_CALL replace(AMX *amx, cell *params) /* 4 param */
 	return 0;
 }

+/* Native: replaces every occurrence of params[3] with params[4] in the string params[1] (buffer
+ * length params[2]); params[5] != 0 matches case-sensitively. Returns the replacement count. */
+static cell AMX_NATIVE_CALL replace_string(AMX *amx, cell *params)
+{
+	int unused;
+	char *subject = get_amxstring(amx, params[1], 0, unused);
+	const char *needle = get_amxstring(amx, params[3], 1, unused);
+	const char *substitute = get_amxstring(amx, params[4], 2, unused);
+	const size_t capacity = (size_t)params[2];
+	const bool matchCase = (params[5] != 0);
+
+	if (*needle == '\0')
+	{
+		LogError(amx, AMX_ERR_NATIVE, "Cannot replace searches of empty strings.");
+		return -1;
+	}
+
+	/* Replace inside the char buffer, then store the result back at params[1]. */
+	int replaced = UTIL_ReplaceAll(subject, capacity + 1, needle, substitute, matchCase); // + EOS
+	set_amxstring(amx, params[1], subject, capacity);
+
+	return replaced;
+}
+
+/* Native: replaces the first occurrence of params[3] in the string params[1] with params[4].
+ * params[5]/params[6] are the search/replace lengths (-1 = strlen), params[7] case sensitivity.
+ * Returns the index just past the replacement; -1 if nothing matched or the search is empty. */
+static cell AMX_NATIVE_CALL replace_stringex(AMX *amx, cell *params)
+{
+	int len;
+	size_t maxlength = (size_t)params[2];
+	char *text = get_amxstring(amx, params[1], 0, len);
+	const char *search = get_amxstring(amx, params[3], 1, len);
+	const char *replace = get_amxstring(amx, params[4], 2, len);
+
+	size_t searchLen = (params[5] == -1) ? strlen(search) : (size_t)params[5];
+	size_t replaceLen = (params[6] == -1) ? strlen(replace) : (size_t)params[6];
+	bool caseSensitive = params[7] ? true : false;
+
+	if (searchLen == 0)
+	{
+		LogError(amx, AMX_ERR_NATIVE, "Cannot replace searches of empty strings.");
+		return -1;
+	}
+
+	char *ptr = UTIL_ReplaceEx(text, maxlength + 1, search, searchLen, replace, replaceLen, caseSensitive); // + EOS
+	if (ptr == NULL)
+	{
+		return -1;
+	}
+
+	/* Store the whole edited string; ptr only points at the text after the replacement. */
+	set_amxstring(amx, params[1], text, maxlength);
+	return ptr - text;
+}
+
 static cell AMX_NATIVE_CALL contain(AMX *amx, cell *params) /* 2 param */
 {
 	register cell *a = get_amxaddr(amx, params[2]);
@@ -854,8 +941,8 @@ static cell AMX_NATIVE_CALL amx_strtok(AMX *amx, cell *params)

 	right[right_pos] = 0;
 	left[left_pos] = 0;
-	set_amxstring(amx, params[2], left, leftMax);
-	set_amxstring(amx, params[4], right, rightMax);
+	set_amxstring_utf8(amx, params[2], left, strlen(left), leftMax + 1); // +EOS
+	set_amxstring_utf8(amx, params[4], right, strlen(right), rightMax + 1); // +EOS
 	delete [] left;
 	delete [] right;
 	
@@ -928,8 +1015,9 @@ static cell AMX_NATIVE_CALL amx_strtok2(AMX *amx, cell *params)

 	right[right_pos] = 0;
 	left[left_pos] = 0;
-	set_amxstring(amx, params[2], left, left_max);
-	set_amxstring(amx, params[4], right, right_max);
+
+	set_amxstring_utf8(amx, params[2], left, strlen(left), left_max + 1); // + EOS
+	set_amxstring_utf8(amx, params[4], right, strlen(right), right_max + 1); // + EOS

 	delete [] left;
 	delete [] right;
@@ -1029,7 +1117,7 @@ do_copy:
 				                    : end - beg
 				                   )
 				                 : 0;
-				set_amxstring(amx, params[2], start, copylen);
+				set_amxstring_utf8(amx, params[2], start, strlen(start), copylen + 1); // + EOS

 				end = (len-i+1 > (size_t)RightMax) ? (size_t)RightMax : len-i+1;
 				if (end)
@@ -1045,13 +1133,50 @@ do_copy:
 	}

 	//if we got here, there was nothing to break
-	set_amxstring(amx, params[2], &(string[beg]), LeftMax);
+	set_amxstring_utf8(amx, params[2], &(string[beg]), strlen(&(string[beg])), LeftMax + 1); // + EOS
 	if (RightMax)
 		*right = '\0';

 	return 1;
 }

+/**
+ * Native: looks for the first occurrence of the delimiter params[2] inside params[1]. The text in
+ * front of it is copied to params[3] (at most params[4] cells plus terminator). Returns the index
+ * just past the delimiter, or -1 when the delimiter does not occur.
+ */
+static cell AMX_NATIVE_CALL split_string(AMX *amx, cell *params)
+{
+	int sourceLen, delimLen;
+	char *source = get_amxstring(amx, params[1], 0, sourceLen);
+	const char *delim = get_amxstring(amx, params[2], 1, delimLen);
+
+	if (delimLen > sourceLen)
+	{
+		return -1;
+	}
+
+	const int partMax = params[4];
+	const int lastStart = sourceLen - delimLen;	// last index at which the delimiter still fits
+
+	for (int pos = 0; pos <= lastStart; pos++)
+	{
+		if (strncmp(source + pos, delim, delimLen) != 0)
+		{
+			continue;
+		}
+
+		/* Keep the pos bytes in front of the match, capped at the caller's limit;
+		 * the extra cell passed below is for the null terminator. */
+		const int keep = (pos >= partMax + 1) ? partMax : pos;
+		set_amxstring_utf8(amx, params[3], source, sourceLen, keep + 1);
+
+		return pos + delimLen;
+	}
+
+	return -1;
+}
+
 static cell AMX_NATIVE_CALL format_args(AMX *amx, cell *params)
 {
 	int len;
@@ -1065,29 +1190,102 @@ static cell AMX_NATIVE_CALL format_args(AMX *amx, cell *params)

 	char* string = format_arguments(amx, pos, len); // indexed from 0
 	
-	return set_amxstring(amx, params[1], string, params[2]);
+	return set_amxstring_utf8(amx, params[1], string, len, params[2] + 1); // + EOS
 }

 static cell AMX_NATIVE_CALL is_digit(AMX *amx, cell *params)
 {
-	return isdigit(params[1]);
+	char chr = params[1];
+
+	if (UTIL_GetUTF8CharBytes(&chr) != 1)
+	{
+		return 0;
+	}
+
+	return isdigit(chr);
 }

 static cell AMX_NATIVE_CALL is_alnum(AMX *amx, cell *params)
 {
-	return isalnum(params[1]);
+	char chr = params[1];
+
+	if (UTIL_GetUTF8CharBytes(&chr) != 1)
+	{
+		return 0;
+	}
+
+	return isalnum(chr);
 }

 static cell AMX_NATIVE_CALL is_space(AMX *amx, cell *params)
 {
-	return isspace(params[1]);
+	char chr = params[1];
+
+	if (UTIL_GetUTF8CharBytes(&chr) != 1)
+	{
+		return 0;
+	}
+
+	return isspace(chr);
 }

 static cell AMX_NATIVE_CALL is_alpha(AMX *amx, cell *params)
 {
-	return isalpha(params[1]);
+	char chr = params[1];
+
+	if (UTIL_GetUTF8CharBytes(&chr) != 1)
+	{
+		return 0;
+	}
+
+	return isalpha(chr);
 }

+// Native: isupper() result for params[1]; always 0 for a byte that belongs to a multi-byte character.
+static cell AMX_NATIVE_CALL is_char_upper(AMX *amx, cell *params)
+{
+	char ch = params[1];
+
+	if (UTIL_GetUTF8CharBytes(&ch) == 1)
+	{
+		return isupper(ch);
+	}
+	return 0;
+}
+
+// Native: islower() result for params[1]; always 0 for a byte that belongs to a multi-byte character.
+static cell AMX_NATIVE_CALL is_char_lower(AMX *amx, cell *params)
+{
+	char ch = params[1];
+
+	if (UTIL_GetUTF8CharBytes(&ch) == 1)
+	{
+		return islower(ch);
+	}
+	return 0;
+}
+
+// Native: 0 when params[1] is a single-byte character, else the byte count UTIL_GetUTF8CharBytes reports for it.
+static cell AMX_NATIVE_CALL is_char_mb(AMX *amx, cell *params)
+{
+	char ch = params[1];
+	unsigned int width = UTIL_GetUTF8CharBytes(&ch);
+
+	if (width != 1)
+	{
+		return width;
+	}
+	return 0;
+}
+
+// Native: byte length of the UTF-8 character that starts the string params[1], judged from its first byte.
+static cell AMX_NATIVE_CALL get_char_bytes(AMX *amx, cell *params)
+{
+	int len;
+	char *str = get_amxstring(amx, params[1], 0, len);
+	return UTIL_GetUTF8CharBytes(str);
+}
+
 static cell AMX_NATIVE_CALL amx_ucfirst(AMX *amx, cell *params)
 {
 	cell *str = get_amxaddr(amx, params[1]);
@@ -1163,6 +1361,18 @@ static cell AMX_NATIVE_CALL n_strcmp(AMX *amx, cell *params)
 		return strcmp(str1, str2);
 }

+// Native: compares at most params[3] bytes of two strings; params[4] != 0 uses strncmp, 0 uses strncasecmp.
+// NOTE(review): confirm this polarity matches the flag documented for the native in string.inc and n_strcmp.
+static cell AMX_NATIVE_CALL n_strncmp(AMX *amx, cell *params)
+{
+	int unused;
+	const char *lhs = get_amxstring(amx, params[1], 0, unused);
+	const char *rhs = get_amxstring(amx, params[2], 1, unused);
+	const size_t limit = (size_t)params[3];
+
+	return params[4] ? strncmp(lhs, rhs, limit) : strncasecmp(lhs, rhs, limit);
+}
+
 static cell AMX_NATIVE_CALL n_strfind(AMX *amx, cell *params)
 {
 	int len;
@@ -1274,13 +1484,20 @@ AMX_NATIVE_INFO string_Natives[] =
 	{"isalnum",			is_alnum},
 	{"isspace",			is_space},
 	{"isalpha",			is_alpha},
+	{"is_char_upper",	is_char_upper},
+	{"is_char_lower",	is_char_lower},
+	{"is_char_mb",		is_char_mb},
+	{"get_char_bytes",	get_char_bytes},
 	{"num_to_str",		numtostr},
 	{"numtostr",		numtostr},
 	{"parse",			parse},
 	{"replace",			replace},
+	{"replace_string",	replace_string},
+	{"replace_stringex",replace_stringex},
 	{"setc",			setc},
 	{"strbreak",		strbreak},
 	{"argparse",		argparse},
+	{"split_string",	split_string},
 	{"strtolower",		strtolower},
 	{"strtoupper",		strtoupper},
 	{"str_to_num",		strtonum},
@@ -1295,6 +1512,7 @@ AMX_NATIVE_INFO string_Natives[] =
 	{"strcat",			n_strcat},
 	{"strfind",			n_strfind},
 	{"strcmp",			n_strcmp},
+	{"strncmp",			n_strncmp},
 	{"str_to_float",	str_to_float},
 	{"float_to_str",	float_to_str},
 	{"vformat",			vformat},
--- a/amxmodx/trie_natives.h
+++ b/amxmodx/trie_natives.h
@@ -112,7 +112,16 @@ public:
 	{
 		if (m_type == TRIE_DATA_STRING && max >= 0)
 		{
-			memcpy(out, m_data, (max > m_cellcount ? m_cellcount : max) * sizeof(cell));
+			int len = (max > m_cellcount) ? m_cellcount : max;
+			memcpy(out, m_data, len * sizeof(cell));
+			
+			/* Don't truncate a multi-byte character */
+			if (m_data[len - 1] & 1 << 7)
+			{
+				len -= UTIL_CheckValidChar(m_data + len - 1);
+				out[len] = '\0';
+			}
+
 			return true;
 		}
 		return false;
--- a/amxmodx/util.cpp
+++ b/amxmodx/util.cpp
@@ -397,3 +397,275 @@ void UTIL_FakeClientCommand(edict_t *pEdict, const char *cmd, const char *arg1,
 	g_fakecmd.fake = false;
 }

+/* Length in bytes of the UTF-8 character introduced by *stream, read off the high bits of that
+ * byte alone: 0xxxxxxx -> 1, 1x0xxxxx -> 2, 1x10xxxx -> 3, 1x11xxxx -> 4. The byte is not
+ * validated, so a continuation byte (10xxxxxx) is classified as if it were a lead byte. */
+unsigned int UTIL_GetUTF8CharBytes(const char *stream)
+{
+	const unsigned char lead = (unsigned char)*stream;
+
+	if ((lead & 0x80) == 0)
+	{
+		return 1;
+	}
+	if ((lead & 0x20) == 0)
+	{
+		return 2;
+	}
+	return (lead & 0x10) ? 4 : 3;
+}
+
+template int UTIL_CheckValidChar<char>(char *);
+template int UTIL_CheckValidChar<cell>(cell *);
+
+/**
+ * Checks whether the UTF-8 character that ends at c is complete.
+ *
+ * Steps backwards from c over continuation bytes (10xxxxxx) to the byte that should introduce
+ * the character, then compares the length that byte announces with the bytes actually present.
+ *
+ * @param c		Last byte (or cell) of the text that is about to be kept.
+ * @return		0 if the character is complete; otherwise the number of bytes scanned, which
+ *				callers subtract from their length to drop the broken tail.
+ *
+ * NOTE(review): the backward scan has no lower bound; a buffer holding only continuation
+ * bytes would be read before its start. Callers appear to rely on well-formed input.
+ */
+template <typename D>
+int UTIL_CheckValidChar(D *c)
+{
+	int seen = 1;		// bytes inspected so far, counting *c itself
+	int expected;		// length announced by the lead byte, 0 if it is not a lead byte
+
+	while ((*c & 0xC0) == 0x80)
+	{
+		c--;
+		seen++;
+	}
+
+	/* Only the high nibble of the presumed lead byte decides the expected length. */
+	const int lead = *c & 0xF0;
+	if (lead == 0xC0 || lead == 0xD0)
+		expected = 2;
+	else if (lead == 0xE0)
+		expected = 3;
+	else if (lead == 0xF0)
+		expected = 4;
+	else
+		expected = 0;
+
+	return (expected == seen) ? 0 : seen;
+}
+
+/* Replaces every occurrence of search in subject (maxlength bytes of room, terminator included); returns the count. */
+unsigned int UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search, const char *replace, bool caseSensitive)
+{
+	size_t searchLen = strlen(search);
+	size_t replaceLen = strlen(replace);
+	char *newptr, *ptr = subject;
+	unsigned int total = 0;
+
+	while ((newptr = UTIL_ReplaceEx(ptr, maxlength, search, searchLen, replace, replaceLen, caseSensitive)) != NULL)
+	{
+		total++;
+		maxlength -= newptr - ptr;	// the room left shrinks as the scan position moves forward
+		ptr = newptr;
+		if (*ptr == '\0')
+			break;
+	}
+	return total;
+}
+
+/* Copies src into dest, writing at most count bytes including the terminator (nothing at all
+ * when count is 0). Returns the number of characters copied, terminator excluded. */
+unsigned int strncopy(char *dest, const char *src, size_t count)
+{
+	unsigned int copied = 0;
+	if (count == 0)
+	{
+		return 0;
+	}
+	for (; src[copied] != '\0' && copied + 1 < count; copied++)
+	{
+		dest[copied] = src[copied];
+	}
+	dest[copied] = '\0';
+	return copied;
+}
+
+/**
+* Replaces the first occurrence of search (searchLen bytes) in subject with replace (replaceLen
+* bytes). maxLen is the size of subject's buffer, terminator included. Returns a pointer just past
+* the inserted replacement, or NULL when nothing was replaced.
+*
+* NOTE: Do not edit this for the love of god unless you have read the test cases and understand
+* the code behind each one. While I don't guarantee there aren't mistakes, I do guarantee that
+* plugins will end up relying on tiny idiosyncrasies of this function, just like they did with
+* AMX Mod X. There are explicitly more cases than the AMX Mod X version because we're not doing
+* a blind copy. Each case is specifically optimized for what needs to be done. Even better, we
+* don't have to error on bad buffer sizes. Instead, this function will smartly cut off the
+* string in a way that pushes old data out.
+*/
+char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
+{
+	char *ptr = subject;	/* current scan position */
+	size_t browsed = 0;	/* offset of ptr from subject */
+	size_t textLen = strlen(subject);
+
+	/* It's not possible to search or replace */
+	if (searchLen > textLen)
+	{
+		return NULL;
+	}
+
+	/* Handle the case of one byte replacement.
+	* It's only valid in one case.
+	*/
+	if (maxLen == 1)
+	{
+		/* If the search matches and the replace length is 0,
+		* we can just terminate the string and be done.
+		*/
+		if ((caseSensitive ? strcmp(subject, search) : strcasecmp(subject, search)) == 0 && replaceLen == 0)
+		{
+			*subject = '\0';
+			return subject;
+		}
+		else
+		{
+			return NULL;
+		}
+	}
+
+	/* Subtract one off the maxlength so we can include the null terminator */
+	maxLen--;
+
+	while (*ptr != '\0' && (browsed <= textLen - searchLen))	/* stop once search can no longer fit */
+	{
+		/* See if we get a comparison */
+		if ((caseSensitive ? strncmp(ptr, search, searchLen) : strncasecmp(ptr, search, searchLen)) == 0)
+		{
+			if (replaceLen > searchLen)
+			{
+				/* First, see if the grown string would no longer fit in the buffer */
+				if (maxLen - textLen < replaceLen - searchLen)	/* NOTE(review): wraps if textLen > maxLen - confirm callers rule that out */
+				{
+					/* First, see if the replacement length goes out of bounds. */
+					if (browsed + replaceLen >= maxLen)
+					{
+						/* EXAMPLE CASE:
+						* Subject: AABBBCCC
+						* Buffer : 12 bytes
+						* Search : BBB
+						* Replace: DDDDDDDDDD
+						* OUTPUT : AADDDDDDDDD
+						* POSITION:           ^
+						*/
+						/* If it does, we'll just bound the length and do a strcpy. */
+						replaceLen = maxLen - browsed;
+
+						/* Note, we add one to the final result for the null terminator */
+						strncopy(ptr, replace, replaceLen + 1);
+
+						/* Don't truncate a multi-byte character */
+						if (*(ptr + replaceLen - 1) & 1 << 7)
+						{
+							replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1);
+							*(ptr + replaceLen) = '\0';
+						}
+					}
+					else
+					{
+						/* EXAMPLE CASE:
+						* Subject: AABBBCCC
+						* Buffer : 12 bytes
+						* Search : BBB
+						* Replace: DDDDDDD
+						* OUTPUT : AADDDDDDDCC
+						* POSITION:         ^
+						*/
+						/* We're going to have some bytes left over... */
+						size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1;
+						size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1;
+						char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy);
+						char *moveTo = ptr + replaceLen;
+
+						/* First, move our old data out of the way. */
+						memmove(moveTo, moveFrom, realBytesToCopy);
+
+						/* Now, do our replacement. */
+						memcpy(ptr, replace, replaceLen);
+					}
+				}
+				else
+				{
+					/* EXAMPLE CASE:
+					* Subject: AABBBCCC
+					* Buffer : 12 bytes
+					* Search : BBB
+					* Replace: DDDD
+					* OUTPUT : AADDDDCCC
+					* POSITION:      ^
+					*/
+					/* Yes, we have enough space.  Do a normal move operation. */
+					char *moveFrom = ptr + searchLen;
+					char *moveTo = ptr + replaceLen;
+
+					/* First move our old data out of the way. */
+					size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
+					memmove(moveTo, moveFrom, bytesToCopy);
+
+					/* Now do our replacement. */
+					memcpy(ptr, replace, replaceLen);
+				}
+			}
+			else if (replaceLen < searchLen)
+			{
+				/* EXAMPLE CASE:
+				* Subject: AABBBCCC
+				* Buffer : 12 bytes
+				* Search : BBB
+				* Replace: D
+				* OUTPUT : AADCCC
+				* POSITION:   ^
+				*/
+				/* If the replacement does not grow the string length, we do not
+				* need to do any fancy checking at all.  Yay!
+				*/
+				char *moveFrom = ptr + searchLen;		/* Start after the search pointer */
+				char *moveTo = ptr + replaceLen;		/* Copy to where the replacement ends */
+
+				/* Copy our replacement in, if any */
+				if (replaceLen)
+				{
+					memcpy(ptr, replace, replaceLen);
+				}
+
+				/* Figure out how many bytes to move down, including null terminator */
+				size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
+
+				/* Move the rest of the string down */
+				memmove(moveTo, moveFrom, bytesToCopy);
+			}
+			else
+			{
+				/* EXAMPLE CASE:
+				* Subject: AABBBCCC
+				* Buffer : 12 bytes
+				* Search : BBB
+				* Replace: DDD
+				* OUTPUT : AADDDCCC
+				* POSITION:     ^
+				*/
+				/* We don't have to move anything around, just do a straight copy */
+				memcpy(ptr, replace, replaceLen);
+			}
+
+			return ptr + replaceLen;	/* first byte after the inserted replacement */
+		}
+		ptr++;
+		browsed++;
+	}
+
+	return NULL;	/* no match anywhere in subject */
+}
--- a/plugins/include/string.inc
+++ b/plugins/include/string.inc
@@ -13,69 +13,226 @@

 #define charsmax(%1) (sizeof(%1)-1)

-/* Checks if source contains string. On success function
-* returns position in source, on failure returns -1. */
-native contain(const source[],const string[]);
+/**
+ * @global 	Unless otherwise noted, all string functions which take in a 
+ *			writable buffer and maximum length should NOT have the null terminator INCLUDED
+ * 			in the length.  This means that this is valid: 
+ * 			copy(string, charsmax(string), ...)
+ */
 
-/* Checks if source contains string with case ignoring. On success function
-* returns position in source, on failure returns -1. */
+/**
+ * Calculates the length of a string.
+ *
+ * @param string		String to check.
+ * @return				Number of valid character bytes in the string.
+ */
+native strlen(const string[]);
+
+/**
+ * Tests whether a string is found inside another string.
+ *
+ * @param source		String to search in.
+ * @param string		Substring to find inside the original string.
+ *
+ * @return				-1 on failure (no match found). Any other value
+ *						indicates a position in the string where the match starts.
+ */
+native contain(const source[], const string[]);
+
+/**
+ * Tests whether a string is found inside another string with case ignoring.
+ *
+ * @param source		String to search in.
+ * @param string		Substring to find inside the original string.
+ *
+ * @return				-1 on failure (no match found). Any other value
+ *						indicates a position in the string where the match starts.
+ */
 native containi(const source[],const string[]);

-/* Replaces given string to another in given text. */
+/**
+ * Given a string, replaces the first occurrence of a search string with a 
+ * replacement string.
+ *
+ * @param text			String to perform search and replacements on.
+ * @param len			Maximum length of the string buffer.
+ * @param what			String to search for.
+ * @param with			String to replace the search string with.
+ *
+ * @return				The new string length after replacement, or 0 if no replacements were made.
+ */
 native replace(text[], len, const what[], const with[]);

-/* Adds one string to another. Last parameter different from 0, specifies
-* how many chars we want to add. Function returns number of all merged chars. */
+/**
+ * Given a string, replaces all occurrences of a search string with a 
+ * replacement string.
+ *
+ * @note Similar to replace_all() stock, but implemented as native and 
+ *       with different algorithm. This native doesn't error on bad 
+ *       buffer size and will smartly cut off the string in a way 
+ *       that pushes old data out.
+ *	
+ * @note Only available in 1.8.3 and above.
+ *
+ * @param text			String to perform search and replacements on.
+ * @param maxlength		Maximum length of the string buffer.
+ * @param search		String to search for.
+ * @param replace		String to replace the search string with.
+ * @param caseSensitive	If true (default), search is case sensitive.
+ *
+ * @return				Number of replacements that were performed.
+ */
+native replace_string(text[], maxlength, const search[], const replace[], bool:caseSensitive=true);
+
+/**
+ * Given a string, replaces the first occurrence of a search string with a 
+ * replacement string.
+ *
+ * @note Similar to replace() native, but implemented with more options and 
+ *       with different algorithm. This native doesn't error on bad 
+ *       buffer size and will smartly cut off the string in a way 
+ *       that pushes old data out.
+ *	
+ * @note Only available in 1.8.3 and above.
+ *
+ * @param text			String to perform search and replacements on.
+ * @param maxlength		Maximum length of the string buffer.
+ * @param search		String to search for.
+ * @param replace		String to replace the search string with.
+ * @param searchLen		If higher than -1, its value will be used instead of
+ *						a strlen() call on the search parameter.
+ * @param replaceLen	If higher than -1, its value will be used instead of
+ *						a strlen() call on the replace parameter.
+ * @param caseSensitive	If true (default), search is case sensitive.
+ *
+ * @return				Index into the buffer (relative to the start) from where
+ *						the last replacement ended, or -1 if no replacements were
+ *						made.
+ */
+native replace_stringex(text[], maxlength, const search[], const replace[], searchLen=-1, replaceLen=-1, bool:caseSensitive=true);
+
+/**
+ * Concatenates one string onto another.
+ *
+ * @param dest			String to append to.
+ * @param len			Maximum length of entire buffer.
+ * @param src			Source string to concatenate.
+ * @param max			Number of characters to add.
+ *
+ * @return				Number of all merged characters.
+ */
 native add(dest[],len,const src[],max=0);

-/* Fills string with given format and parameters.
- * Function returns number of copied chars.
- * Example: format(dest,"Hello %s. You are %d years old","Tom",17).
- * If any of your input buffers overlap with the destination buffer,
- *  format() falls back to a "copy-back" version as of 1.65.  This is 
- *  slower, so you should using a source string that is the same as
- *  the destination.
+/**
+ * Formats a string according to the AMX Mod X format rules (see documentation).
+ *
+ * @note Example: format(dest, charsmax(dest), "Hello %s. You are %d years old", "Tom", 17).
+ *       If any of your input buffers overlap with the destination buffer,
+ *       format() falls back to a "copy-back" version as of 1.65.  This is 
+ *       slower, so you should avoid using a source string that is the same as
+ *       the destination.
+ *
+ * @param output		Destination string buffer.
+ * @param len			Maximum length of output string buffer.
+ * @param format		Formatting rules.
+ * @param ...			Variable number of format parameters.
+ *
+ * @return				Number of cells written.
 */
-native format(output[] ,len ,const format[] , any:...);
+native format(output[], len, const format[], any:...);

-/* Same as format(), except does not perform a "copy back" check.
- * This means formatex() is faster, but DOES NOT ALLOW this type
- * of call:
- *  formatex(buffer, len, "%s", buffer)
- *  formatex(buffer, len, buffer, buffer)
- *  formatex(buffer, len, "%s", buffer[5])
- * This is because the output is directly stored into "buffer", 
- *  rather than copied back at the end.
+/**
+ * Formats a string according to the AMX Mod X format rules (see documentation).
+ *
+ * @note Same as format(), except does not perform a "copy back" check.
+ *       This means formatex() is faster, but DOES NOT ALLOW this type
+ *       of call:
+ *         formatex(buffer, len, "%s", buffer)
+ *         formatex(buffer, len, buffer, buffer)
+ *         formatex(buffer, len, "%s", buffer[5])
+ *       This is because the output is directly stored into "buffer", 
+ *       rather than copied back at the end.
+ *
+ * @param output		Destination string buffer.
+ * @param len			Maximum length of output string buffer.
+ * @param format		Formatting rules.
+ * @param ...			Variable number of format parameters.
+ *
+ * @return				Number of cells written.
 */
-native formatex(output[] ,len ,const format[] , any:...);
+native formatex(output[], len, const format[], any:...);

-/* Replacement for format_args.  Much faster and %L compatible.
- * This works exactly like vsnprintf() from C.
- * You must pass in the output buffer and its size,
- *  the string to format, and the number of the FIRST variable
- *  argument parameter.  For example, for:
- *  function (a, b, c, ...)
- *  You would pass 4 (a is 1, b is 2, c is 3, et cetera).
- * There is no vformatex().
+/**
+ * Formats a string according to the AMX Mod X format rules (see documentation).
+ *
+ * @note This is the same as format(), except it grabs parameters from a 
+ *       parent parameter stack, rather than a local.  This is useful for 
+ *       implementing your own variable argument functions.
+ *
+ * @note Replacement for format_args.  Much faster and %L compatible.
+ *       This works exactly like vsnprintf() from C.
+ *       You must pass in the output buffer and its size,
+ *        the string to format, and the number of the FIRST variable
+ *        argument parameter.  For example, for:
+ *        function (a, b, c, ...)
+ *        You would pass 4 (a is 1, b is 2, c is 3, et cetera).
+ *       There is no vformatex().
+ *
+ * @param buffer		Destination string buffer.
+ * @param len			Maximum length of output string buffer.
+ * @param fmt			Formatting rules.
+ * @param vararg		Argument number which contains the '...' symbol.
+ *						Note: Arguments start at 1.
+ * @return 				Number of bytes written.
 */
 native vformat(buffer[], len, const fmt[], vararg);

-/*
- * Same as vformat(), except works in normal style dynamic natives.
- * Instead of passing the format arg string, you can only pass the 
- *  actual format argument number itself.
- * If you pass 0, it will read the format string from an optional 
- *  fifth parameter.
+ /**
+ * Formats a string according to the AMX Mod X format rules (see documentation).
+ *
+ * @note Same as vformat(), except works in normal style dynamic natives.
+ *       Instead of passing the format arg string, you can only pass the 
+ *       actual format argument number itself.
+ *       If you pass 0, it will read the format string from an optional 
+ *       fifth parameter.
+ *
+ * @param buffer		Destination string buffer.
+ * @param len			Maximum length of output string buffer.
+ * @param fmt_arg		Argument number which contains the format.
+ * @param vararg		Argument number which contains the '...' symbol.
+ *						Note: Arguments start at 1.
+ * @return 				Number of bytes written.
 */
 native vdformat(buffer[], len, fmt_arg, vararg, ...);

-/* Gets parameters from function as formated string. */
-native format_args(output[] ,len ,pos = 0);
+/**
+ * Gets parameters from function as formatted string.
+ *
+ * @param output		Destination string buffer.
+ * @param len			Maximum length of output string buffer.
+ * @param pos			Argument number which contains the '...' symbol.
+ *
+ * @return				Number of bytes written.
+ */
+native format_args(output[], len, pos = 0);

-/* Converts number to string. */
+/**
+ * Converts an integer to a string.
+ *
+ * @param num			Integer to convert.
+ * @param string		Buffer to store string in.
+ * @param len			Maximum length of string buffer.
+ *
+ * @return				Number of cells written to buffer.
+ */
 native num_to_str(num,string[],len);

-/* Returns converted string to number. */
+/**
+ * Converts a string to an integer.
+ *
+ * @param string		String to convert.
+ * @return				Integer conversion of string, or 0 on failure.
+ */
 native str_to_num(const string[]);

 /**
@@ -144,47 +301,122 @@ native strtol(const string[], &endPos = 0, base = 0);
 */
 native Float:strtof(const string[], &endPos = 0);

-/* Converts float to string. */
+/**
+ * Converts a floating point number to a string.
+ *
+ * @param fl			Floating point number to convert.
+ * @param string		Buffer to store string in.
+ * @param len			Maximum length of string buffer.
+ *
+ * @return				Number of cells written to buffer.
+ */
 native float_to_str(Float:fl, string[], len);

-/* Parses a float. */
+/** 
+ * Converts a string to a floating point number.
+ *
+ * @param string		String to convert to a float.
+ * @return				Floating point result, or 0.0 on error.
+ */
 native Float:str_to_float(const string[]);

-/* Checks if two strings equal. If len var is set
-* then there are only c chars comapred. */
+/**
+ * Returns whether two strings are equal.
+ *
+ * @param a				First string (left).
+ * @param b				Second string (right).
+ * @param c				Number of characters to compare.
+ *
+ * @return				True if equal, false otherwise.
+ */
 native equal(const a[],const b[],c=0);

-/* Checks if two strings equal with case ignoring. 
-* If len var is set then there are only c chars comapred. */
+/**
+ * Returns whether two strings are equal with case ignoring.
+ *
+ * @param a				First string (left).
+ * @param b				Second string (right).
+ * @param c				Number of characters to compare.
+ *
+ * @return				True if equal, false otherwise.
+ */
 native equali(const a[],const b[],c=0);

-/* Copies one string to another. By len var
-*  you may specify max. number of chars to copy. */
+/**
+ * Copies one string to another string.
+ *
+ * @note If the destination buffer is too small to hold the source string, the 
+ *       destination will be truncated.
+ *
+ * @param dest			Destination string buffer to copy to.
+ * @param len			Destination buffer length.
+ * @param src			Source string buffer to copy from.
+ *
+ * @return				Number of cells written.
+ */
 native copy(dest[],len,const src[]);

-/* Copies one string to another until char ch is found. 
-*  By len var you may specify max. number of chars to copy. */
+/**
+ * Copies one string to another string until ch is found.
+ *
+ * @param dest			Destination string buffer to copy to.
+ * @param len			Destination buffer length.
+ * @param src			Source string buffer to copy from.
+ * @param ch			Character to search for.
+ *
+ * @return				Number of cells written.
+ */
 native copyc(dest[],len,const src[],ch);

-/* Sets string with given character. */
+/**
+ * Sets string with given character.
+ *
+ * @param src			Destination string buffer to copy to.
+ * @param len			Destination buffer length.
+ * @param ch			Character to set string.
+ *
+ * @noreturn
+ */
 native setc(src[],len,ch);

-/* Gets parameters from text.
-* Example: to split text: "^"This is^" the best year",
-* call function like this: parse(text,arg1,len1,arg2,len2,arg3,len3,arg4,len4)
-* and you will get: "This is", "the", "best", "year"
-* Function returns number of parsed parameters. */
+/**
+ * Gets parameters from text.
+ *
+ * @note Example: to split text: "^"This is^" the best year",
+ *       call function like this: parse(text,arg1,len1,arg2,len2,arg3,len3,arg4,len4)
+ *       and you will get: "This is", "the", "best", "year"
+ *       Function returns number of parsed parameters.
+ *
+ * @param text			String to parse.
+ * @param ...			Variable number of output buffer and maximum length pairs.
+ *
+ * @return				Number of parsed parameters.
+ */
 native parse(const text[], ... );

-/* Breaks a string into two halves, by token.
-   See strbreak() for doing this with parameters.
-   Example:
-   str1[] = This *is*some text
-   strtok(str1, left, 24, right, 24, '*')
-   left will be "This "
-   Right will be "is*some text"
-   If you use trimSpaces, all spaces are trimmed from Left.
-*/
+/**
+ * Breaks a string in two by token.
+ *
+ * @note Trimming spaces is buggy. Consider strtok2 instead.
+ *
+ * @note See argbreak() for doing this with parameters.
+ *       Example:
+ *        str1[] = This *is*some text
+ *        strtok(str1, left, 24, right, 24, '*')
+ *        left will be "This "
+ *        Right will be "is*some text"
+ *        If you use trimSpaces, all spaces are trimmed from Left.
+ *
+ * @param text			String to tokenize
+ * @param Left			Buffer to store left half
+ * @param leftLen		Size of left buffer
+ * @param Right			Buffer to store right half
+ * @param rightLen		Size of right buffer
+ * @param token			Token to split by
+ * @param trimSpaces	Whether spaces are trimmed.
+ *
+ * @noreturn
+ */
 native strtok(const text[], Left[], leftLen, Right[], rightLen, token=' ', trimSpaces=0);
   
 /**
@@ -231,9 +463,9 @@ native strtok(const text[], Left[], leftLen, Right[], rightLen, token=' ', trimS
 #define TRIM_FULL TRIM_OUTER|TRIM_INNER

 /**
- * Breaks a string in two by token
+ * Breaks a string in two by token.
 *
- * Only available in 1.8.3 and above
+ * @note Only available in 1.8.3 and above.
 *
 * @param text			String to tokenize
 * @param left			Buffer to store left half
@@ -248,40 +480,118 @@ native strtok(const text[], Left[], leftLen, Right[], rightLen, token=' ', trimS
 */
 native strtok2(const text[], left[], const llen, right[], const rlen, const token = ' ', const trim = 0);

-/* Strips spaces from the beginning and end of a string. */
+/**
+ * Removes whitespace characters from the beginning and end of a string.
+ *
+ * @param text			The string to trim.
+ * @return				Number of bytes written.
+ */
 native trim(text[]);

-/* Converts all chars in string to lower case. */
+/**
+ * Converts all chars in string to lower case.
+ *
+ * @param string		The string to convert.
+ * @return				Number of bytes written.
+ */
 native strtolower(string[]);

-/* Converts all chars in string to upper case. */
+/**
+ * Converts all chars in string to upper case.
+ *
+ * @param string		The string to convert.
+ * @return				Number of bytes written.
+ */
 native strtoupper(string[]);

-/* Make a string's first character uppercase */
+/**
+ * Make a string's first character uppercase.
+ *
+ * @param string		The string to convert.
+ * @return				1 on success, otherwise 0.
+ */
 native ucfirst(string[]);

-/* Returns true when value is digit. */
+/**
+ * Returns whether a character is numeric.
+ *
+ * @note Multi-byte characters will always return false.
+ *
+ * @param ch			Character to test.
+ * @return				True if character is numeric, otherwise false.
+ */
 native isdigit(ch);

-/* Returns true when value is letter. */
+/**
+ * Returns whether a character is an ASCII alphabet character.
+ *
+ * @note Multi-byte characters will always return false.
+ *
+ * @param ch			Character to test.
+ * @return				True if character is alphabetical, otherwise false.
+ */
 native isalpha(ch);

-/* Returns true when value is space. */
+/**
+ * Returns whether a character is whitespace.
+ *
+ * @note Multi-byte characters will always return false.
+ *
+ * @param ch			Character to test.
+ * @return				True if character is whitespace, otherwise false.
+ */
 native isspace(ch);

-/* Returns true when value is letter or digit. */
+/**
+ * Returns whether a character is numeric or an ASCII alphabet character.
+ *
+ * @note Multi-byte characters will always return false.
+ *
+ * @param ch			Character to test.
+ * @return				True if character is alphanumeric, otherwise false.
+ */
 native isalnum(ch);

-/* Concatenates a string.  Maxlength is the total buffer of the destination. */
-native strcat(dest[], const source[], maxlength);
+/**
+ * Returns if a character is multi-byte or not.
+ *
+ * @note Only available in 1.8.3 and above.
+ *
+ * @param ch			Character to test.
+ * @return				0 for a normal 7-bit ASCII character,
+ *						otherwise number of bytes in multi-byte character.
+ */
+native is_char_mb(ch);

-/* Finds a string in another string.  Returns -1 if not found. */
-native strfind(const string[], const sub[], ignorecase=0, pos=0);
+/**
+ * Returns whether an alphabetic character is uppercase.
+ *
+ * @note Only available in 1.8.3 and above.
+ * @note Multi-byte characters will always return false.
+ *
+ * @param ch			Character to test.
+ * @return				True if character is uppercase, otherwise false.
+ */
+native bool:is_char_upper(ch);

-/* Compares two strings with the C function strcmp().  Returns 0 on equal. */
-native strcmp(const string1[], const string2[], ignorecase=0);
+/**
+ * Returns whether an alphabetic character is lowercase.
+ *
+ * @note Only available in 1.8.3 and above.
+ * @note Multi-byte characters will always return false.
+ *
+ * @param ch			Character to test.
+ * @return				True if character is lowercase, otherwise false.
+ */
+native bool:is_char_lower(ch);

-/* Tests if given string contains only digits. Also, returns false for zero-length strings. */
+/**
+ * Returns whether a given string contains only digits.
+ * This returns false for zero-length strings.
+ *
+ * @param sString		String to test.
+ * @return				True if string contains only digits, otherwise false.
+ */
 stock bool:is_str_num(const sString[])
 {
 	new i = 0;
@@ -292,31 +602,139 @@ stock bool:is_str_num(const sString[])
 	return sString[i] == 0 && i != 0;
 }

-// Warning: this function is deprecated as it does not work properly. Use
-// argparse() or argbreak().
-native strbreak(const text[], Left[], leftLen, Right[], rightLen);
+/** 
+ * Returns the number of bytes a character is using.  This is
+ * for multi-byte characters (UTF-8).  For normal ASCII characters,
+ * this will return 1.
+ *
+ * @note Only available in 1.8.3 and above.
+ *
+ * @param source		Source input string.
+ * @return				Number of bytes the current character uses.
+ */
+native get_char_bytes(const source[]);
+
+/**
+ * Converts a lowercase ASCII character to its uppercase equivalent.
+ *
+ * @note Only available in 1.8.3 and above.
+ *
+ * @param chr			Character to convert.
+ * @return				Uppercase character if chr is lowercase,
+ *						otherwise chr unchanged.
+ */
+stock char_to_upper(chr)
+{
+	if (is_char_lower(chr))
+	{
+		return (chr & ~(1<<5));	/* Clearing bit 5 maps 'a'-'z' onto 'A'-'Z'. */
+	}
+	return chr;
+}
+
+/**
+ * Converts an uppercase ASCII character to its lowercase equivalent.
+ *
+ * @note Only available in 1.8.3 and above.
+ *
+ * @param chr			Character to convert.
+ * @return				Lowercase character if chr is uppercase,
+ *						otherwise chr unchanged.
+ */
+stock char_to_lower(chr)
+{
+	if (is_char_upper(chr))
+	{
+		return (chr | (1<<5));	/* Setting bit 5 maps 'A'-'Z' onto 'a'-'z'. */
+	}
+	return chr;
+}
+
+/**
+ * Concatenates one string onto another.
+ *
+ * @param dest			String to append to.
+ * @param source		Source string to concatenate.
+ * @param maxlength		Maximum length of entire buffer.
+ * @return				Number of bytes written.
+ */
+native strcat(dest[], const source[], maxlength);
+
+/**
+ * Tests whether a string is found inside another string.
+ *
+ * @param string		String to search in.
+ * @param sub			Substring to find inside the original string.
+ * @param ignorecase	If true, search is case insensitive.
+ *						If false (default), search is case sensitive.
+ * @param pos			Position in the string to start searching from.
+ * @return				-1 on failure (no match found). Any other value
+ *						indicates a position in the string where the match starts.
+ */
+native strfind(const string[], const sub[], ignorecase=0, pos=0);
+
+/**
+ * Compares two strings lexicographically.
+ *
+ * @param string1		First string (left).
+ * @param string2		Second string (right).
+ * @param ignorecase	If true, comparison is case insensitive.
+ *						If false (default), comparison is case sensitive.
+ * @return				-1 if string1 < string2
+ *						0 if string1 == string2
+ *						1 if string1 > string2
+ */
+native strcmp(const string1[], const string2[], ignorecase=0);
+
+/**
+ * Compares parts of two strings lexicographically.
+ *
+ * @note Only available in 1.8.3 and above.
+ *
+ * @param string1		First string (left).
+ * @param string2		Second string (right).
+ * @param num			Number of characters to compare.
+ * @param ignorecase	If true, comparison is case insensitive.
+ *						If false (default), comparison is case sensitive.
+ * @return				-1 if string1 < string2
+ *						0 if string1 == string2
+ *						1 if string1 > string2
+ */
+native strncmp(const string1[], const string2[], num, bool:ignorecase=false);
+
+/**
+ * Backwards compatibility stock that forwards to argbreak() - use argbreak() or argparse() instead.
+ * @deprecated			The strbreak() native this stock replaces did not work properly.
+ */
+//#pragma deprecated Use argbreak() instead
+stock strbreak(const text[], Left[], leftLen, Right[], rightLen)
+{
+	return argbreak(text, Left, leftLen, Right, rightLen);
+}

 /**
 * Parses an argument string to find the first argument. You can use this to
 * replace strbreak().
 *
- * You can use argparse() to break a string into all of its arguments:
- *   new arg[N], pos;
- *   while (true) {
- *     pos = argparse(string, pos, arg, sizeof(arg) - 1);
- *     if (pos == -1)
- *       break;
- *   }
+ * @note Only available in 1.8.3 and above.
 *
- * All initial whitespace is removed. Remaining characters are read until an
- * argument separator is encountered. A separator is any whitespace not inside
- * a double-quotation pair (i.e. "x b" is one argument). If only one quotation
- * mark appears, argparse() acts as if one existed at the end of the string.
- * Quotation marks are never written back, and do not act as separators. For
- * example, "a""b""c" will return "abc". An empty quote pair ("") will count
- * as an argument containing no characters.
+ * @note You can use argparse() to break a string into all of its arguments:
+ *       new arg[N], pos;
+ *       while (true) {
+ *         pos = argparse(string, pos, arg, sizeof(arg) - 1);
+ *         if (pos == -1)
+ *           break;
+ *       }
 *
- * argparse() will write an empty string to argbuffer if no argument is found.
+ * @note All initial whitespace is removed. Remaining characters are read until an
+ *       argument separator is encountered. A separator is any whitespace not inside
+ *       a double-quotation pair (i.e. "x b" is one argument). If only one quotation
+ *       mark appears, argparse() acts as if one existed at the end of the string.
+ *       Quotation marks are never written back, and do not act as separators. For
+ *       example, "a""b""c" will return "abc". An empty quote pair ("") will count
+ *       as an argument containing no characters.
+ *
+ * @note argparse() will write an empty string to argbuffer if no argument is found.
 *
 * @param text          String to tokenize.
 * @param pos           Position to start parsing from.
@@ -328,7 +746,19 @@ native strbreak(const text[], Left[], leftLen, Right[], rightLen);
 */
 native argparse(const text[], pos, argbuffer[], maxlen);

-/* Emulates strbreak() using argparse(). */
+/**
+ * Emulates strbreak() using argparse().
+ *
+ * @param text			Source input string.
+ * @param left			Buffer to store string left part.
+ * @param leftlen		Maximum length of the string part buffer.
+ * @param right			Buffer to store string right part.
+ * @param rightlen		Maximum length of the string part buffer.
+ *
+ * @return				-1 if no match was found; otherwise, an index into source
+ *						marking the first index after the searched text.  The
+ *						index is always relative to the start of the input string.
+ */
 stock argbreak(const text[], left[], leftlen, right[], rightlen)
 {
 	new pos = argparse(text, 0, left, leftlen);
@@ -344,11 +774,34 @@ stock argbreak(const text[], left[], leftlen, right[], rightlen)
 	return pos;
 }

-/* It is basically strbreak but you have a delimiter that is more than one character in length.
-   You pass the Input string, the Left output, the max length of the left output,
-   the right output , the max right length, and then the delimiter string.
-   By Suicid3
-*/
+/**
+ * Returns text in a string up until a certain character sequence is reached.
+ *
+ * @note Only available in 1.8.3 and above.
+ *
+ * @param source		Source input string.
+ * @param split			A string which specifies a search point to break at.
+ * @param part			Buffer to store string part.
+ * @param partLen		Maximum length of the string part buffer.
+ *
+ * @return				-1 if no match was found; otherwise, an index into source
+ *						marking the first index after the searched text.  The
+ *						index is always relative to the start of the input string.
+ */
+native split_string(const source[], const split[], part[], partLen);
+
+ /**
+ * It is basically strbreak but you have a delimiter that is more than one character in length. By Suicid3.
+ *
+ * @param szInput		Source input string.
+ * @param szLeft		Buffer to store left string part.
+ * @param pL_Max		Maximum length of the string part buffer.
+ * @param szRight		Buffer to store right string part.
+ * @param pR_Max		Maximum length of the string part buffer.
+ * @param szDelim		A string which specifies a search point to break at.
+ * 
+ * @noreturn
+ */
 stock split(const szInput[], szLeft[], pL_Max, szRight[], pR_Max, const szDelim[])
 {
 	new iEnd = contain(szInput, szDelim);
@@ -373,7 +826,15 @@ stock split(const szInput[], szLeft[], pL_Max, szRight[], pR_Max, const szDelim[
 	return;
 }

-/* Removes a path from szFilePath leaving the name of the file in szFile for a pMax length. */
+ /**
+ * Removes a path from szFilePath leaving the name of the file in szFile for a pMax length.
+ *
+ * @param szFilePath	String to perform search and replacements on.
+ * @param szFile		Buffer to store file name.
+ * @param pMax			Maximum length of the string buffer.
+ * 
+ * @noreturn
+ */
 stock remove_filepath(const szFilePath[], szFile[], pMax)
 {
 	new len = strlen(szFilePath);
@@ -385,9 +846,20 @@ stock remove_filepath(const szFilePath[], szFile[], pMax)
 	return;
 }

-/* Replaces a contained string iteratively.
- * This ensures that no infinite replacements will take place by
- *  intelligently moving to the next string position each iteration.
+ /**
+ * Replaces a contained string iteratively.
+ *
+ * @note Consider using replace_string() instead.
+ *
+ * @note This ensures that no infinite replacements will take place by
+ *       intelligently moving to the next string position each iteration.
+ *
+ * @param string	String to perform search and replacements on.
+ * @param len		Maximum length of the string buffer.
+ * @param what		String to search for.
+ * @param with		String to replace the search string with.
+ *
+ * @return			Number of replacements on success, otherwise 0.
 */
 stock replace_all(string[], len, const what[], const with[])
 {
@@ -435,3 +907,78 @@ stock replace_all(string[], len, const what[], const with[])
 	
 	return total;
 }
+
+/**
+ * Breaks a string into pieces and stores each piece into an array of buffers.
+ *
+ * @param text				The string to split.
+ * @param split				The string to use as a split delimiter.
+ * @param buffers			An array of string buffers (2D array).
+ * @param maxStrings		Number of string buffers (first dimension size).
+ * @param maxStringLength	Maximum length of each string buffer.
+ * @param copyRemainder		False (default) to discard excess pieces, true to store the rest
+ *							of the text, delimiters included, in the last buffer.
+ * @return					Number of strings retrieved, or 0 if maxStrings < 1 or split is empty.
+ */
+stock explode_string(const text[], const split[], buffers[][], maxStrings, maxStringLength, bool:copyRemainder = false)
+{
+	new reloc_idx, idx, total;	/* reloc_idx: offset into text of the part not split yet. */
+
+	if (maxStrings < 1 || !split[0])
+	{
+		return 0;
+	}
+
+	while ((idx = split_string(text[reloc_idx], split, buffers[total], maxStringLength)) != -1)
+	{
+		reloc_idx += idx;
+		if (++total == maxStrings)
+		{
+			if (copyRemainder)
+			{
+				copy(buffers[total-1], maxStringLength, text[reloc_idx-idx]);	/* Overwrite the last piece with everything from its start onward. */
+			}
+			return total;
+		}
+	}
+
+	copy(buffers[total++], maxStringLength, text[reloc_idx]);	/* No delimiter left: the tail is the final piece. */
+
+	return total;
+}
+
+/**
+ * Joins an array of strings into one string, with a "join" string inserted in
+ * between each given string.  This function complements explode_string().
+ *
+ * @param strings		An array of strings.
+ * @param numStrings	Number of strings in the array.
+ * @param join			The join string to insert between each string.
+ * @param buffer		Output buffer to write the joined string to.
+ * @param maxLength		Maximum length of the output buffer.
+ * @return				Number of characters written to the output buffer (never more than maxLength).
+ */
+stock implode_strings(const strings[][], numStrings, const join[], buffer[], maxLength)
+{
+	new total, length;
+	new join_length = strlen(join);
+	for (new i=0; i<numStrings; i++)
+	{
+		length = copy(buffer[total], maxLength-total, strings[i]);
+		total += length;
+		if (length < strlen(strings[i]))	/* Piece was truncated: the buffer is full. */
+		{
+			break;
+		}
+		if (i != numStrings - 1)
+		{
+			length = copy(buffer[total], maxLength-total, join);
+			total += length;
+			if (length < join_length)	/* Separator was truncated: the buffer is full. */
+			{
+				break;
+			}
+		}
+	}
+	return total;
+}
--- a/plugins/testsuite/utf8test.sma
+++ b/plugins/testsuite/utf8test.sma
@@ -0,0 +1,234 @@
+#include <amxmodx>
+
+/**
+ * Warning: To get the expected results, this file must be saved as UTF-8 without BOM.
+ */
+ 
+public plugin_init()
+{
+    register_plugin("UTF-8 Test", AMXX_VERSION_STR, "AMXX Dev Team");
+    register_srvcmd("utf8test", "OnServerCommand");    // The "utf8test" server command runs the whole suite in OnServerCommand().
+}
+
+new ErrorCount;    // Number of test() calls whose comparison did not hold.
+new TestNumber;    // Number of test() calls made so far; also the id printed for a failed test.
+
+enum TestType              // Relation that test() checks between its operands a and b.
+{
+    TT_Equal = 0,          // a == b
+    TT_LessThan,           // a <  b
+    TT_GreaterThan,        // a >  b
+    TT_LessThanEqual,      // a <= b
+    TT_GreaterThanEqual,   // a >= b
+    TT_NotEqual            // a != b
+};
+
+new const TestWords[TestType][] =    // Operator text printed by test() on failure; entries follow TestType order.
+{
+    "==",
+    "<",
+    ">",
+    "<=",
+    ">=",
+    "!="
+};
+
+// Runs one numbered check "a <type> b"; a failing check is counted and printed.
+test(any:a, any:b = true, TestType:type = TT_Equal)
+{
+    ++TestNumber;
+
+    new bool:ok = false;
+    switch (type)
+    {
+        case TT_Equal:              ok = (a == b);
+        case TT_NotEqual:           ok = (a != b);
+        case TT_LessThan:           ok = (a < b);
+        case TT_LessThanEqual:      ok = (a <= b);
+        case TT_GreaterThan:        ok = (a > b);
+        case TT_GreaterThanEqual:   ok = (a >= b);
+    }
+    if (ok)
+    {
+        return;
+    }
+    ++ErrorCount;
+    server_print("^tFailed test #%d (%d %s %d)", TestNumber, a, bool:TestWords[type], b);
+}
+
+showResult()
+{
+    // Print the summary; both messages show "passed/total".
+    if (!ErrorCount)
+        server_print("All tests passed (%d/%d).", TestNumber, TestNumber);
+    else
+        server_print("Tests failed (%d/%d passed).", TestNumber - ErrorCount, TestNumber);
+
+    // Reset the counters so that running the command again starts a fresh report.
+    ErrorCount = TestNumber = 0;
+}
+ 
+public OnServerCommand()  
+{
+    /**
+     * Handler of the "utf8test" server command. Initialize the data shared by the tests below.
+     */
+    new reference[] = "𤭢hi AMXX® Hello㋡ crab?ൠ";
+    
+    new Array:a = ArrayCreate(sizeof reference);
+    ArrayPushString(a, reference);
+    
+    new Trie:t = TrieCreate();
+    TrieSetString(t, "reference", reference);
+    
+    new DataPack:d = CreateDataPack();
+    WritePackString(d, reference);
+    ResetPack(d);
+    
+    set_localinfo("reference", reference);
+    
+    
+    server_print("Counting character bytes...");
+    {
+        test(get_char_bytes("®") == 2);
+        test(get_char_bytes("㋡") == 3);
+        test(get_char_bytes("𤭢") == 4);
+        test(get_char_bytes("ൠ") == 3);
+    }
+    
+    server_print("Checking character bytes...");
+    {
+        /**
+         * is_char_mb() also returns the number of bytes if not 0.
+         */
+        test(is_char_mb(reference[0]) != 0);  // 𤭢
+        test(is_char_mb(reference[11]) != 0); // ®
+        test(is_char_mb(reference[19]) != 0); // ㋡
+        test(is_char_mb(reference[29]) != 0); // ൠ  NOTE(review): by byte count ൠ seems to start at index 28, so 29 would be its 2nd byte - confirm
+    }
+    
+    server_print("Checking truncated character bytes - atcprintf...");
+    {
+        /**
+         * Truncating '𤭢' at different indexes. '𤭢' = 4 bytes
+         * A buffer of 4 = 3 bytes + EOS.
+         * Expected result: empty buffer.
+         */
+        new buffer1[4]; 
+        for(new i = charsmax(buffer1), length1; i >= 0; --i)
+        {
+            length1 = formatex(buffer1, i, "%s", reference);
+            test(buffer1[0] == EOS && length1 == 0); 
+        }
+        
+        /**
+         * Truncating inside '®'.
+         * Expected result: '®' should be skipped.
+         */
+        new buffer2[12];
+        new length2 = formatex(buffer2, charsmax(buffer2), "%s", reference);
+        test(strcmp(buffer2, "𤭢hi AMXX") == 0 && length2 == strlen("𤭢hi AMXX"));
+        
+        /**
+         * Truncating inside 'ൠ'.
+         * Buffer of 14: Enough to hold "㋡ crab?ൠ"
+         * Retrieve 11 bytes using precision format, from '㋡' to inside 'ൠ'.
+         * Expected result: 'ൠ' should be skipped.
+         */
+        new buffer3[14]; 
+        new length3 = formatex(buffer3, charsmax(buffer3), "%.11s", reference[19]);
+        test(strcmp(buffer3, "㋡ crab?") == 0 && length3 == get_char_bytes("㋡") + strlen(" crab?")); 
+    }
+    
+    server_print("Checking truncated character bytes - set_amxstring_utf8..."); 
+    {
+        /**
+         * Splits string at '㋡'.
+         * Buffer has 16 cells, so it can hold only 15 bytes + EOS.
+         * Expected result: '㋡' should not be included and returned position should be after '㋡'.
+         */
+        new buffer1[16];
+        new index1 = split_string(reference, "㋡", buffer1, charsmax(buffer1));
+        test(strcmp(buffer1, "𤭢hi AMXX® H") == 0 && index1 == strlen("𤭢hi AMXX® Hello") + get_char_bytes("㋡")); 
+        
+        /**
+         * Splits string at '𤭢'.
+         * Expected result: Empty string and returned position should be after '𤭢'.
+         */
+        new buffer2[5];
+        new index2 = split_string(reference, "𤭢", buffer2, charsmax(buffer2));
+        test(buffer2[0] == EOS && index2 == get_char_bytes("𤭢"));
+        
+        /**
+         * Splits string at '\ൠ'.
+         * Expected result: Empty string and returned position should be -1 (not found).
+         */
+        new buffer3[12];
+        new index3 = split_string(reference, "\ൠ", buffer3, charsmax(buffer3));
+        test(buffer3[0] == EOS && index3 == -1); 
+        
+        /**
+         * Truncating '𤭢' at different indexes. '𤭢' = 4 bytes
+         * A buffer of 4 = 3 bytes + EOS.
+         * Expected result: empty buffer.
+         */
+        new buffer4[4]; 
+        for(new i = charsmax(buffer4), length4; i >= 0; --i)
+        {
+            length4 = get_localinfo("reference", buffer4, i);
+            test(buffer4[0] == EOS && length4 == 0); 
+        }
+    }
+
+    server_print("Checking truncated character bytes - direct copy...");
+    {
+        /**
+         * Replaces '®' by '𤭢'.
+         * Expected result: '𤭢' should eat '® H', which counts 4 bytes.
+         */
+        new count1 = replace_string(reference, charsmax(reference), "®", "𤭢");
+        test(strcmp(reference, "𤭢hi AMXX𤭢ello㋡ crab?ൠ") == 0 && count1 == 1);
+        
+        /**
+         * Replaces '®' by '𤭢' again; '®' is gone after the previous replacement.
+         * Expected result: not found.
+         */
+        new count2 = replace_string(reference, charsmax(reference), "®", "𤭢");
+        test(strcmp(reference, "𤭢hi AMXX𤭢ello㋡ crab?ൠ") == 0 && count2 == 0);
+        
+        /**
+         * Replaces 'ൠ' by '𤭢'.
+         * Expected result: 'ൠ' = 3 bytes, '𤭢' = 4 bytes. Not enough room to hold '𤭢', skipping it.
+         */
+        new count3 = replace_string(reference, charsmax(reference), "ൠ", "𤭢");
+        test(strcmp(reference, "𤭢hi AMXX𤭢ello㋡ crab?") == 0 && count3 == 1);
+        
+        /**
+         * Gets reference string with limited buffer.
+         * Expected result: '㋡' should be ignored as there is no room.
+         */
+        new buffer[charsmax(reference) - 9];
+        ArrayGetString(a, 0, buffer, charsmax(buffer));
+        test(strcmp(buffer, "𤭢hi AMXX® Hello") == 0);
+        
+        /**
+         * Gets reference string with limited buffer.
+         * Expected result: '㋡' should be ignored as there is no room.
+         */
+        TrieGetString(t, "reference", buffer, charsmax(buffer));
+        test(strcmp(buffer, "𤭢hi AMXX® Hello") == 0);
+        
+        /**
+         * Gets reference string with limited buffer.
+         * Expected result: '㋡' should be ignored as there is no room.
+         */
+        new length = ReadPackString(d, buffer, charsmax(buffer));
+        test(strcmp(buffer, "𤭢hi AMXX® Hello") == 0 && length == strlen("𤭢hi AMXX® Hello"));
+    }
+    
+    ArrayDestroy(a);
+    TrieDestroy(t);
+    DestroyDataPack(d);
+
+    showResult();
+}
--- a/support/PackageScript
+++ b/support/PackageScript
@@ -239,6 +239,7 @@ scripting_files = [
  'testsuite/sqlxtest.sq3',
  'testsuite/sqlxtest.sql',
  'testsuite/trietest.sma',
+  'testsuite/utf8test.sma',
  'include/amxconst.inc',
  'include/amxmisc.inc',
  'include/amxmodx.inc',