Improve UTF-8 support in some natives (bug 6475) (#407)

* Compile as static library, update AMBuildScript and link to core * Update VS project files to include the library * Add UTF-8 Rewind library (v1.5.1) to third_party directory * Update ACKNOWLEDGEMENTS.txt * Move AMXX buffer in its own function * Move constants from string.inc to string_const.inc and update project files * Move stocks from string.inc to string_stocks.inc and update project files * Improve UTF-8 support in containi() and update documentation * Improve UTF-8 support in strcmp() and update documentation * Improve UTF-8 support in strfind() and update documentation. It is worth noting that this native with ignorecase set was not working properly. So broken that no one reported the issue. This also adds a safety check for the "pos" parameter so it does not go < 0. * Improve UTF-8 support in strncmp() and update documentation * Improve UTF-8 support in equali() and update documentation * Add an option to some UTF-8 Rewind functions to avoid invalid data being replaced. By default it replaces any invalid byte or sequence of bytes by 0xFFFD (3 bytes). It can be problematic when the input buffer is not changed (from a plugin) and some natives need to calculate a position from the converted string. With such a replacement, the position is displaced due to the final string length being larger. This compiles the library as C++, because I added some silly param with a default value, which is not supported by C. * Improve UTF-8 support in replace_string/ex() and update documentation * Add is_string_category() and update documentation * Update a little testsuite plugin (and fix linux compilation) * Add mb_strtolower/upper() and update documentation * Add mb_ucfirst() and update documentation * Add mb_strtotitle() and update documentation * Improve UTF-8 support in get_players() and find_player() with name/case insensitive flags set * Fix KliPPy's complaint
2017-08-05 10:32:16 +02:00
parent 07c3d49cfa
commit ab854ec035
34 changed files with 20166 additions and 532 deletions
--- a/amxmodx/util.cpp
+++ b/amxmodx/util.cpp
@ -9,6 +9,7 @@

 #include <time.h>
 #include "amxmodx.h"
+#include <utf8rewind.h>

 int UTIL_ReadFlags(const char* c) 
 {
@ -454,11 +455,38 @@ int UTIL_CheckValidChar(D *c)
 	return 0;
 }

-unsigned int UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search, const char *replace, bool caseSensitive)
-{
-	size_t searchLen = strlen(search);
-	size_t replaceLen = strlen(replace);
+static char OutputBuffer1[MAX_BUFFER_LENGTH]; // scratch buffer that receives the case-folded copy of the first string in utf8stristr()/utf8strncasecmp()
+static char OutputBuffer2[MAX_BUFFER_LENGTH]; // scratch buffer that receives the case-folded copy of the second string in the same helpers

+char* utf8stristr(const char *string1, const char *string2) // Case-insensitive substring search: case-folds both inputs, then looks for folded string2 inside folded string1. Returns strstr()'s result, i.e. a pointer into the static OutputBuffer1 (NOT into string1), or NULL when there is no match.
+{
+	auto string1Length = utf8casefold(string1, strlen(string1), OutputBuffer1, MAX_BUFFER_LENGTH - 1, UTF8_LOCALE_DEFAULT, nullptr, TRUE); // fold string1 into scratch buffer 1; NOTE(review): the trailing TRUE is presumably the "do not replace invalid data" option described in this commit's message - confirm in utf8rewind.h
+	auto string2Length = utf8casefold(string2, strlen(string2), OutputBuffer2, MAX_BUFFER_LENGTH - 1, UTF8_LOCALE_DEFAULT, nullptr, TRUE); // fold string2 into scratch buffer 2; the errors out-parameter is passed as nullptr, so no error code is inspected
+
+	OutputBuffer1[string1Length] = '\0'; // terminate by hand using the returned length; assumes utf8casefold never reports more than the MAX_BUFFER_LENGTH - 1 capacity it was given - TODO confirm
+	OutputBuffer2[string2Length] = '\0';
+
+	return strstr(OutputBuffer1, OutputBuffer2); // plain byte search on the folded copies; the result is overwritten by the next call that writes OutputBuffer1 (this function or utf8strncasecmp)
+}
+
+int utf8strncasecmp(const char *string1, const char *string2, size_t n) // Case-insensitive comparison of two strings after case-folding both. n != 0 compares at most n bytes (strncmp); n == 0 is a sentinel meaning "compare the whole strings" (strcmp). Returns the strncmp()/strcmp() result.
+{
+	auto string1Length = utf8casefold(string1, strlen(string1), OutputBuffer1, MAX_BUFFER_LENGTH - 1, UTF8_LOCALE_DEFAULT, nullptr, TRUE); // the whole of string1 is folded regardless of n; NOTE(review): the trailing TRUE is presumably the "do not replace invalid data" option described in this commit's message - confirm in utf8rewind.h
+	auto string2Length = utf8casefold(string2, strlen(string2), OutputBuffer2, MAX_BUFFER_LENGTH - 1, UTF8_LOCALE_DEFAULT, nullptr, TRUE); // same for string2, into the second scratch buffer
+
+	OutputBuffer1[string1Length] = '\0'; // terminate by hand using the returned length; assumes utf8casefold never reports more than the MAX_BUFFER_LENGTH - 1 capacity it was given - TODO confirm
+	OutputBuffer2[string2Length] = '\0';
+
+	return n != 0 ? strncmp(OutputBuffer1, OutputBuffer2, n) : strcmp(OutputBuffer1, OutputBuffer2); // n is applied to the bytes of the folded copies, not to the bytes of the original inputs
+}
+
+int utf8strcasecmp(const char *string1, const char *string2) // Case-insensitive comparison of two whole strings; returns whatever utf8strncasecmp() returns.
+{
+	return utf8strncasecmp(string1, string2, 0); // n == 0 makes utf8strncasecmp take its strcmp() path, i.e. no length limit
+}
+
+size_t UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
+{
 	char *newptr, *ptr = subject;
 	unsigned int total = 0;
 	while ((newptr = UTIL_ReplaceEx(ptr, maxlength, search, searchLen, replace, replaceLen, caseSensitive)) != NULL)
@ -476,6 +504,11 @@ unsigned int UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search
 	return total;
 }

+size_t UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search, const char *replace, bool caseSensitive) // Convenience overload for callers that do not have the search/replace lengths at hand; returns the result of the length-taking overload unchanged.
+{
+	return UTIL_ReplaceAll(subject, maxlength, search, strlen(search), replace, strlen(replace), caseSensitive); // lengths are measured with strlen(), so search and replace must be NUL-terminated
+}
+
 template unsigned int strncopy<char, char>(char *, const char *, size_t);
 template unsigned int strncopy<char, cell>(char *, const cell *, size_t);
 template unsigned int strncopy<cell, char>(cell *, const char *, size_t);
@ -534,7 +567,7 @@ char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t se
 		/* If the search matches and the replace length is 0,
 		* we can just terminate the string and be done.
 		*/
-		if ((caseSensitive ? strcmp(subject, search) : strcasecmp(subject, search)) == 0 && replaceLen == 0)
+		if ((caseSensitive ? strcmp(subject, search) : utf8strcasecmp(subject, search)) == 0 && replaceLen == 0)
 		{
 			*subject = '\0';
 			return subject;
@ -551,7 +584,7 @@ char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t se
 	while (*ptr != '\0' && (browsed <= textLen - searchLen))
 	{
 		/* See if we get a comparison */
-		if ((caseSensitive ? strncmp(ptr, search, searchLen) : strncasecmp(ptr, search, searchLen)) == 0)
+		if ((caseSensitive ? strncmp(ptr, search, searchLen) : utf8strncasecmp(ptr, search, searchLen)) == 0)
 		{
 			if (replaceLen > searchLen)
 			{