From a86ca1491f97961cd58ac1492272130742bb5d5b Mon Sep 17 00:00:00 2001 From: Arkshine Date: Wed, 30 Apr 2014 09:33:03 +0200 Subject: [PATCH] Add new string natives/stocks, make some UTF-8 safe (bug 6110, r=ds) --- amxmodx/amxmodx.cpp | 95 ++-- amxmodx/amxmodx.h | 6 + amxmodx/datapacks.cpp | 5 +- amxmodx/datastructs.h | 7 + amxmodx/file.cpp | 4 +- amxmodx/format.cpp | 71 +++ amxmodx/string.cpp | 242 ++++++++++- amxmodx/trie_natives.h | 11 +- amxmodx/util.cpp | 272 ++++++++++++ plugins/include/string.inc | 773 ++++++++++++++++++++++++++++----- plugins/testsuite/utf8test.sma | 234 ++++++++++ support/PackageScript | 1 + 12 files changed, 1560 insertions(+), 161 deletions(-) create mode 100644 plugins/testsuite/utf8test.sma diff --git a/amxmodx/amxmodx.cpp b/amxmodx/amxmodx.cpp index 217b3721..dc856d34 100755 --- a/amxmodx/amxmodx.cpp +++ b/amxmodx/amxmodx.cpp @@ -234,8 +234,9 @@ static cell AMX_NATIVE_CALL console_print(AMX *amx, cell *params) /* 2 param */ if (len > 254) { len = 254; - if (((message[len - 1] & 0xFF) >= 0xC2) && ((message[len - 1] & 0xFF) <= 0xEF)) { // Don't truncate a double-byte character - len--; + if ((message[len - 1] & 1 << 7)) + { + len -= UTIL_CheckValidChar(message + len - 1); // Don't truncate a multi-byte character } } message[len++] = '\n'; @@ -252,9 +253,9 @@ static cell AMX_NATIVE_CALL console_print(AMX *amx, cell *params) /* 2 param */ if (len > 126) // Client console truncates after byte 127. 
(126 + \n = 127) { len = 126; - if (((message[len - 1] & 0xFF) >= 0xC2) && ((message[len - 1] & 0xFF) <= 0xEF)) // Don't truncate a double-byte character + if ((message[len - 1] & 1 << 7)) { - len--; + len -= UTIL_CheckValidChar(message + len - 1); // Don't truncate a multi-byte character } } message[len++] = '\n'; // Client expects newline from the server @@ -289,9 +290,9 @@ static cell AMX_NATIVE_CALL client_print(AMX *amx, cell *params) /* 3 param */ if (((params[2] == 1) || (params[2] == 2)) && (len > 126)) // Client console truncates after byte 127. (126 + \n = 127) { len = 126; - if (((msg[len - 1] & 0xFF) >= 0xC2) && ((msg[len - 1] & 0xFF) <= 0xEF)) // Don't truncate a double-byte character + if ((msg[len - 1] & 1 << 7)) { - len--; + len -= UTIL_CheckValidChar(msg + len - 1); // Don't truncate a multi-byte character } } msg[len++] = '\n'; // Client expects newline from the server @@ -323,9 +324,9 @@ static cell AMX_NATIVE_CALL client_print(AMX *amx, cell *params) /* 3 param */ if (((params[2] == 1) || (params[2] == 2)) && (len > 126)) // Client console truncates after byte 127. (126 + \n = 127) { len = 126; - if (((msg[len - 1] & 0xFF) >= 0xC2) && ((msg[len - 1] & 0xFF) <= 0xEF)) // Don't truncate a double-byte character + if ((msg[len - 1] & 1 << 7)) { - len--; + len -= UTIL_CheckValidChar(msg + len - 1); // Don't truncate a multi-byte character } } msg[len++] = '\n'; // Client expects newline from the server @@ -370,6 +371,16 @@ static cell AMX_NATIVE_CALL client_print_color(AMX *amx, cell *params) /* 3 para { g_langMngr.SetDefLang(i); msg = format_amxstring(amx, params, 3, len); + + if (len > 190) // Server crashes after byte 190. 
(190 + \n = 191) + { + len = 190; + if ((msg[len - 1] & 1 << 7)) + { + len -= UTIL_CheckValidChar(msg + len - 1); // Don't truncate a multi-byte character + } + } + msg[len++] = '\n'; msg[len] = 0; @@ -392,6 +403,16 @@ static cell AMX_NATIVE_CALL client_print_color(AMX *amx, cell *params) /* 3 para g_langMngr.SetDefLang(index); msg = format_amxstring(amx, params, 3, len); + + if (len > 190) // Server crashes after byte 190. (190 + \n = 191) + { + len = 190; + if ((msg[len - 1] & 1 << 7)) + { + len -= UTIL_CheckValidChar(msg + len - 1); // Don't truncate a multi-byte character + } + } + msg[len++] = '\n'; msg[len] = 0; @@ -663,7 +684,9 @@ static cell AMX_NATIVE_CALL get_user_name(AMX *amx, cell *params) /* 3 param */ { int index = params[1]; - return set_amxstring(amx, params[2], (index < 1 || index > gpGlobals->maxClients) ? hostname->string : g_players[index].name.c_str(), params[3]); + const char *name = (index < 1 || index > gpGlobals->maxClients) ? + hostname->string : g_players[index].name.c_str(); // Out-of-range index (e.g. 0) yields the server hostname; g_players[] is only touched for a valid index + return set_amxstring_utf8(amx, params[2], name, strlen(name), params[3] + 1); // + EOS; the length is measured on the string actually returned } static cell AMX_NATIVE_CALL get_user_index(AMX *amx, cell *params) /* 1 param */ @@ -1597,8 +1620,8 @@ static cell AMX_NATIVE_CALL get_concmd(AMX *amx, cell *params) /* 7 param */ if (cmd == 0) return 0; - set_amxstring(amx, params[2], cmd->getCmdLine(), params[3]); - set_amxstring(amx, params[5], cmd->getCmdInfo(), params[6]); + set_amxstring_utf8(amx, params[2], cmd->getCmdLine(), strlen(cmd->getCmdLine()), params[3] + 1); // + EOS + set_amxstring_utf8(amx, params[5], cmd->getCmdInfo(), strlen(cmd->getCmdInfo()), params[6] + 1); // + EOS cell *cpFlags = get_amxaddr(amx, params[4]); *cpFlags = cmd->getFlags(); @@ -1633,9 +1656,9 @@ static cell AMX_NATIVE_CALL get_clcmd(AMX *amx, cell *params) /* 7 param */ if (cmd == 0) return 0; - - set_amxstring(amx, params[2], cmd->getCmdLine(), params[3]); - set_amxstring(amx, params[5], cmd->getCmdInfo(), params[6]); + + set_amxstring_utf8(amx, params[2], 
cmd->getCmdLine(), strlen(cmd->getCmdLine()), params[3] + 1); // + EOS + set_amxstring_utf8(amx, params[5], cmd->getCmdInfo(), strlen(cmd->getCmdInfo()), params[6] + 1); // + EOS cell *cpFlags = get_amxaddr(amx, params[4]); *cpFlags = cmd->getFlags(); @@ -1649,8 +1672,8 @@ static cell AMX_NATIVE_CALL get_srvcmd(AMX *amx, cell *params) if (cmd == 0) return 0; - set_amxstring(amx, params[2], cmd->getCmdLine(), params[3]); - set_amxstring(amx, params[5], cmd->getCmdInfo(), params[6]); + set_amxstring_utf8(amx, params[2], cmd->getCmdLine(), strlen(cmd->getCmdLine()), params[3] + 1); // + EOS + set_amxstring_utf8(amx, params[5], cmd->getCmdInfo(), strlen(cmd->getCmdInfo()), params[6] + 1); // + EOS cell *cpFlags = get_amxaddr(amx, params[4]); *cpFlags = cmd->getFlags(); @@ -1891,7 +1914,7 @@ static cell AMX_NATIVE_CALL get_pcvar_string(AMX *amx, cell *params) return 0; } - return set_amxstring(amx, params[2], ptr->string ? ptr->string : "", params[3]); + return set_amxstring_utf8(amx, params[2], ptr->string ? ptr->string : "", ptr->string ? strlen(ptr->string) : 0, params[3] + 1); // EOS } static cell AMX_NATIVE_CALL get_cvar_string(AMX *amx, cell *params) /* 3 param */ @@ -1912,7 +1935,8 @@ static cell AMX_NATIVE_CALL get_cvar_string(AMX *amx, cell *params) /* 3 param * } } - return set_amxstring(amx, params[2], CVAR_GET_STRING(sptemp), params[3]); + const char *value = CVAR_GET_STRING(sptemp); + return set_amxstring_utf8(amx, params[2], value, strlen(value), params[3] + 1); // + EOS } static cell AMX_NATIVE_CALL get_pcvar_float(AMX *amx, cell *params) @@ -2168,9 +2192,9 @@ static cell AMX_NATIVE_CALL format_time(AMX *amx, cell *params) /* 3 param */ } char szDate[512]; - strftime(szDate, 511, sptemp, lt); + ilen = strftime(szDate, 511, sptemp, lt); // Returns the number of bytes written, excluding the null terminator (0 if the result did not fit). 
- return set_amxstring(amx, params[1], szDate, params[2]); + return set_amxstring_utf8(amx, params[1], szDate, ilen, params[2] + 1); // + EOS; ilen is already the string length, and 0 on strftime failure yields an empty string } @@ -2235,7 +2259,8 @@ static cell AMX_NATIVE_CALL read_data(AMX *amx, cell *params) /* 3 param */ case 1: return g_events.getArgInteger(params[1]); case 3: - return set_amxstring(amx, params[2], g_events.getArgString(params[1]), *get_amxaddr(amx, params[3])); + return set_amxstring_utf8(amx, params[2], g_events.getArgString(params[1]), + strlen(g_events.getArgString(params[1])),*get_amxaddr(amx, params[3]) + 1); // + EOS default: cell *fCell = get_amxaddr(amx, params[2]); REAL fparam = (REAL)g_events.getArgFloat(params[1]); @@ -2438,7 +2463,8 @@ static cell AMX_NATIVE_CALL get_localinfo(AMX *amx, cell *params) /* 3 param */ int ilen; char* sptemp = get_amxstring(amx, params[1], 0, ilen); - return set_amxstring(amx, params[2], LOCALINFO(sptemp), params[3]); + char *value = LOCALINFO(sptemp); + return set_amxstring_utf8(amx, params[2], value, strlen(value), params[3] + 1); // + EOS } static cell AMX_NATIVE_CALL set_localinfo(AMX *amx, cell *params) /* 2 param */ @@ -2511,14 +2537,15 @@ static cell AMX_NATIVE_CALL read_argc(AMX *amx, cell *params) static cell AMX_NATIVE_CALL read_argv(AMX *amx, cell *params) /* 3 param */ { - return set_amxstring(amx, params[2], /*(params[1] < 0 || - params[1] >= CMD_ARGC()) ? "" : */CMD_ARGV(params[1]), params[3]); + const char *value = CMD_ARGV(params[1]); + return set_amxstring_utf8(amx, params[2], /*(params[1] < 0 || + params[1] >= CMD_ARGC()) ? "" : */value, strlen(value), params[3] + 1); // + EOS } static cell AMX_NATIVE_CALL read_args(AMX *amx, cell *params) /* 2 param */ { const char* sValue = CMD_ARGS(); - return set_amxstring(amx, params[1], sValue ? sValue : "", params[2]); + return set_amxstring_utf8(amx, params[1], sValue ? sValue : "", sValue ? 
strlen(sValue) : 0, params[2] + 1); // +EOS } static cell AMX_NATIVE_CALL get_user_msgid(AMX *amx, cell *params) /* 1 param */ @@ -3210,7 +3237,8 @@ static cell AMX_NATIVE_CALL force_unmodified(AMX *amx, cell *params) static cell AMX_NATIVE_CALL read_logdata(AMX *amx, cell *params) { - return set_amxstring(amx, params[1], g_logevents.getLogString(), params[2]); + const char *value = g_logevents.getLogString(); + return set_amxstring_utf8(amx, params[1], value, strlen(value), params[2] + 1); // + EOS } static cell AMX_NATIVE_CALL read_logargc(AMX *amx, cell *params) @@ -3220,7 +3248,8 @@ static cell AMX_NATIVE_CALL read_logargc(AMX *amx, cell *params) static cell AMX_NATIVE_CALL read_logargv(AMX *amx, cell *params) { - return set_amxstring(amx, params[2], g_logevents.getLogArg(params[1]), params[3]); + const char *value = g_logevents.getLogArg(params[1]); + return set_amxstring_utf8(amx, params[2], value, strlen(value), params[3] + 1); // + EOS } static cell AMX_NATIVE_CALL parse_loguser(AMX *amx, cell *params) @@ -3503,10 +3532,14 @@ static cell AMX_NATIVE_CALL get_module(AMX *amx, cell *params) // set name, author, version if ((*moduleIter).isAmxx()) { - const amxx_module_info_s *info = (*moduleIter).getInfoNew(); - set_amxstring(amx, params[2], info && info->name ? info->name : "unk", params[3]); - set_amxstring(amx, params[4], info && info->author ? info->author : "unk", params[5]); - set_amxstring(amx, params[6], info && info->version ? info->version : "unk", params[7]); + const amxx_module_info_s *info = (*moduleIter).getInfoNew(); + const char *name = info && info->name ? info->name : "unk"; + const char *author = info && info->author ? info->author : "unk"; + const char *version = info && info->version ? 
info->version : "unk"; + + set_amxstring_utf8(amx, params[2], name, strlen(name), params[3] + 1); // + EOS + set_amxstring_utf8(amx, params[4], author, strlen(author), params[5] + 1); // + EOS + set_amxstring_utf8(amx, params[6], version, strlen(version), params[7] + 1); // + EOS } // compatibility problem possible diff --git a/amxmodx/amxmodx.h b/amxmodx/amxmodx.h index 04316b9b..8dd476e9 100755 --- a/amxmodx/amxmodx.h +++ b/amxmodx/amxmodx.h @@ -147,6 +147,11 @@ void UTIL_ShowMenu(edict_t* pEntity, int slots, int time, char *menu, int mlen); void UTIL_ClientSayText(edict_t *pEntity, int sender, char *msg); void UTIL_TeamInfo(edict_t *pEntity, int playerIndex, const char *pszTeamName); +template +int UTIL_CheckValidChar(D *c); +unsigned int UTIL_GetUTF8CharBytes(const char *stream); +unsigned int UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search, const char *replace, bool caseSensitive); +char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive); char *UTIL_VarArgs(const char *fmt, ...); @@ -294,6 +299,7 @@ int amxstring_len(cell* cstr); int load_amxscript(AMX* amx, void** program, const char* path, char error[64], int debug); int set_amxnatives(AMX* amx, char error[64]); int set_amxstring(AMX *amx, cell amx_addr, const char *source, int max); +int set_amxstring_utf8(AMX *amx, cell amx_addr, const char *source, size_t sourcelen, size_t maxlen); int unload_amxscript(AMX* amx, void** program); void copy_amxmemory(cell* dest, cell* src, int len); diff --git a/amxmodx/datapacks.cpp b/amxmodx/datapacks.cpp index baab9c37..1392a62e 100644 --- a/amxmodx/datapacks.cpp +++ b/amxmodx/datapacks.cpp @@ -137,13 +137,14 @@ static cell AMX_NATIVE_CALL ReadPackString(AMX* amx, cell* params) } const char *str; - if (!(str = d->ReadString(NULL))) + size_t len; + if (!(str = d->ReadString(&len))) { LogError(amx, AMX_ERR_NATIVE, "DataPack operation is out of bounds."); return 
0; } - return set_amxstring(amx, params[2], str, params[3]); + return set_amxstring_utf8(amx, params[2], str, len, params[3] + 1); // + EOS } static cell AMX_NATIVE_CALL ResetPack(AMX* amx, cell* params) diff --git a/amxmodx/datastructs.h b/amxmodx/datastructs.h index ee36680a..345843b4 100644 --- a/amxmodx/datastructs.h +++ b/amxmodx/datastructs.h @@ -163,6 +163,13 @@ public: *out='\0'; } + /* Don't truncate a multi-byte character */ + if (*(output - 1) & 1 << 7) + { + size = UTIL_CheckValidChar(output - 1); + *(output - size) = '\0'; + } + return 1; } // Returns 1 on success diff --git a/amxmodx/file.cpp b/amxmodx/file.cpp index 6e9ac4b8..32d368e8 100755 --- a/amxmodx/file.cpp +++ b/amxmodx/file.cpp @@ -169,7 +169,7 @@ static cell AMX_NATIVE_CALL read_file(AMX *amx, cell *params) /* 5 param */ buffor[--len] = 0; cell *length = get_amxaddr(amx, params[5]); - *length = set_amxstring(amx, params[3], buffor, params[4]); + *length = set_amxstring_utf8(amx, params[3], buffor, len, params[4] + 1); // + EOS return i; } @@ -583,7 +583,7 @@ static cell AMX_NATIVE_CALL amx_fgets(AMX *amx, cell *params) static char buffer[4096]; buffer[0] = '\0'; fgets(buffer, sizeof(buffer)-1, fp); - return set_amxstring(amx, params[2], buffer, params[3]); + return set_amxstring_utf8(amx, params[2], buffer, strlen(buffer), params[3] + 1); // + EOS } static cell AMX_NATIVE_CALL amx_fseek(AMX *amx, cell *params) diff --git a/amxmodx/format.cpp b/amxmodx/format.cpp index c13b5182..1ec60e2a 100644 --- a/amxmodx/format.cpp +++ b/amxmodx/format.cpp @@ -149,6 +149,12 @@ void AddString(U **buf_p, size_t &maxlen, const cell *string, int width, int pre if (size > (int)maxlen) size = maxlen; + /* If precision is provided, make sure we don't truncate a multi-byte character */ + if (prec >= size && (string[size - 1] & 1 << 7)) + { + size -= UTIL_CheckValidChar((cell *)string + size - 1); + } + maxlen -= size; width -= size; @@ -286,6 +292,58 @@ void AddFloat(U **buf_p, size_t &maxlen, double fval, int 
width, int prec, int f *buf_p = buf; } +template +void AddBinary(U **buf_p, size_t &maxlen, unsigned int val, int width, int flags) +{ + char text[32]; + int digits; + U *buf; + + digits = 0; + do + { + if (val & 1) + { + text[digits++] = '1'; + } + else + { + text[digits++] = '0'; + } + val >>= 1; + } while (val); + + buf = *buf_p; + + if (!(flags & LADJUST)) + { + while (digits < width && maxlen) + { + *buf++ = (flags & ZEROPAD) ? '0' : ' '; + width--; + maxlen--; + } + } + + while (digits-- && maxlen) + { + *buf++ = text[digits]; + width--; + maxlen--; + } + + if (flags & LADJUST) + { + while (width-- && maxlen) + { + *buf++ = (flags & ZEROPAD) ? '0' : ' '; + maxlen--; + } + } + + *buf_p = buf; +} + template void AddUInt(U **buf_p, size_t &maxlen, unsigned int val, int width, int flags) { @@ -527,6 +585,11 @@ reswitch: llen--; arg++; break; + case 'b': + CHECK_ARGS(0); + AddBinary(&buf_p, llen, *get_amxaddr(amx, params[arg]), width, flags); + arg++; + break; case 'd': case 'i': CHECK_ARGS(0); @@ -635,6 +698,14 @@ break_to_normal_string: done: *buf_p = static_cast(0); *param = arg; + + /* if max buffer length consumed, make sure we don't truncate a multi-byte character */ + if (llen <= 0 && *(buf_p - 1) & 1 << 7) + { + llen += UTIL_CheckValidChar(buf_p - 1); + *(buf_p - llen) = static_cast(0); + } + return maxlen-llen; } diff --git a/amxmodx/string.cpp b/amxmodx/string.cpp index b42de18f..af364707 100755 --- a/amxmodx/string.cpp +++ b/amxmodx/string.cpp @@ -103,13 +103,44 @@ int set_amxstring(AMX *amx, cell amx_addr, const char *source, int max) #endif while (max-- && *source) - *dest++ = (cell)*source++; + *dest++ = (unsigned char)*source++; *dest = 0; return dest - start; } +int set_amxstring_utf8(AMX *amx, cell amx_addr, const char *source, size_t sourcelen, size_t maxlen) +{ + size_t len = sourcelen; + bool needtocheck = false; + + register cell* dest = (cell *)(amx->base + (int)(((AMX_HEADER *)amx->base)->dat + amx_addr)); + register cell* start = dest; + + 
if (len >= maxlen) + { + len = maxlen - 1; + needtocheck = true; + } + + maxlen = len; + + while (maxlen-- && *source) + { + *dest++ = *(unsigned char*)source++; + } + + if (needtocheck && (start[len - 1] & 1 << 7)) + { + len -= UTIL_CheckValidChar(start + len - 1); + } + + start[len] = '\0'; + + return len; +} + extern "C" size_t get_amxstring_r(AMX *amx, cell amx_addr, char *destination, int maxlen) { register cell *source = (cell *)(amx->base + (int)(((AMX_HEADER *)amx->base)->dat + amx_addr)); @@ -287,6 +318,62 @@ static cell AMX_NATIVE_CALL replace(AMX *amx, cell *params) /* 4 param */ return 0; } +static cell AMX_NATIVE_CALL replace_string(AMX *amx, cell *params) +{ + int len; + size_t maxlength = (size_t)params[2]; + + char *text = get_amxstring(amx, params[1], 0, len); + const char *search = get_amxstring(amx, params[3], 1, len); + const char *replace = get_amxstring(amx, params[4], 2, len); + + bool caseSensitive = params[5] ? true : false; + + if (search[0] == '\0') + { + LogError(amx, AMX_ERR_NATIVE, "Cannot replace searches of empty strings."); + return -1; + } + + int count = UTIL_ReplaceAll(text, maxlength + 1, search, replace, caseSensitive); // + EOS + + set_amxstring(amx, params[1], text, maxlength); + + return count; +} + +static cell AMX_NATIVE_CALL replace_stringex(AMX *amx, cell *params) +{ + int len; + size_t maxlength = (size_t)params[2]; + + char *text = get_amxstring(amx, params[1], 0, len); + const char *search = get_amxstring(amx, params[3], 1, len); + const char *replace = get_amxstring(amx, params[4], 2, len); + + size_t searchLen = (params[5] == -1) ? strlen(search) : (size_t)params[5]; + size_t replaceLen = (params[6] == -1) ? strlen(replace) : (size_t)params[6]; + + bool caseSensitive = params[7] ? 
true : false; + + if (searchLen == 0) + { + LogError(amx, AMX_ERR_NATIVE, "Cannot replace searches of empty strings."); + return -1; + } + + char *ptr = UTIL_ReplaceEx(text, maxlength + 1, search, searchLen, replace, replaceLen, caseSensitive); // + EOS + + if (ptr == NULL) + { + return -1; + } + + set_amxstring(amx, params[1], text, maxlength); // Write back the whole edited buffer; ptr only marks where the replacement ended + + return ptr - text; +} + static cell AMX_NATIVE_CALL contain(AMX *amx, cell *params) /* 2 param */ { register cell *a = get_amxaddr(amx, params[2]); @@ -854,8 +941,8 @@ static cell AMX_NATIVE_CALL amx_strtok(AMX *amx, cell *params) right[right_pos] = 0; left[left_pos] = 0; - set_amxstring(amx, params[2], left, leftMax); - set_amxstring(amx, params[4], right, rightMax); + set_amxstring_utf8(amx, params[2], left, strlen(left), leftMax + 1); // +EOS + set_amxstring_utf8(amx, params[4], right, strlen(right), rightMax + 1); // +EOS delete [] left; delete [] right; @@ -928,8 +1015,9 @@ static cell AMX_NATIVE_CALL amx_strtok2(AMX *amx, cell *params) right[right_pos] = 0; left[left_pos] = 0; - set_amxstring(amx, params[2], left, left_max); - set_amxstring(amx, params[4], right, right_max); + + set_amxstring_utf8(amx, params[2], left, strlen(left), left_max + 1); // + EOS + set_amxstring_utf8(amx, params[4], right, strlen(right), right_max + 1); // + EOS delete [] left; delete [] right; @@ -1029,7 +1117,7 @@ do_copy: : end - beg ) : 0; - set_amxstring(amx, params[2], start, copylen); + set_amxstring_utf8(amx, params[2], start, strlen(start), copylen + 1); // + EOS end = (len-i+1 > (size_t)RightMax) ? 
(size_t)RightMax : len-i+1; if (end) @@ -1045,13 +1133,50 @@ do_copy: } //if we got here, there was nothing to break - set_amxstring(amx, params[2], &(string[beg]), LeftMax); + set_amxstring_utf8(amx, params[2], &(string[beg]), strlen(&(string[beg])), LeftMax + 1); // + EOS if (RightMax) *right = '\0'; return 1; } +static cell AMX_NATIVE_CALL split_string(AMX *amx, cell *params) +{ + int textLen, splitLen; + char *text = get_amxstring(amx, params[1], 0, textLen); + const char *split = get_amxstring(amx, params[2], 1, splitLen); + + if (splitLen > textLen) + { + return -1; + } + + int maxLen = params[4]; + + /** + * Note that it's <= ... you could also just add 1, + * but this is a bit nicer + */ + for (int i = 0; i <= textLen - splitLen; i++) + { + if (strncmp(&text[i], split, splitLen) == 0) + { + /* Split hereeeee */ + if (i >= maxLen + 1) // + null terminator + { + set_amxstring_utf8(amx, params[3], text, textLen, maxLen + 1); // + null terminator + } + else + { + set_amxstring_utf8(amx, params[3], text, textLen, i + 1); + } + return i + splitLen; + } + } + + return -1; +} + static cell AMX_NATIVE_CALL format_args(AMX *amx, cell *params) { int len; @@ -1065,29 +1190,102 @@ static cell AMX_NATIVE_CALL format_args(AMX *amx, cell *params) char* string = format_arguments(amx, pos, len); // indexed from 0 - return set_amxstring(amx, params[1], string, params[2]); + return set_amxstring_utf8(amx, params[1], string, len, params[2] + 1); // + EOS } static cell AMX_NATIVE_CALL is_digit(AMX *amx, cell *params) { - return isdigit(params[1]); + char chr = params[1]; + + if (UTIL_GetUTF8CharBytes(&chr) != 1) + { + return 0; + } + + return isdigit(chr); } static cell AMX_NATIVE_CALL is_alnum(AMX *amx, cell *params) { - return isalnum(params[1]); + char chr = params[1]; + + if (UTIL_GetUTF8CharBytes(&chr) != 1) + { + return 0; + } + + return isalnum(chr); } static cell AMX_NATIVE_CALL is_space(AMX *amx, cell *params) { - return isspace(params[1]); + char chr = params[1]; + + 
if (UTIL_GetUTF8CharBytes(&chr) != 1) + { + return 0; + } + + return isspace(chr); } static cell AMX_NATIVE_CALL is_alpha(AMX *amx, cell *params) { - return isalpha(params[1]); + char chr = params[1]; + + if (UTIL_GetUTF8CharBytes(&chr) != 1) + { + return 0; + } + + return isalpha(chr); } +static cell AMX_NATIVE_CALL is_char_upper(AMX *amx, cell *params) +{ + char chr = params[1]; + + if (UTIL_GetUTF8CharBytes(&chr) != 1) + { + return 0; + } + + return isupper(chr); +} + +static cell AMX_NATIVE_CALL is_char_lower(AMX *amx, cell *params) +{ + char chr = params[1]; + + if (UTIL_GetUTF8CharBytes(&chr) != 1) + { + return 0; + } + + return islower(chr); +} + +static cell AMX_NATIVE_CALL is_char_mb(AMX *amx, cell *params) +{ + char chr = params[1]; + + unsigned int bytes = UTIL_GetUTF8CharBytes(&chr); + if (bytes == 1) + { + return 0; + } + + return bytes; +} + +static cell AMX_NATIVE_CALL get_char_bytes(AMX *amx, cell *params) +{ + int len; + char *str = get_amxstring(amx, params[1], 0, len); + + return UTIL_GetUTF8CharBytes(str); +}; + static cell AMX_NATIVE_CALL amx_ucfirst(AMX *amx, cell *params) { cell *str = get_amxaddr(amx, params[1]); @@ -1163,6 +1361,18 @@ static cell AMX_NATIVE_CALL n_strcmp(AMX *amx, cell *params) return strcmp(str1, str2); } +static cell AMX_NATIVE_CALL n_strncmp(AMX *amx, cell *params) +{ + int len; + char *str1 = get_amxstring(amx, params[1], 0, len); + char *str2 = get_amxstring(amx, params[2], 1, len); + + if (params[4]) + return strncmp(str1, str2, (size_t)params[3]); + else + return strncasecmp(str1, str2, (size_t)params[3]); +} + static cell AMX_NATIVE_CALL n_strfind(AMX *amx, cell *params) { int len; @@ -1274,13 +1484,20 @@ AMX_NATIVE_INFO string_Natives[] = {"isalnum", is_alnum}, {"isspace", is_space}, {"isalpha", is_alpha}, + {"is_char_upper", is_char_upper}, + {"is_char_lower", is_char_lower}, + {"is_char_mb", is_char_mb}, + {"get_char_bytes", get_char_bytes}, {"num_to_str", numtostr}, {"numtostr", numtostr}, {"parse", parse}, 
{"replace", replace}, + {"replace_string", replace_string}, + {"replace_stringex",replace_stringex}, {"setc", setc}, {"strbreak", strbreak}, {"argparse", argparse}, + {"split_string", split_string}, {"strtolower", strtolower}, {"strtoupper", strtoupper}, {"str_to_num", strtonum}, @@ -1295,6 +1512,7 @@ AMX_NATIVE_INFO string_Natives[] = {"strcat", n_strcat}, {"strfind", n_strfind}, {"strcmp", n_strcmp}, + {"strncmp", n_strncmp}, {"str_to_float", str_to_float}, {"float_to_str", float_to_str}, {"vformat", vformat}, diff --git a/amxmodx/trie_natives.h b/amxmodx/trie_natives.h index 2bbc3412..fe7d7345 100644 --- a/amxmodx/trie_natives.h +++ b/amxmodx/trie_natives.h @@ -112,7 +112,16 @@ public: { if (m_type == TRIE_DATA_STRING && max >= 0) { - memcpy(out, m_data, (max > m_cellcount ? m_cellcount : max) * sizeof(cell)); + int len = (max > m_cellcount) ? m_cellcount : max; + memcpy(out, m_data, len * sizeof(cell)); + + /* Don't truncate a multi-byte character */ + if (m_data[len - 1] & 1 << 7) + { + len -= UTIL_CheckValidChar(m_data + len - 1); + out[len] = '\0'; + } + return true; } return false; diff --git a/amxmodx/util.cpp b/amxmodx/util.cpp index ed5b7bf5..2d7b0a7d 100755 --- a/amxmodx/util.cpp +++ b/amxmodx/util.cpp @@ -397,3 +397,275 @@ void UTIL_FakeClientCommand(edict_t *pEdict, const char *cmd, const char *arg1, g_fakecmd.fake = false; } +unsigned int UTIL_GetUTF8CharBytes(const char *stream) +{ + unsigned char c = *(unsigned char *)stream; + if (c & (1 << 7)) + { + if (c & (1 << 5)) + { + if (c & (1 << 4)) + { + return 4; + } + return 3; + } + return 2; + } + return 1; +} + +template int UTIL_CheckValidChar(char *); +template int UTIL_CheckValidChar(cell *); + +template +int UTIL_CheckValidChar(D *c) +{ + int count; + int bytecount = 0; + + for (count = 1; (*c & 0xC0) == 0x80; count++) + { + c--; + } + + switch (*c & 0xF0) + { + case 0xC0: + case 0xD0: + { + bytecount = 2; + break; + } + case 0xE0: + { + bytecount = 3; + break; + } + case 0xF0: + { + bytecount = 
4; + break; + } + } + + if (bytecount != count) + { + return count; + } + + return 0; +} + +unsigned int UTIL_ReplaceAll(char *subject, size_t maxlength, const char *search, const char *replace, bool caseSensitive) +{ + size_t searchLen = strlen(search); + size_t replaceLen = strlen(replace); + + char *ptr = subject; + unsigned int total = 0; + while ((ptr = UTIL_ReplaceEx(ptr, maxlength, search, searchLen, replace, replaceLen, caseSensitive)) != NULL) + { + total++; + if (*ptr == '\0') + { + break; + } + } + + return total; +} + +unsigned int strncopy(char *dest, const char *src, size_t count) +{ + if (!count) + { + return 0; + } + + char *start = dest; + while ((*src) && (--count)) + { + *dest++ = *src++; + } + *dest = '\0'; + + return (dest - start); +} + +/** +* NOTE: Do not edit this for the love of god unless you have +* read the test cases and understand the code behind each one. +* While I don't guarantee there aren't mistakes, I do guarantee +* that plugins will end up relying on tiny idiosyncrasies of this +* function, just like they did with AMX Mod X. +* +* There are explicitly more cases than the AMX Mod X version because +* we're not doing a blind copy. Each case is specifically optimized +* for what needs to be done. Even better, we don't have to error on +* bad buffer sizes. Instead, this function will smartly cut off the +* string in a way that pushes old data out. +*/ +char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive) +{ + char *ptr = subject; + size_t browsed = 0; + size_t textLen = strlen(subject); + + /* It's not possible to search or replace */ + if (searchLen > textLen) + { + return NULL; + } + + /* Handle the case of one byte replacement. + * It's only valid in one case. + */ + if (maxLen == 1) + { + /* If the search matches and the replace length is 0, + * we can just terminate the string and be done. + */ + if ((caseSensitive ? 
strcmp(subject, search) : strcasecmp(subject, search)) == 0 && replaceLen == 0) + { + *subject = '\0'; + return subject; + } + else + { + return NULL; + } + } + + /* Subtract one off the maxlength so we can include the null terminator */ + maxLen--; + + while (*ptr != '\0' && (browsed <= textLen - searchLen)) + { + /* See if we get a comparison */ + if ((caseSensitive ? strncmp(ptr, search, searchLen) : strncasecmp(ptr, search, searchLen)) == 0) + { + if (replaceLen > searchLen) + { + /* First, see if we have enough space to do this operation */ + if (maxLen - textLen < replaceLen - searchLen) + { + /* First, see if the replacement length goes out of bounds. */ + if (browsed + replaceLen >= maxLen) + { + /* EXAMPLE CASE: + * Subject: AABBBCCC + * Buffer : 12 bytes + * Search : BBB + * Replace: DDDDDDDDDD + * OUTPUT : AADDDDDDDDD + * POSITION: ^ + */ + /* If it does, we'll just bound the length and do a strcpy. */ + replaceLen = maxLen - browsed; + + /* Note, we add one to the final result for the null terminator */ + strncopy(ptr, replace, replaceLen + 1); + + /* Don't truncate a multi-byte character */ + if (*(ptr + replaceLen - 1) & 1 << 7) + { + replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1); + *(ptr + replaceLen) = '\0'; + } + } + else + { + /* EXAMPLE CASE: + * Subject: AABBBCCC + * Buffer : 12 bytes + * Search : BBB + * Replace: DDDDDDD + * OUTPUT : AADDDDDDDCC + * POSITION: ^ + */ + /* We're going to have some bytes left over... */ + size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1; + size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1; + char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy); + char *moveTo = ptr + replaceLen; + + /* First, move our old data out of the way. */ + memmove(moveTo, moveFrom, realBytesToCopy); + + /* Now, do our replacement. 
*/ + memcpy(ptr, replace, replaceLen); + } + } + else + { + /* EXAMPLE CASE: + * Subject: AABBBCCC + * Buffer : 12 bytes + * Search : BBB + * Replace: DDDD + * OUTPUT : AADDDDCCC + * POSITION: ^ + */ + /* Yes, we have enough space. Do a normal move operation. */ + char *moveFrom = ptr + searchLen; + char *moveTo = ptr + replaceLen; + + /* First move our old data out of the way. */ + size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1; + memmove(moveTo, moveFrom, bytesToCopy); + + /* Now do our replacement. */ + memcpy(ptr, replace, replaceLen); + } + } + else if (replaceLen < searchLen) + { + /* EXAMPLE CASE: + * Subject: AABBBCCC + * Buffer : 12 bytes + * Search : BBB + * Replace: D + * OUTPUT : AADCCC + * POSITION: ^ + */ + /* If the replacement does not grow the string length, we do not + * need to do any fancy checking at all. Yay! + */ + char *moveFrom = ptr + searchLen; /* Start after the search pointer */ + char *moveTo = ptr + replaceLen; /* Copy to where the replacement ends */ + + /* Copy our replacement in, if any */ + if (replaceLen) + { + memcpy(ptr, replace, replaceLen); + } + + /* Figure out how many bytes to move down, including null terminator */ + size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1; + + /* Move the rest of the string down */ + memmove(moveTo, moveFrom, bytesToCopy); + } + else + { + /* EXAMPLE CASE: + * Subject: AABBBCCC + * Buffer : 12 bytes + * Search : BBB + * Replace: DDD + * OUTPUT : AADDDCCC + * POSITION: ^ + */ + /* We don't have to move anything around, just do a straight copy */ + memcpy(ptr, replace, replaceLen); + } + + return ptr + replaceLen; + } + ptr++; + browsed++; + } + + return NULL; +} \ No newline at end of file diff --git a/plugins/include/string.inc b/plugins/include/string.inc index b1fea47d..f348b56a 100755 --- a/plugins/include/string.inc +++ b/plugins/include/string.inc @@ -13,69 +13,226 @@ #define charsmax(%1) (sizeof(%1)-1) -/* Checks if source contains string. 
On success function -* returns position in source, on failure returns -1. */ -native contain(const source[],const string[]); +/** + * @global Unless otherwise noted, all string functions which take in a + * writable buffer and maximum length should NOT have the null terminator INCLUDED + * in the length. This means that this is valid: + * copy(string, charsmax(string), ...) + */ + +/** + * Calculates the length of a string. + * + * @param string String to check. + * @return Number of valid character bytes in the string. + */ +native strlen(const string[]); -/* Checks if source contains string with case ignoring. On success function -* returns position in source, on failure returns -1. */ +/** + * Tests whether a string is found inside another string. + * + * @param source String to search in. + * @param string Substring to find inside the original string. + * + * @return -1 on failure (no match found). Any other value + * indicates a position in the string where the match starts. + */ +native contain(const source[], const string[]); + +/** + * Tests whether a string is found inside another string with case ignoring. + * + * @param source String to search in. + * @param string Substring to find inside the original string. + * + * @return -1 on failure (no match found). Any other value + * indicates a position in the string where the match starts. + */ native containi(const source[],const string[]); -/* Replaces given string to another in given text. */ +/** + * Given a string, replaces the first occurrence of a search string with a + * replacement string. + * + * @param text String to perform search and replacements on. + * @param len Maximum length of the string buffer. + * @param what String to search for. + * @param with String to replace the search string with. + * + * @return The new string length after replacement, or 0 if no replacements were made. + */ native replace(text[], len, const what[], const with[]); -/* Adds one string to another. 
Last parameter different from 0, specifies -* how many chars we want to add. Function returns number of all merged chars. */ +/** + * Given a string, replaces all occurrences of a search string with a + * replacement string. + * + * @note Similar to replace_all() stock, but implemented as native and + * with different algorithm. This native doesn't error on bad + * buffer size and will smartly cut off the string in a way + * that pushes old data out. + * + * @note Only available in 1.8.3 and above. + * + * @param text String to perform search and replacements on. + * @param maxlength Maximum length of the string buffer. + * @param search String to search for. + * @param replace String to replace the search string with. + * @param caseSensitive If true (default), search is case sensitive. + * + * @return Number of replacements that were performed. + */ +native replace_string(text[], maxlength, const search[], const replace[], bool:caseSensitive=true); + +/** + * Given a string, replaces the first occurrence of a search string with a + * replacement string. + * + * @note Similar to replace() native, but implemented with more options and + * with different algorithm. This native doesn't error on bad + * buffer size and will smartly cut off the string in a way + * that pushes old data out. + * + * @note Only available in 1.8.3 and above. + * + * @param text String to perform search and replacements on. + * @param maxlength Maximum length of the string buffer. + * @param search String to search for. + * @param replace String to replace the search string with. + * @param searchLen If higher than -1, its value will be used instead of + * a strlen() call on the search parameter. + * @param replaceLen If higher than -1, its value will be used instead of + * a strlen() call on the replace parameter. + * @param caseSensitive If true (default), search is case sensitive. 
+ * + * @return Index into the buffer (relative to the start) from where + * the last replacement ended, or -1 if no replacements were + * made. + */ +native replace_stringex(text[], maxlength, const search[], const replace[], searchLen=-1, replaceLen=-1, bool:caseSensitive=true); + +/** + * Concatenates one string onto another. + * + * @param dest String to append to. + * @param len Maximum length of entire buffer. + * @param src Source string to concatenate. + * @param max Number of characters to add. + * + * @return Number of all merged characters. + */ native add(dest[],len,const src[],max=0); -/* Fills string with given format and parameters. - * Function returns number of copied chars. - * Example: format(dest,"Hello %s. You are %d years old","Tom",17). - * If any of your input buffers overlap with the destination buffer, - * format() falls back to a "copy-back" version as of 1.65. This is - * slower, so you should using a source string that is the same as - * the destination. +/** + * Formats a string according to the AMX Mod X format rules (see documentation). + * + * @note Example: format(dest, "Hello %s. You are %d years old", "Tom", 17). + * If any of your input buffers overlap with the destination buffer, + * format() falls back to a "copy-back" version as of 1.65. This is + * slower, so you should avoid using a source string that is the same as + * the destination. + * + * @param output Destination string buffer. + * @param len Maximum length of output string buffer. + * @param format Formatting rules. + * @param ... Variable number of format parameters. + * + * @return Number of cells written. */ -native format(output[] ,len ,const format[] , any:...); +native format(output[], len, const format[], any:...); -/* Same as format(), except does not perform a "copy back" check. 
- * This means formatex() is faster, but DOES NOT ALLOW this type - * of call: - * formatex(buffer, len, "%s", buffer) - * formatex(buffer, len, buffer, buffer) - * formatex(buffer, len, "%s", buffer[5]) - * This is because the output is directly stored into "buffer", - * rather than copied back at the end. +/** + * Formats a string according to the AMX Mod X format rules (see documentation). + * + * @note Same as format(), except does not perform a "copy back" check. + * This means formatex() is faster, but DOES NOT ALLOW this type + * of call: + * formatex(buffer, len, "%s", buffer) + * formatex(buffer, len, buffer, buffer) + * formatex(buffer, len, "%s", buffer[5]) + * This is because the output is directly stored into "buffer", + * rather than copied back at the end. + * + * @param output Destination string buffer. + * @param len Maximum length of output string buffer. + * @param format Formatting rules. + * @param ... Variable number of format parameters. + * + * @return Number of cells written. */ -native formatex(output[] ,len ,const format[] , any:...); +native formatex(output[], len, const format[], any:...); -/* Replacement for format_args. Much faster and %L compatible. - * This works exactly like vsnprintf() from C. - * You must pass in the output buffer and its size, - * the string to format, and the number of the FIRST variable - * argument parameter. For example, for: - * function (a, b, c, ...) - * You would pass 4 (a is 1, b is 2, c is 3, et cetera). - * There is no vformatex(). +/** + * Formats a string according to the AMX Mod X format rules (see documentation). + * + * @note This is the same as format(), except it grabs parameters from a + * parent parameter stack, rather than a local. This is useful for + * implementing your own variable argument functions. + * + * @note Replacement for format_args. Much faster and %L compatible. + * This works exactly like vsnprintf() from C. 
+ * You must pass in the output buffer and its size, + * the string to format, and the number of the FIRST variable + * argument parameter. For example, for: + * function (a, b, c, ...) + * You would pass 4 (a is 1, b is 2, c is 3, et cetera). + * There is no vformatex(). + * + * @param buffer Destination string buffer. + * @param len Maximum length of output string buffer. + * @param fmt Formatting rules. + * @param vararg Argument number which contains the '...' symbol. + * Note: Arguments start at 1. + * @return Number of bytes written. */ native vformat(buffer[], len, const fmt[], vararg); -/* - * Same as vformat(), except works in normal style dynamic natives. - * Instead of passing the format arg string, you can only pass the - * actual format argument number itself. - * If you pass 0, it will read the format string from an optional - * fifth parameter. + /** + * Formats a string according to the AMX Mod X format rules (see documentation). + * + * @note Same as vformat(), except works in normal style dynamic natives. + * Instead of passing the format arg string, you can only pass the + * actual format argument number itself. + * If you pass 0, it will read the format string from an optional + * fifth parameter. + * + * @param buffer Destination string buffer. + * @param len Maximum length of output string buffer. + * @param fmt_arg Argument number which contains the format. + * @param vararg Argument number which contains the '...' symbol. + * Note: Arguments start at 1. + * @return Number of bytes written. */ native vdformat(buffer[], len, fmt_arg, vararg, ...); -/* Gets parameters from function as formated string. */ -native format_args(output[] ,len ,pos = 0); +/** + * Gets parameters from function as a formatted string. + * + * @param output Destination string buffer. + * @param len Maximum length of output string buffer. + * @param pos Argument number which contains the '...' symbol. + * + * @return Number of bytes written. 
+ */ +native format_args(output[], len, pos = 0); -/* Converts number to string. */ +/** + * Converts an integer to a string. + * + * @param num Integer to convert. + * @param string Buffer to store string in. + * @param len Maximum length of string buffer. + * + * @return Number of cells written to buffer. + */ native num_to_str(num,string[],len); -/* Returns converted string to number. */ +/** + * Converts a string to an integer. + * + * @param string String to convert. + * @return Integer conversion of string, or 0 on failure. + */ native str_to_num(const string[]); /** @@ -144,47 +301,122 @@ native strtol(const string[], &endPos = 0, base = 0); */ native Float:strtof(const string[], &endPos = 0); -/* Converts float to string. */ +/** + * Converts a floating point number to a string. + * + * @param fl Floating point number to convert. + * @param string Buffer to store string in. + * @param len Maximum length of string buffer. + * + * @return Number of cells written to buffer. + */ native float_to_str(Float:fl, string[], len); -/* Parses a float. */ +/** + * Converts a string to a floating point number. + * + * @param string String to convert to a float. + * @return Floating point result, or 0.0 on error. + */ native Float:str_to_float(const string[]); -/* Checks if two strings equal. If len var is set -* then there are only c chars comapred. */ +/** + * Returns whether two strings are equal. + * + * @param a First string (left). + * @param b Second string (right). + * @param c Number of characters to compare. + * + * @return True if equal, false otherwise. + */ native equal(const a[],const b[],c=0); -/* Checks if two strings equal with case ignoring. -* If len var is set then there are only c chars comapred. */ +/** + * Returns whether two strings are equal with case ignoring. + * + * @param a First string (left). + * @param b Second string (right). + * @param c Number of characters to compare. + * + * @return True if equal, false otherwise. 
+ */ native equali(const a[],const b[],c=0); -/* Copies one string to another. By len var -* you may specify max. number of chars to copy. */ +/** + * Copies one string to another string. + * + * @note If the destination buffer is too small to hold the source string, the + * destination will be truncated. + * + * @param dest Destination string buffer to copy to. + * @param len Destination buffer length. + * @param src Source string buffer to copy from. + * + * @return Number of cells written. + */ native copy(dest[],len,const src[]); -/* Copies one string to another until char ch is found. -* By len var you may specify max. number of chars to copy. */ +/** + * Copies one string to another string until ch is found. + * + * @param dest Destination string buffer to copy to. + * @param len Destination buffer length. + * @param src Source string buffer to copy from. + * @param ch Character to search for. + * + * @return Number of cells written. + */ native copyc(dest[],len,const src[],ch); -/* Sets string with given character. */ +/** + * Sets string with given character. + * + * @param src Destination string buffer to copy to. + * @param len Destination buffer length. + * @param ch Character to set string. + * + * @noreturn + */ native setc(src[],len,ch); -/* Gets parameters from text. -* Example: to split text: "^"This is^" the best year", -* call function like this: parse(text,arg1,len1,arg2,len2,arg3,len3,arg4,len4) -* and you will get: "This is", "the", "best", "year" -* Function returns number of parsed parameters. */ +/** + * Gets parameters from text. + * + * @note Example: to split text: "^"This is^" the best year", + * call function like this: parse(text,arg1,len1,arg2,len2,arg3,len3,arg4,len4) + * and you will get: "This is", "the", "best", "year" + * Function returns number of parsed parameters. + * + * @param text String to parse. + * @param ... Variable number of format parameters. + * + * @return Number of parsed parameters. 
+ */ native parse(const text[], ... ); -/* Breaks a string into two halves, by token. - See strbreak() for doing this with parameters. - Example: - str1[] = This *is*some text - strtok(str1, left, 24, right, 24, '*') - left will be "This " - Right will be "is*some text" - If you use trimSpaces, all spaces are trimmed from Left. -*/ +/** + * Breaks a string in two by token. + * + * @note Trimming spaces is buggy. Consider strtok2 instead. + * + * @note See argbreak() for doing this with parameters. + * Example: + * str1[] = This *is*some text + * strtok(str1, left, 24, right, 24, '*') + * left will be "This " + * Right will be "is*some text" + * If you use trimSpaces, all spaces are trimmed from Left. + * + * @param text String to tokenize + * @param Left Buffer to store left half + * @param leftLen Size of left buffer + * @param Right Buffer to store right half + * @param rightLen Size of right buffer + * @param token Token to split by + * @param trimSpaces Whether spaces are trimmed. + * + * @noreturn + */ native strtok(const text[], Left[], leftLen, Right[], rightLen, token=' ', trimSpaces=0); /** @@ -231,9 +463,9 @@ native strtok(const text[], Left[], leftLen, Right[], rightLen, token=' ', trimS #define TRIM_FULL TRIM_OUTER|TRIM_INNER /** - * Breaks a string in two by token + * Breaks a string in two by token. * - * Only available in 1.8.3 and above + * @note Only available in 1.8.3 and above. * * @param text String to tokenize * @param left Buffer to store left half @@ -248,40 +480,118 @@ native strtok(const text[], Left[], leftLen, Right[], rightLen, token=' ', trimS */ native strtok2(const text[], left[], const llen, right[], const rlen, const token = ' ', const trim = 0); -/* Strips spaces from the beginning and end of a string. */ +/** + * Removes whitespace characters from the beginning and end of a string. + * + * @param text The string to trim. + * @return Number of bytes written. + */ native trim(text[]); -/* Converts all chars in string to lower case. 
+ */ +/** + * Converts all chars in string to lower case. + * + * @param string The string to convert. + * @return Number of bytes written. + */ native strtolower(string[]); -/* Converts all chars in string to upper case. */ +/** + * Converts all chars in string to upper case. + * + * @param string The string to convert. + * @return Number of bytes written. + */ native strtoupper(string[]); -/* Make a string's first character uppercase */ +/** + * Make a string's first character uppercase. + * + * @param string The string to convert. + * @return 1 on success, otherwise 0. + */ native ucfirst(string[]); -/* Returns true when value is digit. */ +/** + * Returns whether a character is numeric. + * + * @note Multi-byte characters will always return false. + * + * @param ch Character to test. + * @return True if character is numeric, otherwise false. + */ native isdigit(ch); -/* Returns true when value is letter. */ +/** + * Returns whether a character is an ASCII alphabet character. + * + * @note Multi-byte characters will always return false. + * + * @param ch Character to test. + * @return True if character is alphabetical, otherwise false. + */ native isalpha(ch); -/* Returns true when value is space. */ +/** + * Returns whether a character is whitespace. + * + * @note Multi-byte characters will always return false. + * + * @param ch Character to test. + * @return True if character is whitespace, otherwise false. + */ native isspace(ch); -/* Returns true when value is letter or digit. */ +/** + * Returns whether a character is numeric or an ASCII alphabet character. + * + * @note Multi-byte characters will always return false. + * + * @param ch Character to test. + * @return True if character is numeric or alphabetical, otherwise false. + */ native isalnum(ch); -/* Concatenates a string. Maxlength is the total buffer of the destination. */ -native strcat(dest[], const source[], maxlength); +/** + * Returns if a character is multi-byte or not. 
+ * + * @note Only available in 1.8.3 and above. + * + * @param ch Character to test. + * @return 0 for a normal 7-bit ASCII character, + * otherwise number of bytes in multi-byte character. + */ +native is_char_mb(ch); -/* Finds a string in another string. Returns -1 if not found. */ -native strfind(const string[], const sub[], ignorecase=0, pos=0); +/** + * Returns whether an alphabetic character is uppercase. + * + * @note Only available in 1.8.3 and above. + * @note Multi-byte characters will always return false. + * + * @param ch Character to test. + * @return True if character is uppercase, otherwise false. + */ +native bool:is_char_upper(ch); -/* Compares two strings with the C function strcmp(). Returns 0 on equal. */ -native strcmp(const string1[], const string2[], ignorecase=0); +/** + * Returns whether an alphabetic character is lowercase. + * + * @note Only available in 1.8.3 and above. + * @note Multi-byte characters will always return false. + * + * @param ch Character to test. + * @return True if character is lowercase, otherwise false. + */ +native bool:is_char_lower(ch); -/* Tests if given string contains only digits. Also, returns false for zero-length strings. */ +/** + * Returns whether a given string contains only digits. + * This returns false for zero-length strings. + * + * @param sString Character to test. + * @return True if string contains only digit, otherwise false. + */ stock bool:is_str_num(const sString[]) { new i = 0; @@ -292,31 +602,139 @@ stock bool:is_str_num(const sString[]) return sString[i] == 0 && i != 0; } -// Warning: this function is deprecated as it does not work properly. Use -// argparse() or argbreak(). -native strbreak(const text[], Left[], leftLen, Right[], rightLen); +/** + * Returns the number of bytes a character is using. This is + * for multi-byte characters (UTF-8). For normal ASCII characters, + * this will return 1. + * + * @note Only available in 1.8.3 and above. + * + * @param source Source input string. 
+ * @return Number of bytes the current character uses. + */ +native get_char_bytes(const source[]); + +/** + * Converts a lowercase character to an uppercase character. + * + * @note Only available in 1.8.3 and above. + * + * @param chr Character to convert. + * @return Uppercase character on success, + * no change on failure. + */ +stock char_to_upper(chr) +{ + if (is_char_lower(chr)) + { + return (chr & ~(1<<5)); + } + return chr; +} + +/** + * Converts an uppercase character to a lowercase character. + * + * @note Only available in 1.8.3 and above. + * + * @param chr Character to convert. + * @return Lowercase character on success, + * no change on failure. + */ +stock char_to_lower(chr) +{ + if (is_char_upper(chr)) + { + return (chr | (1<<5)); + } + return chr; +} + +/** + * Concatenates one string onto another. + * + * @param dest String to append to. + * @param source Source string to concatenate. + * @param maxlength Maximum length of entire buffer. + * @return Number of bytes written. + */ +native strcat(dest[], const source[], maxlength); + +/** + * Tests whether a string is found inside another string. + * + * @param string String to search in. + * @param sub Substring to find inside the original string. + * @param ignorecase If true, search is case insensitive. + * If false (default), search is case sensitive. + * @param pos + * @return -1 on failure (no match found). Any other value + * indicates a position in the string where the match starts. + */ +native strfind(const string[], const sub[], ignorecase=0, pos=0); + +/** + * Compares two strings lexicographically. + * + * @param string1 First string (left). + * @param string2 Second string (right). + * @param ignorecase If true, comparison is case insensitive. + * If false (default), comparison is case sensitive. 
+ * @return -1 if string1 < string2 + * 0 if string1 == string2 + * 1 if string1 > string2 + */ +native strcmp(const string1[], const string2[], ignorecase=0); + +/** + * Compares parts of two strings lexicographically. + * + * @note Only available in 1.8.3 and above. + * + * @param string1 First string (left). + * @param string2 Second string (right). + * @param num Number of characters to compare. + * @param ignorecase If true, comparison is case insensitive. + * If false (default), comparison is case sensitive. + * @return -1 if string1 < string2 + * 0 if string1 == string2 + * 1 if string1 > string2 + */ +native strncmp(const string1[], const string2[], num, bool:ignorecase=false); + +/** + * Backwards compatibility stock - use argbreak or argparse. + * @deprecated this function does not work properly. + */ +//#pragma deprecated Use argbreak() instead +stock strbreak(const text[], Left[], leftLen, Right[], rightLen) +{ + return argbreak(text, Left, leftLen, Right, rightLen); +} /** * Parses an argument string to find the first argument. You can use this to * replace strbreak(). * - * You can use argparse() to break a string into all of its arguments: - * new arg[N], pos; - * while (true) { - * pos = argparse(string, pos, arg, sizeof(arg) - 1); - * if (pos == -1) - * break; - * } + * @note Only available in 1.8.3 and above. * - * All initial whitespace is removed. Remaining characters are read until an - * argument separator is encountered. A separator is any whitespace not inside - * a double-quotation pair (i.e. "x b" is one argument). If only one quotation - * mark appears, argparse() acts as if one existed at the end of the string. - * Quotation marks are never written back, and do not act as separators. For - * example, "a""b""c" will return "abc". An empty quote pair ("") will count - * as an argument containing no characters. 
+ * @note You can use argparse() to break a string into all of its arguments: + * new arg[N], pos; + * while (true) { + * pos = argparse(string, pos, arg, sizeof(arg) - 1); + * if (pos == -1) + * break; + * } * - * argparse() will write an empty string to argbuffer if no argument is found. + * @note All initial whitespace is removed. Remaining characters are read until an + * argument separator is encountered. A separator is any whitespace not inside + * a double-quotation pair (i.e. "x b" is one argument). If only one quotation + * mark appears, argparse() acts as if one existed at the end of the string. + * Quotation marks are never written back, and do not act as separators. For + * example, "a""b""c" will return "abc". An empty quote pair ("") will count + * as an argument containing no characters. + * + * @note argparse() will write an empty string to argbuffer if no argument is found. * * @param text String to tokenize. * @param pos Position to start parsing from. @@ -328,7 +746,19 @@ native strbreak(const text[], Left[], leftLen, Right[], rightLen); */ native argparse(const text[], pos, argbuffer[], maxlen); -/* Emulates strbreak() using argparse(). */ +/** + * Emulates strbreak() using argparse(). + * + * @param text Source input string. + * @param left Buffer to store string left part. + * @param leftlen Maximum length of the string part buffer. + * @param right Buffer to store string right part. + * @param rightlen Maximum length of the string part buffer. + * + * @return -1 if no match was found; otherwise, an index into source + * marking the first index after the searched text. The + * index is always relative to the start of the input string. + */ stock argbreak(const text[], left[], leftlen, right[], rightlen) { new pos = argparse(text, 0, left, leftlen); @@ -344,11 +774,34 @@ stock argbreak(const text[], left[], leftlen, right[], rightlen) return pos; } -/* It is basically strbreak but you have a delimiter that is more than one character in length. 
- You pass the Input string, the Left output, the max length of the left output, - the right output , the max right length, and then the delimiter string. - By Suicid3 -*/ +/** + * Returns text in a string up until a certain character sequence is reached. + * + * @note Only available in 1.8.3 and above. + * + * @param source Source input string. + * @param split A string which specifies a search point to break at. + * @param part Buffer to store string part. + * @param partLen Maximum length of the string part buffer. + * + * @return -1 if no match was found; otherwise, an index into source + * marking the first index after the searched text. The + * index is always relative to the start of the input string. + */ +native split_string(const source[], const split[], part[], partLen); + + /** + * It is basically strbreak but you have a delimiter that is more than one character in length. By Suicid3. + * + * @param szInput Source input string. + * @param szLeft Buffer to store left string part. + * @param pL_Max Maximum length of the string part buffer. + * @param szRight Buffer to store right string part. + * @param pR_Max Maximum length of the string part buffer. + * @param szDelim A string which specifies a search point to break at. + * + * @noreturn + */ stock split(const szInput[], szLeft[], pL_Max, szRight[], pR_Max, const szDelim[]) { new iEnd = contain(szInput, szDelim); @@ -373,7 +826,15 @@ stock split(const szInput[], szLeft[], pL_Max, szRight[], pR_Max, const szDelim[ return; } -/* Removes a path from szFilePath leaving the name of the file in szFile for a pMax length. */ + /** + * Removes a path from szFilePath leaving the name of the file in szFile for a pMax length. + * + * @param szFilePath String to perform search and replacements on. + * @param szFile Buffer to store file name. + * @param pMax Maximum length of the string buffer. 
+ * + * @noreturn + */ stock remove_filepath(const szFilePath[], szFile[], pMax) { new len = strlen(szFilePath); @@ -385,9 +846,20 @@ stock remove_filepath(const szFilePath[], szFile[], pMax) return; } -/* Replaces a contained string iteratively. - * This ensures that no infinite replacements will take place by - * intelligently moving to the next string position each iteration. + /** + * Replaces a contained string iteratively. + * + * @note Consider using replace_string() instead. + * + * @note This ensures that no infinite replacements will take place by + * intelligently moving to the next string position each iteration. + * + * @param string String to perform search and replacements on. + * @param len Maximum length of the string buffer. + * @param what String to search for. + * @param with String to replace the search string with. + + * @return Number of replacements on success, otherwise 0. */ stock replace_all(string[], len, const what[], const with[]) { @@ -435,3 +907,78 @@ stock replace_all(string[], len, const what[], const with[]) return total; } + +/** + * Breaks a string into pieces and stores each piece into an array of buffers. + * + * @param text The string to split. + * @param split The string to use as a split delimiter. + * @param buffers An array of string buffers (2D array). + * @param maxStrings Number of string buffers (first dimension size). + * @param maxStringLength Maximum length of each string buffer. + * @param copyRemainder False (default) discard excess pieces, true to ignore + * delimiters after last piece. + * @return Number of strings retrieved. 
+ */ +stock explode_string(const text[], const split[], buffers[][], maxStrings, maxStringLength, bool:copyRemainder = false) +{ + new reloc_idx, idx, total; + + if (maxStrings < 1 || !split[0]) + { + return 0; + } + + while ((idx = split_string(text[reloc_idx], split, buffers[total], maxStringLength)) != -1) + { + reloc_idx += idx; + if (++total == maxStrings) + { + if (copyRemainder) + { + copy(buffers[total-1], maxStringLength, text[reloc_idx-idx]); + } + return total; + } + } + + copy(buffers[total++], maxStringLength, text[reloc_idx]); + + return total; +} + +/** + * Joins an array of strings into one string, with a "join" string inserted in + * between each given string. This function complements ExplodeString. + * + * @param strings An array of strings. + * @param numStrings Number of strings in the array. + * @param join The join string to insert between each string. + * @param buffer Output buffer to write the joined string to. + * @param maxLength Maximum length of the output buffer. + * @return Number of bytes written to the output buffer. + */ +stock implode_strings(const strings[][], numStrings, const join[], buffer[], maxLength) +{ + new total, length, part_length; + new join_length = strlen(join); + for (new i=0; i + +/** + * Warning: To get expected result, file encoding must be UTF-8 without BOM. 
+ */ + +public plugin_init() +{ + register_plugin("UTF-8 Test", AMXX_VERSION_STR, "AMXX Dev Team"); + register_srvcmd("utf8test", "OnServerCommand"); +} + +new ErrorCount; +new TestNumber; + +enum TestType +{ + TT_Equal = 0, + TT_LessThan, + TT_GreaterThan, + TT_LessThanEqual, + TT_GreaterThanEqual, + TT_NotEqual +}; + +new const TestWords[TestType][] = +{ + "==", + "<", + ">", + "<=", + ">=", + "!=" +}; + +test(any:a, any:b = true, TestType:type = TT_Equal) +{ + ++TestNumber; + + new passed = 0; + + switch (type) + { + case TT_Equal: passed = a == b; + case TT_LessThan: passed = a < b; + case TT_GreaterThan: passed = a > b; + case TT_LessThanEqual: passed = a <= b; + case TT_GreaterThanEqual: passed = a >= b; + case TT_NotEqual: passed = a != b; + } + + if (!passed) + { + server_print("^tFailed test #%d (%d %s %d)", TestNumber, a, bool:TestWords[type], b); + ErrorCount++; + } +} + +showResult() +{ + if (!ErrorCount) + { + server_print("All tests passed (%d/%d).", TestNumber, TestNumber); + } + else + { + server_print("Test failed %d/%d, aborting.", TestNumber - ErrorCount, TestNumber); + } +} + +public OnServerCommand() +{ + /** + * Initiliaze some data. + */ + new reference[] = "𤭢hi AMXX® Hello㋡ crab?ൠ"; + + new Array:a = ArrayCreate(sizeof reference); + ArrayPushString(a, reference); + + new Trie:t = TrieCreate(); + TrieSetString(t, "reference", reference); + + new DataPack:d = CreateDataPack(); + WritePackString(d, reference); + ResetPack(d); + + set_localinfo("reference", reference); + + + server_print("Counting character bytes..."); + { + test(get_char_bytes("®") == 2); + test(get_char_bytes("㋡") == 3); + test(get_char_bytes("𤭢") == 4); + test(get_char_bytes("ൠ") == 3); + } + + server_print("Checking character bytes..."); + { + /** + * is_char_mb() returns also number of bytes if not 0. 
+ */ + test(is_char_mb(reference[0]) != 0); // 𤭢 + test(is_char_mb(reference[11]) != 0); // ® + test(is_char_mb(reference[19]) != 0); // ㋡ + test(is_char_mb(reference[29]) != 0); // ൠ + } + + server_print("Checking truncated character bytes - atcprintf..."); + { + /** + * Truncating '𤭢' at different index. '𤭢' = 4 bytes + * A buffer of 4 = 3 bytes + EOS. + * Expected result: empty buffer. + */ + new buffer1[4]; + for(new i = charsmax(buffer1), length1; i >= 0; --i) + { + length1 = formatex(buffer1, i, "%s", reference); + test(buffer1[0] == EOS && length1 == 0); + } + + /** + * Truncating inside '®'. + * Expected result: '®' should be skipped. + */ + new buffer2[12]; + new length2 = formatex(buffer2, charsmax(buffer2), "%s", reference); + test(strcmp(buffer2, "𤭢hi AMXX") == 0 && length2 == strlen("𤭢hi AMXX")); + + /** + * Truncating inside 'ൠ'. + * Buffer of 14: Enough to hold "㋡ crab?ൠ" + * Retrieve 11 characters using precision format from '㋡' to inside 'ൠ'.. + * Expected result: 'ൠ'. should be skipped. + */ + new buffer3[14]; + new length3 = formatex(buffer3, charsmax(buffer3), "%.11s", reference[19]); + test(strcmp(buffer3, "㋡ crab?") == 0 && length3 == get_char_bytes("㋡") + strlen(" crab?")); + } + + server_print("Checking truncated character bytes - set_amxstring_utf8..."); + { + /** + * Splits string at '㋡'. + * Buffer can hold only 16 characters. + * Expected result: '㋡' should not be included and returned position should be after '㋡'. + */ + new buffer1[16]; + new index1 = split_string(reference, "㋡", buffer1, charsmax(buffer1)); + test(strcmp(buffer1, "𤭢hi AMXX® H") == 0 && index1 == strlen("𤭢hi AMXX® Hello") + get_char_bytes("㋡")); + + /** + * Splits string at '𤭢'. + * Expected result: Empty string and returned position should be after '𤭢'. + */ + new buffer2[5]; + new index2 = split_string(reference, "𤭢", buffer2, charsmax(buffer2)); + test(buffer2[0] == EOS && index2 == get_char_bytes("𤭢")); + + /** + * Splits string at '\ൠ'. 
+ * Expected result: Empty string and returned position should -1 (not found). + */ + new buffer3[12]; + new index3 = split_string(reference, "\ൠ", buffer3, charsmax(buffer3)); + test(buffer3[0] == EOS && index3 == -1); + + /** + * Truncating '𤭢' at different index. '𤭢' = 4 bytes + * A buffer of 4 = 3 bytes + EOS. + * Expected result: empty buffer. + */ + new buffer4[4]; + for(new i = charsmax(buffer4), length4; i >= 0; --i) + { + length4 = get_localinfo("reference", buffer4, i); + test(buffer4[0] == EOS && length4 == 0); + } + } + + server_print("Checking truncated character bytes - direct copy..."); + { + /** + * Replaces '®' by '𤭢'. + * Expected result: '𤭢' should eat '® He" which counts 4 bytes. + */ + new count1 = replace_string(reference, charsmax(reference), "®", "𤭢"); + test(strcmp(reference, "𤭢hi AMXX𤭢ello㋡ crab?ൠ") == 0 && count1 == 1); + + /** + * Replaces '®' by '𤭢'. + * Expected result: not found. + */ + new count2 = replace_string(reference, charsmax(reference), "®", "𤭢"); + test(strcmp(reference, "𤭢hi AMXX𤭢ello㋡ crab?ൠ") == 0 && count2 == 0); + + /** + * Replaces 'ൠ' by '𤭢'. + * Expected result: 'ൠ' = 3 bytes, '𤭢' = 4 bytes. Not enough spaces to hold '𤭢', skipping it. + */ + new count3 = replace_string(reference, charsmax(reference), "ൠ", "𤭢"); + test(strcmp(reference, "𤭢hi AMXX𤭢ello㋡ crab?") == 0 && count3 == 1); + + /** + * Gets reference string with limited buffer. + * Expected result: '㋡' should be ignored as no spaces. + */ + new buffer[charsmax(reference) - 9]; + ArrayGetString(a, 0, buffer, charsmax(buffer)); + test(strcmp(buffer, "𤭢hi AMXX® Hello") == 0); + + /** + * Gets reference string with limited buffer. + * Expected result: '㋡' should be ignored as no spaces. + */ + TrieGetString(t, "reference", buffer, charsmax(buffer)); + test(strcmp(buffer, "𤭢hi AMXX® Hello") == 0); + + /** + * Gets reference string with limited buffer. + * Expected result: '㋡' should be ignored as no room. 
+ */ + new length = ReadPackString(d, buffer, charsmax(buffer)); + test(strcmp(buffer, "𤭢hi AMXX® Hello") == 0 && length == strlen("𤭢hi AMXX® Hello")); + } + + ArrayDestroy(a); + TrieDestroy(t); + DestroyDataPack(d); + + showResult(); +} \ No newline at end of file diff --git a/support/PackageScript b/support/PackageScript index 53e61635..69d510fd 100644 --- a/support/PackageScript +++ b/support/PackageScript @@ -239,6 +239,7 @@ scripting_files = [ 'testsuite/sqlxtest.sq3', 'testsuite/sqlxtest.sql', 'testsuite/trietest.sma', + 'testsuite/utf8test.sma', 'include/amxconst.inc', 'include/amxmisc.inc', 'include/amxmodx.inc',