Regex: Add regex_compile_ex and regex_match_ex natives.

The purpose is to use the PCRE flags and error number directly.
Hardcoding flag letters is neither readable nor friendly.
This also makes the error parameters optional.
This commit is contained in:
Arkshine 2014-07-05 01:29:57 +02:00
parent 201a3003d9
commit f9503cb98a
4 changed files with 302 additions and 10 deletions

View File

@ -1,3 +1,35 @@
/* AMX Mod X
* Regular Expressions Module
*
* by the AMX Mod X Development Team
*
* This file is part of AMX Mod X.
*
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* In addition, as a special exception, the author gives permission to
* link the code of this program with the Half-Life Game Engine ("HL
* Engine") and Modified Game Libraries ("MODs") developed by Valve,
* L.L.C ("Valve"). You must obey the GNU General Public License in all
* respects for all of the code used other than the HL Engine and MODs
* from Valve. If you modify this file, you may extend this exception
* to your version of the file, but you are not obligated to do so. If
* you do not wish to do so, delete this exception statement from your
* version.
*/
#include "pcre.h" #include "pcre.h"
#include "CRegEx.h" #include "CRegEx.h"
#include <string.h> #include <string.h>
@ -97,6 +129,23 @@ int RegEx::Compile(const char *pattern, const char* flags)
return 1; return 1;
} }
/**
 * Compiles a pattern using a raw PCRE option bitmask instead of flag letters.
 *
 * @param pattern	Regular expression to compile.
 * @param iFlags	Option bits handed to pcre_compile() unchanged.
 *
 * @return			1 when pcre_compile() produced a compiled pattern, 0 otherwise.
 *					pcre_compile() reports failure details through mError and
 *					mErrorOffset.
 */
int RegEx::Compile(const char *pattern, int iFlags)
{
	// The object is still marked as in use: reset it before compiling again.
	if (!mFree)
	{
		Clear();
	}

	re = pcre_compile(pattern, iFlags, &mError, &mErrorOffset, NULL);

	if (re != NULL)
	{
		// Only a successful compile marks the object as in use.
		mFree = false;
		return 1;
	}

	return 0;
}
int RegEx::Match(const char *str) int RegEx::Match(const char *str)
{ {
int rc = 0; int rc = 0;

View File

@ -1,3 +1,35 @@
/* AMX Mod X
* Regular Expressions Module
*
* by the AMX Mod X Development Team
*
* This file is part of AMX Mod X.
*
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* In addition, as a special exception, the author gives permission to
* link the code of this program with the Half-Life Game Engine ("HL
* Engine") and Modified Game Libraries ("MODs") developed by Valve,
* L.L.C ("Valve"). You must obey the GNU General Public License in all
* respects for all of the code used other than the HL Engine and MODs
* from Valve. If you modify this file, you may extend this exception
* to your version of the file, but you are not obligated to do so. If
* you do not wish to do so, delete this exception statement from your
* version.
*/
#ifndef _INCLUDE_CREGEX_H #ifndef _INCLUDE_CREGEX_H
#define _INCLUDE_CREGEX_H #define _INCLUDE_CREGEX_H
@ -10,6 +42,7 @@ public:
void Clear(); void Clear();
int Compile(const char *pattern, const char* flags = NULL); int Compile(const char *pattern, const char* flags = NULL);
int Compile(const char *pattern, int iFlags);
int Match(const char *str); int Match(const char *str);
void ClearMatch(); void ClearMatch();
const char *GetSubstring(int s, char buffer[], int max); const char *GetSubstring(int s, char buffer[], int max);

View File

@ -1,3 +1,35 @@
/* AMX Mod X
* Regular Expressions Module
*
* by the AMX Mod X Development Team
*
* This file is part of AMX Mod X.
*
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* In addition, as a special exception, the author gives permission to
* link the code of this program with the Half-Life Game Engine ("HL
* Engine") and Modified Game Libraries ("MODs") developed by Valve,
* L.L.C ("Valve"). You must obey the GNU General Public License in all
* respects for all of the code used other than the HL Engine and MODs
* from Valve. If you modify this file, you may extend this exception
* to your version of the file, but you are not obligated to do so. If
* you do not wish to do so, delete this exception statement from your
* version.
*/
#include <string.h> #include <string.h>
#include "pcre.h" #include "pcre.h"
#include "amxxmodule.h" #include "amxxmodule.h"
@ -19,6 +51,7 @@ int GetPEL()
return (int)PEL.size() - 1; return (int)PEL.size() - 1;
} }
// native Regex:regex_compile(const pattern[], &ret, error[], maxLen, const flags[]=""); // native Regex:regex_compile(const pattern[], &ret, error[], maxLen, const flags[]="");
static cell AMX_NATIVE_CALL regex_compile(AMX *amx, cell *params) static cell AMX_NATIVE_CALL regex_compile(AMX *amx, cell *params)
{ {
@ -39,7 +72,30 @@ static cell AMX_NATIVE_CALL regex_compile(AMX *amx, cell *params)
} }
return id+1; return id+1;
}// 1.8 includes the last parameter }
// native Regex:regex_compile_ex(const pattern[], flags = 0, error[] = "", maxLen = 0, &RegexError:errcode = REGEX_ERROR_NONE);
//
// Compiles params[1] with the integer option mask in params[2].
// Returns the PEL slot index + 1 on success. On failure it returns -1 after
// writing the error text to params[3] (bounded by params[4]) and a number to
// the by-reference cell in params[5].
//
// NOTE(review): the number written to params[5] is RegEx::mErrorOffset, which
// RegEx::Compile() passes to pcre_compile() as its error-offset argument, while
// the native's signature calls the parameter "errcode" - confirm which is intended.
static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params)
{
	int patternLen;
	const char *pattern = MF_GetAmxString(amx, params[1], 0, &patternLen);

	const int slot = GetPEL();
	RegEx *compiler = PEL[slot];

	if (compiler->Compile(pattern, params[2]) != 0)
	{
		// Handles are 1-based.
		return slot + 1;
	}

	cell *errcodeOut = MF_GetAmxAddr(amx, params[5]);
	*errcodeOut = compiler->mErrorOffset;

	// Fall back to a generic text when no error message is available.
	const char *message = compiler->mError;
	if (!message)
	{
		message = "unknown";
	}
	MF_SetAmxString(amx, params[3], message, params[4]);

	return -1;
}
// 1.8 includes the last parameter
// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = ""); // Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params) static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
{ {
@ -86,6 +142,7 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
return id+1; return id+1;
} }
// native regex_match_c(const string[], Regex:id, &ret); // native regex_match_c(const string[], Regex:id, &ret);
static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params) static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
{ {
@ -125,6 +182,47 @@ static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
} }
} }
// native regex_match_ex(Regex:id, const string[], &RegexError:ret = REGEX_ERROR_NONE);
//
// Runs the compiled pattern identified by params[1] against the string in params[2].
// Returns:
//   -2  when RegEx::Match() reports an error (-1); RegEx::mErrorOffset is copied
//       into the by-reference cell in params[3]
//    0  when there is no match, or when the handle is invalid (an error is logged)
//   otherwise RegEx::mSubStrings
static cell AMX_NATIVE_CALL regex_match_ex(AMX *amx, cell *params)
{
	// Handles are 1-based; convert to a PEL index.
	const int slot = params[1] - 1;

	if (slot < 0 || slot >= (int)PEL.size() || PEL[slot]->isFree())
	{
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", slot);
		return 0;
	}

	int subjectLen;
	const char *subject = MF_GetAmxString(amx, params[2], 0, &subjectLen);

	RegEx *matcher = PEL[slot];

	switch (matcher->Match(subject))
	{
		case -1:
		{
			/* Match error: hand the stored value back to the caller. */
			cell *errorOut = MF_GetAmxAddr(amx, params[3]);
			*errorOut = matcher->mErrorOffset;

			/* Drop only the match results; the compiled pattern may
			   still be referenced later. */
			matcher->ClearMatch();
			return -2;
		}
		case 0:
		{
			/* No match: same partial cleanup as above. */
			matcher->ClearMatch();
			return 0;
		}
		default:
		{
			return matcher->mSubStrings;
		}
	}
}
static cell AMX_NATIVE_CALL regex_substr(AMX *amx, cell *params) static cell AMX_NATIVE_CALL regex_substr(AMX *amx, cell *params)
{ {
int id = params[1]-1; int id = params[1]-1;
@ -168,8 +266,10 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
AMX_NATIVE_INFO regex_Natives[] = { AMX_NATIVE_INFO regex_Natives[] = {
{"regex_compile", regex_compile}, {"regex_compile", regex_compile},
{"regex_compile_ex", regex_compile_ex},
{"regex_match", regex_match}, {"regex_match", regex_match},
{"regex_match_c", regex_match_c}, {"regex_match_c", regex_match_c},
{"regex_match_ex", regex_match_ex},
{"regex_substr", regex_substr}, {"regex_substr", regex_substr},
{"regex_free", regex_free}, {"regex_free", regex_free},
{NULL, NULL}, {NULL, NULL},

View File

@ -18,6 +18,7 @@
#pragma library regex #pragma library regex
#endif #endif
enum Regex enum Regex
{ {
REGEX_MATCH_FAIL = -2, REGEX_MATCH_FAIL = -2,
@ -26,6 +27,70 @@ enum Regex
REGEX_OK REGEX_OK
}; };
/**
 * @section Flags for compiling regular expressions.
 * These come directly from the pcre library and can be combined (bitwise OR)
 * in the flags parameter of regex_compile_ex.
 *
 * @note To be used with regex_compile_ex.
 *       Only available in 1.8.3 and above.
 */
#define PCRE_CASELESS 0x00000001 /* Ignore case. */
#define PCRE_MULTILINE 0x00000002 /* Multiline (affects ^ and $ so that they match the start/end of a line rather than the start/end of the string). */
#define PCRE_DOTALL 0x00000004 /* Single line (affects . so that it matches any character, even newline characters). */
#define PCRE_EXTENDED 0x00000008 /* Pattern extension (ignore whitespace and # comments). */
#define PCRE_ANCHORED 0x00000010 /* Force pattern anchoring. */
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* $ does not match a newline at the end. */
#define PCRE_UNGREEDY 0x00000200 /* Invert the greediness of quantifiers. */
#define PCRE_NOTEMPTY 0x00000400 /* An empty string is not a valid match. */
#define PCRE_UTF8 0x00000800 /* Use UTF-8 characters. */
#define PCRE_NO_UTF8_CHECK 0x00002000 /* Do not check the pattern for UTF-8 validity (only relevant if PCRE_UTF8 is set). */
#define PCRE_UCP 0x20000000 /* Use Unicode properties for \d, \w, etc. */
/**
 * Regular expression error codes.
 *
 * @note To be used with the regex_compile_ex and regex_match_ex natives, which
 *       expose a by-reference RegexError parameter.
 *       Only available in 1.8.3 and above.
 *
 * @note All error values are negative; REGEX_ERROR_NONE (0) means no error.
 *       NOTE(review): the values presumably mirror the PCRE_ERROR_* constants
 *       of the bundled pcre.h - verify when updating the PCRE library.
 */
enum RegexError
{
REGEX_ERROR_NONE = 0, /* No error */
REGEX_ERROR_NOMATCH = -1, /* No match was found */
REGEX_ERROR_NULL = -2,
REGEX_ERROR_BADOPTION = -3,
REGEX_ERROR_BADMAGIC = -4,
REGEX_ERROR_UNKNOWN_OPCODE = -5,
REGEX_ERROR_NOMEMORY = -6,
REGEX_ERROR_NOSUBSTRING = -7,
REGEX_ERROR_MATCHLIMIT = -8,
REGEX_ERROR_CALLOUT = -9, /* Never used by PCRE itself */
REGEX_ERROR_BADUTF8 = -10,
REGEX_ERROR_BADUTF8_OFFSET = -11,
REGEX_ERROR_PARTIAL = -12,
REGEX_ERROR_BADPARTIAL = -13,
REGEX_ERROR_INTERNAL = -14,
REGEX_ERROR_BADCOUNT = -15,
REGEX_ERROR_DFA_UITEM = -16,
REGEX_ERROR_DFA_UCOND = -17,
REGEX_ERROR_DFA_UMLIMIT = -18,
REGEX_ERROR_DFA_WSSIZE = -19,
REGEX_ERROR_DFA_RECURSE = -20,
REGEX_ERROR_RECURSIONLIMIT = -21,
REGEX_ERROR_NULLWSLIMIT = -22, /* No longer actually used */
REGEX_ERROR_BADNEWLINE = -23,
REGEX_ERROR_BADOFFSET = -24,
REGEX_ERROR_SHORTUTF8 = -25,
REGEX_ERROR_RECURSELOOP = -26,
REGEX_ERROR_JIT_STACKLIMIT = -27,
REGEX_ERROR_BADMODE = -28,
REGEX_ERROR_BADENDIANNESS = -29,
REGEX_ERROR_DFA_BADRESTART = -30,
REGEX_ERROR_JIT_BADOPTION = -31,
REGEX_ERROR_BADLENGTH = -32,
};
/** /**
* Precompile a regular expression. Use this if you intend on using the * Precompile a regular expression. Use this if you intend on using the
* same expression multiple times. Pass the regex handle returned here to * same expression multiple times. Pass the regex handle returned here to
@ -72,6 +137,49 @@ native Regex:regex_compile(const pattern[], &ret, error[], maxLen, const flags[]
*/ */
native regex_match_c(const string[], Regex:pattern, &ret); native regex_match_c(const string[], Regex:pattern, &ret);
/**
 * Precompiles a regular expression.
 *
 * @note Use this if you intend on using the same expression multiple times.
 *       Pass the regex handle returned here to regex_match_ex to check for matches.
 *
 * @note Unlike regex_compile, this allows you to use PCRE flags directly, and
 *       to get a more complete set of regular expression error codes.
 *       Only available in 1.8.3 and above.
 *
 * @param pattern       The regular expression pattern.
 * @param flags         General flags for the regular expression, see PCRE_* defines.
 * @param error         Error message encountered, if applicable.
 * @param maxLen        Maximum string length of the error buffer.
 * @param errcode       Regex type error code encountered, if applicable.
 *                      NOTE(review): the native currently stores RegEx::mErrorOffset
 *                      here, which is the value pcre_compile() receives as its
 *                      error-offset argument - confirm whether a code or an offset
 *                      is intended.
 *
 * @return              Valid regex handle (> 0) on success, or -1 on failure.
 */
native Regex:regex_compile_ex(const pattern[], flags = 0, error[]= "", maxLen = 0, &RegexError:errcode = REGEX_ERROR_NONE);
/**
 * Matches a string against a pre-compiled regular expression pattern.
 *
 * @note Use the regex handle passed to this function to extract
 *       matches with regex_substr().
 *
 * @note This native does not create a new handle. Free the handle obtained
 *       from regex_compile_ex() with regex_free() when you are done with
 *       the pattern.
 *
 * @note Unlike regex_match_c(), this allows you to get a more complete
 *       set of regular expression error codes, and the ret parameter is optional.
 *       Only available in 1.8.3 and above.
 *
 * @param regex         Regex handle from regex_compile_ex().
 * @param str           The string to check.
 * @param ret           Error code, if applicable.
 *
 * @return              -2 = Matching error (error code is stored in ret)
 *                       0 = No match.
 *                      >=1 = Number of results.
 */
native regex_match_ex(Regex:regex, const str[], &RegexError:ret = REGEX_ERROR_NONE);
/** /**
* Matches a string against a regular expression pattern. * Matches a string against a regular expression pattern.
* *
@ -106,29 +214,31 @@ native Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen,
/** /**
* Returns a matched substring from a regex handle. * Returns a matched substring from a regex handle.
* Substring ids start at 0 and end at ret-1, where ret is from the corresponding *
* regex_match or regex_match_c function call. * @note Substring ids start at 0 and end at ret - 1, where ret is from the corresponding
* regex_match, regex_match_c or regex_match_ex function call.
* *
* @param id The regex handle to extract data from. * @param id The regex handle to extract data from.
* @param str_id The index of the expression to get - starts at 0, and ends at ret - 1. * @param str_id The index of the expression to get - starts at 0, and ends at ret - 1.
* @param buffer The buffer to set to the matching substring. * @param buffer The buffer to set to the matching substring.
* @param maxLen The maximum string length of the buffer. * @param maxLen The maximum string length of the buffer.
* *
* @return 1 on success, otherwise 0 on failure.
*/ */
native regex_substr(Regex:id, str_id, buffer[], maxLen); native regex_substr(Regex:id, str_id, buffer[], maxLen);
/** /**
* Frees the memory associated with a regex result, and sets the handle to 0. * Frees the memory associated with a regex result, and sets the handle to 0.
* This must be called on all results from regex_match() when you are done extracting *
* the results with regex_substr(). * @note This must be called on all results from regex_match() when you are done extracting
* The results of regex_compile() (and subsequently, regex_match_c()) only need to be freed * the results with regex_substr().
* when you are done using the pattern.
* *
* @note The results of regex_compile() or regex_compile_ex() (and subsequently, regex_match_c() or regex_match_ex())
* only need to be freed when you are done using the pattern.
*
* @note Do not use the handle again after freeing it!
* *
* @param id The regex handle to free. * @param id The regex handle to free.
*
* @noreturn * @noreturn
*
* @note Do not use the handle again after freeing it!
*/ */
native regex_free(&Regex:id); native regex_free(&Regex:id);