Implemented amb673 - pre-compiled regular expression support.
Also commented in the include file a bit better.
This commit is contained in:
parent
cf36abf7e6
commit
48022d3c5c
@ -43,12 +43,49 @@ bool RegEx::isFree(bool set, bool val)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int RegEx::Compile(const char *pattern)
|
int RegEx::Compile(const char *pattern, const char* flags)
|
||||||
{
|
{
|
||||||
if (!mFree)
|
if (!mFree)
|
||||||
Clear();
|
Clear();
|
||||||
|
|
||||||
re = pcre_compile(pattern, 0, &mError, &mErrorOffset, NULL);
|
|
||||||
|
int iFlags = 0;
|
||||||
|
|
||||||
|
if (flags != NULL)
|
||||||
|
{
|
||||||
|
for ( ; *flags != 0; flags++)
|
||||||
|
{
|
||||||
|
switch (*flags)
|
||||||
|
{
|
||||||
|
case 'i':
|
||||||
|
{
|
||||||
|
iFlags |= PCRE_CASELESS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'm':
|
||||||
|
{
|
||||||
|
iFlags |= PCRE_MULTILINE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 's':
|
||||||
|
{
|
||||||
|
iFlags |= PCRE_DOTALL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'x':
|
||||||
|
{
|
||||||
|
iFlags |= PCRE_EXTENDED;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
re = pcre_compile(pattern, iFlags, &mError, &mErrorOffset, NULL);
|
||||||
|
|
||||||
if (re == NULL)
|
if (re == NULL)
|
||||||
{
|
{
|
||||||
@ -67,6 +104,8 @@ int RegEx::Match(const char *str)
|
|||||||
if (mFree || re == NULL)
|
if (mFree || re == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
this->ClearMatch();
|
||||||
|
|
||||||
//save str
|
//save str
|
||||||
subject = new char[strlen(str)+1];
|
subject = new char[strlen(str)+1];
|
||||||
strcpy(subject, str);
|
strcpy(subject, str);
|
||||||
@ -88,6 +127,16 @@ int RegEx::Match(const char *str)
|
|||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
void RegEx::ClearMatch()
|
||||||
|
{
|
||||||
|
// Clears match results
|
||||||
|
mErrorOffset = 0;
|
||||||
|
mError = NULL;
|
||||||
|
if (subject)
|
||||||
|
delete [] subject;
|
||||||
|
subject = NULL;
|
||||||
|
mSubStrings = 0;
|
||||||
|
}
|
||||||
|
|
||||||
const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
||||||
{
|
{
|
||||||
|
@ -9,8 +9,9 @@ public:
|
|||||||
bool isFree(bool set=false, bool val=false);
|
bool isFree(bool set=false, bool val=false);
|
||||||
void Clear();
|
void Clear();
|
||||||
|
|
||||||
int Compile(const char *pattern);
|
int Compile(const char *pattern, const char* flags = NULL);
|
||||||
int Match(const char *str);
|
int Match(const char *str);
|
||||||
|
void ClearMatch();
|
||||||
const char *GetSubstring(int s, char buffer[], int max);
|
const char *GetSubstring(int s, char buffer[], int max);
|
||||||
public:
|
public:
|
||||||
int mErrorOffset;
|
int mErrorOffset;
|
||||||
|
@ -19,7 +19,28 @@ int GetPEL()
|
|||||||
|
|
||||||
return (int)PEL.size() - 1;
|
return (int)PEL.size() - 1;
|
||||||
}
|
}
|
||||||
|
// native Regex:regex_compile(const pattern[], &ret, error[], maxLen, const flags[]="");
|
||||||
|
static cell AMX_NATIVE_CALL regex_compile(AMX *amx, cell *params)
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
const char *regex = MF_GetAmxString(amx, params[1], 0, &len);
|
||||||
|
const char *flags = MF_GetAmxString(amx, params[5], 1, &len);
|
||||||
|
|
||||||
|
int id = GetPEL();
|
||||||
|
RegEx *x = PEL[id];
|
||||||
|
|
||||||
|
if (x->Compile(regex, flags) == 0)
|
||||||
|
{
|
||||||
|
cell *eOff = MF_GetAmxAddr(amx, params[2]);
|
||||||
|
const char *err = x->mError;
|
||||||
|
*eOff = x->mErrorOffset;
|
||||||
|
MF_SetAmxString(amx, params[3], err?err:"unknown", params[4]);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return id+1;
|
||||||
|
}// 1.8 includes the last parameter
|
||||||
|
// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
|
||||||
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
|
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
|
||||||
{
|
{
|
||||||
int len;
|
int len;
|
||||||
@ -29,7 +50,14 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
|
|||||||
int id = GetPEL();
|
int id = GetPEL();
|
||||||
RegEx *x = PEL[id];
|
RegEx *x = PEL[id];
|
||||||
|
|
||||||
if (x->Compile(regex) == 0)
|
char* flags = NULL;
|
||||||
|
|
||||||
|
if ((params[0] / sizeof(cell)) >= 6) // compiled with 1.8's extra parameter
|
||||||
|
{
|
||||||
|
flags = MF_GetAmxString(amx, params[6], 2, &len);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (x->Compile(regex, flags) == 0)
|
||||||
{
|
{
|
||||||
cell *eOff = MF_GetAmxAddr(amx, params[3]);
|
cell *eOff = MF_GetAmxAddr(amx, params[3]);
|
||||||
const char *err = x->mError;
|
const char *err = x->mError;
|
||||||
@ -58,6 +86,44 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
|
|||||||
|
|
||||||
return id+1;
|
return id+1;
|
||||||
}
|
}
|
||||||
|
// native regex_match_c(const string[], Regex:id, &ret);
|
||||||
|
static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
int id = params[2]-1;
|
||||||
|
const char *str = MF_GetAmxString(amx, params[1], 0, &len);
|
||||||
|
|
||||||
|
if (id >= (int)PEL.size() || id < 0 || PEL[id]->isFree())
|
||||||
|
{
|
||||||
|
MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
RegEx *x = PEL[id];
|
||||||
|
|
||||||
|
int e = x->Match(str);
|
||||||
|
if (e == -1)
|
||||||
|
{
|
||||||
|
/* there was a match error. move on. */
|
||||||
|
cell *res = MF_GetAmxAddr(amx, params[3]);
|
||||||
|
*res = x->mErrorOffset;
|
||||||
|
/* only clear the match results, since the regex object
|
||||||
|
may still be referenced later */
|
||||||
|
x->ClearMatch();
|
||||||
|
return -2;
|
||||||
|
} else if (e == 0) {
|
||||||
|
cell *res = MF_GetAmxAddr(amx, params[3]);
|
||||||
|
*res = 0;
|
||||||
|
/* only clear the match results, since the regex object
|
||||||
|
may still be referenced later */
|
||||||
|
x->ClearMatch();
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
cell *res = MF_GetAmxAddr(amx, params[3]);
|
||||||
|
*res = x->mSubStrings;
|
||||||
|
return x->mSubStrings;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static cell AMX_NATIVE_CALL regex_substr(AMX *amx, cell *params)
|
static cell AMX_NATIVE_CALL regex_substr(AMX *amx, cell *params)
|
||||||
{
|
{
|
||||||
@ -101,7 +167,9 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
|
|||||||
}
|
}
|
||||||
|
|
||||||
AMX_NATIVE_INFO regex_Natives[] = {
|
AMX_NATIVE_INFO regex_Natives[] = {
|
||||||
|
{"regex_compile", regex_compile},
|
||||||
{"regex_match", regex_match},
|
{"regex_match", regex_match},
|
||||||
|
{"regex_match_c", regex_match_c},
|
||||||
{"regex_substr", regex_substr},
|
{"regex_substr", regex_substr},
|
||||||
{"regex_free", regex_free},
|
{"regex_free", regex_free},
|
||||||
{NULL, NULL},
|
{NULL, NULL},
|
||||||
|
@ -26,22 +26,109 @@ enum Regex
|
|||||||
REGEX_OK
|
REGEX_OK
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Return values:
|
/**
|
||||||
-2 = Matching error (error code stored in ret)
|
* Precompile a regular expression. Use this if you intend on using the
|
||||||
-1 = Error in pattern (error message and offset # in error[] and ret)
|
* same expression multiple times. Pass the regex handle returned here to
|
||||||
0 = No match
|
* regex_match_c to check for matches.
|
||||||
>1 = Id for getting more info (you must call regex_free() later on)
|
*
|
||||||
(also note that ret will contain the number of substrings found)
|
* @param pattern The regular expression pattern.
|
||||||
|
* @param ret Error code (offset in the pattern) encountered, if applicable.
|
||||||
|
* @param error Error message encountered, if applicable.
|
||||||
|
* @param maxLen Maximum string length of the error buffer.
|
||||||
|
* @param flags General flags for the regular expression.
|
||||||
|
* i = Ignore case
|
||||||
|
* m = Multilines (affects ^ and $ so that they match
|
||||||
|
* the start/end of a line rather than matching the
|
||||||
|
* start/end of the string).
|
||||||
|
* s = Single line (affects . so that it matches any character,
|
||||||
|
* even new line characters).
|
||||||
|
* x = Pattern extension (ignore whitespace and # comments).
|
||||||
|
*
|
||||||
|
* @return -1 on error in the pattern, or a valid regex handle (> 0) on success.
|
||||||
|
*
|
||||||
|
* @note This handle is automatically freed on map change. However,
|
||||||
|
* if you are completely done with it before then, you should
|
||||||
|
* call regex_free on this handle.
|
||||||
*/
|
*/
|
||||||
|
native Regex:regex_compile(const pattern[], &ret, error[], maxLen, const flags[]="");
|
||||||
|
|
||||||
native Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen);
|
/**
|
||||||
|
* Matches a string against a pre-compiled regular expression pattern.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* @param string The string to check.
|
||||||
|
* @param pattern The regex handle returned by regex_compile().
|
||||||
|
* @param ret Error code, if applicable, or number of results on success.
|
||||||
|
*
|
||||||
|
* @return -2 = Matching error (error code is stored in ret)
|
||||||
|
* 0 = No match.
|
||||||
|
* >=1 = Number of results.
|
||||||
|
*
|
||||||
|
* @note You should free the regex handle (with regex_free())
|
||||||
|
* when you are done with this pattern.
|
||||||
|
*
|
||||||
|
* @note Use the regex handle passed to this function to extract
|
||||||
|
* matches with regex_substr().
|
||||||
|
*/
|
||||||
|
native regex_match_c(const string[], Regex:pattern, &ret);
|
||||||
|
|
||||||
/* Returns a matched substring from a regex handle
|
/**
|
||||||
* substring ids start at 0 and end at ret-1, where ret is from the above function
|
* Matches a string against a regular expression pattern.
|
||||||
|
*
|
||||||
|
* @note If you intend on using the same regular expression pattern
|
||||||
|
* multiple times, consider using regex_compile and regex_match_c
|
||||||
|
* instead of making this function reparse the expression each time.
|
||||||
|
*
|
||||||
|
* @param string The string to check.
|
||||||
|
* @param pattern The regular expression pattern.
|
||||||
|
* @param ret Error code, or result state of the match.
|
||||||
|
* @param error Error message, if applicable.
|
||||||
|
* @param maxLen Maximum length of the error buffer.
|
||||||
|
* @param flags General flags for the regular expression.
|
||||||
|
* i = Ignore case
|
||||||
|
* m = Multilines (affects ^ and $ so that they match
|
||||||
|
* the start/end of a line rather than matching the
|
||||||
|
* start/end of the string).
|
||||||
|
* s = Single line (affects . so that it matches any character,
|
||||||
|
* even new line characters).
|
||||||
|
* x = Pattern extension (ignore whitespace and # comments).
|
||||||
|
*
|
||||||
|
* @return -2 = Matching error (error code is stored in ret)
|
||||||
|
* -1 = Error in pattern (error message and offset # in error and ret)
|
||||||
|
* 0 = No match.
|
||||||
|
* >=1 = Handle for getting more information (via regex_substr)
|
||||||
|
*
|
||||||
|
* @note Flags only exist in amxmodx 1.8 and later.
|
||||||
|
* @note You should free the returned handle (with regex_free())
|
||||||
|
* when you are done extracting all of the substrings.
|
||||||
|
*/
|
||||||
|
native Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a matched substring from a regex handle.
|
||||||
|
* Substring ids start at 0 and end at ret-1, where ret is from the corresponding
|
||||||
|
* regex_match or regex_match_c function call.
|
||||||
|
*
|
||||||
|
* @param id The regex handle to extract data from.
|
||||||
|
* @param str_id The index of the expression to get - starts at 0, and ends at ret - 1.
|
||||||
|
* @param buffer The buffer to set to the matching substring.
|
||||||
|
* @param maxLen The maximum string length of the buffer.
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
native regex_substr(Regex:id, str_id, buffer[], maxLen);
|
native regex_substr(Regex:id, str_id, buffer[], maxLen);
|
||||||
|
|
||||||
/* Frees the memory associated with a regex results and sets the handle to 0.
|
/**
|
||||||
* You must do this if the handle >=1, once you're done.
|
* Frees the memory associated with a regex result, and sets the handle to 0.
|
||||||
|
* This must be called on all results from regex_match() when you are done extracting
|
||||||
|
* the results with regex_substr().
|
||||||
|
* The results of regex_compile() (and subsequently, regex_match_c()) only need to be freed
|
||||||
|
* when you are done using the pattern.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* @param id The regex handle to free.
|
||||||
|
*
|
||||||
|
* @noreturn
|
||||||
|
*
|
||||||
|
* @note Do not use the handle again after freeing it!
|
||||||
*/
|
*/
|
||||||
native regex_free(&Regex:id);
|
native regex_free(&Regex:id);
|
Loading…
Reference in New Issue
Block a user