Regex: Remove regex_match_ex and RegexError tag, change regex_match_all to use flags and update documentation.

- regex_match_ex was essentially a duplicate of regex_match_c.
- regex_match_all now uses integer flags (same header style as regex_compile_ex).
- Removed the RegexError tag so that old natives can use these constants without a tag mismatch error.
- Made all error parameters optional.
This commit is contained in:
Arkshine 2014-07-08 00:08:35 +02:00
parent a44d20b26b
commit 287f471ac4
2 changed files with 196 additions and 252 deletions

View File

@ -64,9 +64,8 @@ static cell AMX_NATIVE_CALL regex_compile(AMX *amx, cell *params)
if (x->Compile(regex, flags) == 0) if (x->Compile(regex, flags) == 0)
{ {
cell *eOff = MF_GetAmxAddr(amx, params[2]);
const char *err = x->mError; const char *err = x->mError;
*eOff = x->mErrorOffset; *MF_GetAmxAddr(amx, params[2]) = x->mErrorOffset;
MF_SetAmxString(amx, params[3], err?err:"unknown", params[4]); MF_SetAmxString(amx, params[3], err?err:"unknown", params[4]);
return -1; return -1;
} }
@ -74,7 +73,7 @@ static cell AMX_NATIVE_CALL regex_compile(AMX *amx, cell *params)
return id+1; return id+1;
} }
// native Regex:regex_compile_ex(const pattern[], flags = 0, error[] = "", maxLen = 0, &RegexError:errcode = REGEX_ERROR_NONE); // native Regex:regex_compile_ex(const pattern[], flags = 0, error[] = "", maxLen = 0, &errcode = 0);
static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params) static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params)
{ {
int len; int len;
@ -85,9 +84,8 @@ static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params)
if (x->Compile(regex, params[2]) == 0) if (x->Compile(regex, params[2]) == 0)
{ {
cell *eOff = MF_GetAmxAddr(amx, params[5]);
const char *err = x->mError; const char *err = x->mError;
*eOff = x->mErrorOffset; *MF_GetAmxAddr(amx, params[5]) = x->mErrorOffset;
MF_SetAmxString(amx, params[3], err ? err : "unknown", params[4]); MF_SetAmxString(amx, params[3], err ? err : "unknown", params[4]);
return -1; return -1;
} }
@ -105,22 +103,35 @@ cell match(AMX *amx, cell *params, bool all)
RegEx *x = PEL[id]; RegEx *x = PEL[id];
char *flags = NULL; char *flags = NULL;
cell *errorCode;
int result = 0;
if ((params[0] / sizeof(cell)) >= 6) // compiled with 1.8's extra parameter if (!all)
{
if (*params / sizeof(cell) >= 6) // compiled with 1.8's extra parameter
{ {
flags = MF_GetAmxString(amx, params[6], 2, &len); flags = MF_GetAmxString(amx, params[6], 2, &len);
} }
if (x->Compile(regex, flags) == 0) result = x->Compile(regex, flags);
errorCode = MF_GetAmxAddr(amx, params[3]);
}
else
{
result = x->Compile(regex, params[3]);
errorCode = MF_GetAmxAddr(amx, params[6]);
}
if (!result)
{ {
cell *eOff = MF_GetAmxAddr(amx, params[3]);
const char *err = x->mError; const char *err = x->mError;
*eOff = x->mErrorOffset; *errorCode = x->mErrorOffset;
MF_SetAmxString(amx, params[4], err ? err : "unknown", params[5]); MF_SetAmxString(amx, params[4], err ? err : "unknown", params[5]);
return -1; return -1;
} }
int e; int e;
if (all) if (all)
e = x->MatchAll(str); e = x->MatchAll(str);
else else
@ -129,33 +140,31 @@ cell match(AMX *amx, cell *params, bool all)
if (e == -1) if (e == -1)
{ {
/* there was a match error. destroy this and move on. */ /* there was a match error. destroy this and move on. */
cell *res = MF_GetAmxAddr(amx, params[3]); *errorCode = x->mErrorOffset;
*res = x->mErrorOffset;
x->Clear(); x->Clear();
return -2; return -2;
} }
else if (e == 0) { else if (e == 0)
cell *res = MF_GetAmxAddr(amx, params[3]); {
*res = 0; *errorCode = 0;
x->Clear(); x->Clear();
return 0; return 0;
} }
else { else
cell *res = MF_GetAmxAddr(amx, params[3]); {
*res = x->Count(); *errorCode = x->Count();
} }
return id + 1; return id + 1;
} }
// 1.8 includes the last parameter // native Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params) static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
{ {
return match(amx, params, false); return match(amx, params, false);
} }
// Regex:regex_match_all(const string[], const pattern[], &ret, error[], maxLen, const flags[] = ""); // native Regex:regex_match_all(const string[], const pattern[], flags = 0, error[] = "", maxLen = 0, &errcode = 0);
static cell AMX_NATIVE_CALL regex_match_all(AMX *amx, cell *params) static cell AMX_NATIVE_CALL regex_match_all(AMX *amx, cell *params)
{ {
return match(amx, params, true); return match(amx, params, true);
@ -163,9 +172,7 @@ static cell AMX_NATIVE_CALL regex_match_all(AMX *amx, cell *params)
cell match_c(AMX *amx, cell *params, bool all) cell match_c(AMX *amx, cell *params, bool all)
{ {
int len;
int id = params[2] - 1; int id = params[2] - 1;
const char *str = MF_GetAmxString(amx, params[1], 0, &len);
if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree()) if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
{ {
@ -173,6 +180,10 @@ cell match_c(AMX *amx, cell *params, bool all)
return 0; return 0;
} }
int len;
const char *str = MF_GetAmxString(amx, params[1], 0, &len);
cell *errorCode = MF_GetAmxAddr(amx, params[3]);
RegEx *x = PEL[id]; RegEx *x = PEL[id];
int e; int e;
@ -184,24 +195,25 @@ cell match_c(AMX *amx, cell *params, bool all)
if (e == -1) if (e == -1)
{ {
/* there was a match error. move on. */ /* there was a match error. move on. */
cell *res = MF_GetAmxAddr(amx, params[3]); *errorCode = x->mErrorOffset;
*res = x->mErrorOffset;
/* only clear the match results, since the regex object /* only clear the match results, since the regex object
may still be referenced later */ may still be referenced later */
x->ClearMatch(); x->ClearMatch();
return -2; return -2;
} }
else if (e == 0) { else if (e == 0)
cell *res = MF_GetAmxAddr(amx, params[3]); {
*res = 0; *errorCode = 0;
/* only clear the match results, since the regex object /* only clear the match results, since the regex object
may still be referenced later */ may still be referenced later */
x->ClearMatch(); x->ClearMatch();
return 0; return 0;
} }
else { else
cell *res = MF_GetAmxAddr(amx, params[3]); {
*res = x->Count(); *errorCode = x->Count();
return x->Count(); return x->Count();
} }
} }
@ -218,47 +230,7 @@ static cell AMX_NATIVE_CALL regex_match_all_c(AMX *amx, cell *params)
return match_c(amx, params, true); return match_c(amx, params, true);
} }
// native regex_match_ex(Regex:id, const string[], &RegexError:ret = REGEX_ERROR_NONE); // native regex_substr(Regex:id, str_id, buffer[], maxLen);
/**
 * Native handler: runs a single match of an already-compiled regex against a string.
 *
 * params[1] - regex handle as seen by the script (1-based; converted to a 0-based PEL index).
 * params[2] - string to match.
 * params[3] - by-reference cell; written ONLY on a match error (receives x->mErrorOffset).
 *
 * Returns  0 if the handle is invalid (an error is logged first) or nothing matched,
 *         -2 if Match() reported an error,
 *         otherwise x->Count().
 */
static cell AMX_NATIVE_CALL regex_match_ex(AMX *amx, cell *params)
{
int id = params[1] - 1;
/* reject out-of-range indices and slots that are marked free */
if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
{
/* NOTE(review): the logged value is the 0-based index, not the handle the script passed */
MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
return 0;
}
int len;
const char *str = MF_GetAmxString(amx, params[2], 0, &len);
RegEx *x = PEL[id];
int e = x->Match(str);
if (e == -1)
{
/* there was a match error. move on. */
cell *res = MF_GetAmxAddr(amx, params[3]);
*res = x->mErrorOffset;
/* only clear the match results, since the regex object
may still be referenced later */
x->ClearMatch();
return -2;
}
else if (e == 0)
{
/* no match: the by-reference cell in params[3] is left untouched on this path */
/* only clear the match results, since the regex object
may still be referenced later */
x->ClearMatch();
return 0;
}
else
{
/* match succeeded: results are kept on the regex object and the count is returned */
return x->Count();
}
}
static cell AMX_NATIVE_CALL regex_substr(AMX *amx, cell *params) static cell AMX_NATIVE_CALL regex_substr(AMX *amx, cell *params)
{ {
int id = params[1]-1; int id = params[1]-1;
@ -305,7 +277,6 @@ AMX_NATIVE_INFO regex_Natives[] = {
{"regex_compile_ex", regex_compile_ex}, {"regex_compile_ex", regex_compile_ex},
{"regex_match", regex_match}, {"regex_match", regex_match},
{"regex_match_c", regex_match_c}, {"regex_match_c", regex_match_c},
{"regex_match_ex", regex_match_ex},
{"regex_match_all", regex_match_all}, {"regex_match_all", regex_match_all},
{"regex_match_all_c", regex_match_all_c}, {"regex_match_all_c", regex_match_all_c},
{"regex_substr", regex_substr}, {"regex_substr", regex_substr},

View File

@ -50,121 +50,6 @@ enum Regex
REGEX_OK REGEX_OK
}; };
/**
* Precompile a regular expression.
*
* @note Use this if you intend on using the ame expression multiple times.
* Pass the regex handle returned here to regex_match_c to check for matches.
*
* @note This handle is automatically freed on map change. However,
* if you are completely done with it before then, you should
* call regex_free on this handle.
*
* @param pattern The regular expression pattern.
* @param ret Error code encountered, if applicable.
* @param error Error message encountered, if applicable.
* @param maxLen Maximum string length of the error buffer.
* @param flags General flags for the regular expression.
* i = Ignore case
* m = Multilines (affects ^ and $ so that they match
* the start/end of a line rather than matching the
* start/end of the string).
* s = Single line (affects . so that it matches any character,
* even new line characters).
* x = Pattern extension (ignore whitespace and # comments).
*
* @return -1 on error in the pattern, > valid regex handle (> 0) on success.
*/
native Regex:regex_compile(const pattern[], &ret, error[], maxLen, const flags[]="");
/**
* Matches a string against a pre-compiled regular expression pattern.
*
* @note You should free the returned handle with regex_free()
* when you are done with this pattern.
*
* @note Use the regex handle passed to this function to extract
* matches with regex_substr().
*
* @param string The string to check.
* @param pattern The regular expression pattern.
* @param ret Error code, if applicable, or number of results on success.
*
* @return -2 = Matching error (error code is stored in ret)
* 0 = No match.
* >1 = Number of results.
*/
native regex_match_c(const string[], Regex:pattern, &ret);
/**
* Matches a string against a regular expression pattern.
*
* @note If you intend on using the same regular expression pattern
* multiple times, consider using regex_compile and regex_match_ex
* instead of making this function reparse the expression each time.
*
* @note Flags only exist in amxmodx 1.8 and later.
*
* @note You should free the returned handle with regex_free()
* when you are done extracting all of the substrings.
*
* @param string The string to check.
* @param pattern The regular expression pattern.
* @param ret Error code, or result state of the match.
* @param error Error message, if applicable.
* @param maxLen Maximum length of the error buffer.
* @param flags General flags for the regular expression.
* i = Ignore case
* m = Multilines (affects ^ and $ so that they match
* the start/end of a line rather than matching the
* start/end of the string).
* s = Single line (affects . so that it matches any character,
* even new line characters).
* x = Pattern extension (ignore whitespace and # comments).
*
* @return -2 = Matching error (error code is stored in ret)
* -1 = Error in pattern (error message and offset # in error and ret)
* 0 = No match.
* >1 = Handle for getting more information (via regex_substr)
*/
native Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
/**
* Returns a matched substring from a regex handle.
*
* @note Substring ids start at 0 and end at ret - 1, where ret is from the corresponding
* regex_match, regex_match_c or regex_match_ex function call.
*
* @param id The regex handle to extract data from.
* @param str_id The index of the expression to get - starts at 0, and ends at ret - 1.
* @param buffer The buffer to set to the matching substring.
* @param maxLen The maximum string length of the buffer.
*
* @return 1 on success, otherwise 0 on failure.
*/
native regex_substr(Regex:id, str_id, buffer[], maxLen);
/**
* Frees the memory associated with a regex result, and sets the handle to 0.
*
* @note This must be called on all results from regex_match() when you are done extracting
* the results with regex_substr().
*
* @note The results of regex_compile() or regex_compile_ex() (and subsequently, regex_match_c() or regex_match_ex())
* only need to be freed when you are done using the pattern.
*
* @note Do not use the handle again after freeing it!
*
* @param id The regex handle to free.
* @noreturn
*/
native regex_free(&Regex:id);
/**
* The following natives are only available in 1.8.3 and above.
*/
/** /**
* Flags for compiling regex expressions. * Flags for compiling regex expressions.
* These come directly from the pcre library and can be used in regex_compile_ex. * These come directly from the pcre library and can be used in regex_compile_ex.
@ -187,7 +72,7 @@ native regex_free(&Regex:id);
* Regex expression error codes. * Regex expression error codes.
* This can be used with regex_compile_ex and regex_match_ex. * This can be used with regex_compile_ex and regex_match_ex.
*/ */
enum RegexError enum /*RegexError*/
{ {
REGEX_ERROR_NONE = 0, /* No error */ REGEX_ERROR_NONE = 0, /* No error */
REGEX_ERROR_NOMATCH = -1, /* No match was found */ REGEX_ERROR_NOMATCH = -1, /* No match was found */
@ -229,73 +114,60 @@ enum RegexError
* Precompile a regular expression. * Precompile a regular expression.
* *
* @note Use this if you intend on using the ame expression multiple times. * @note Use this if you intend on using the ame expression multiple times.
* Pass the regex handle returned here to regex_match_ex() to check for matches. * Pass the regex handle returned here to regex_match_c to check for matches.
* *
* @note Unlike regex_compile(), this allows you to use directly PCRE flags, and * @note This handle is automatically freed on map change. However,
* to get a more complete set of regular expression error codes. * if you are completely done with it before then, you should
* call regex_free on this handle.
* *
* @param pattern The regular expression pattern. * @param pattern The regular expression pattern.
* @param flags General flags for the regular expression, see PCRE_* defines. * @param ret Error code encountered, if applicable.
* @param error Error message encountered, if applicable. * @param error Error message encountered, if applicable.
* @param maxLen Maximum string length of the error buffer. * @param maxLen Maximum string length of the error buffer.
* @param errcode Regex type error code encountered, if applicable. * @param flags General flags for the regular expression.
* i = Ignore case
* m = Multilines (affects ^ and $ so that they match
* the start/end of a line rather than matching the
* start/end of the string).
* s = Single line (affects . so that it matches any character,
* even new line characters).
* x = Pattern extension (ignore whitespace and # comments).
* *
* @return Valid regex handle (> 0) on success, or -1 on failure. * @return -1 on error in the pattern, > valid regex handle (> 0) on success.
*/ */
native Regex:regex_compile_ex(const pattern[], flags = 0, error[]= "", maxLen = 0, &RegexError:errcode = REGEX_ERROR_NONE); native Regex:regex_compile(const pattern[], &ret = 0, error[] = "", maxLen = 0, const flags[]="");
/** /**
* Matches a string against a pre-compiled regular expression pattern. * Matches a string against a pre-compiled regular expression pattern.
* *
* @note Use the regex handle passed to this function to extract
* matches with regex_substr().
*
* @note You should free the returned handle with regex_free() * @note You should free the returned handle with regex_free()
* when you are done with this pattern. * when you are done with this pattern.
* *
* @note Unlike regex_match_c(), this allows you to get a more complete
* set of regular expression error codes and parameter is optional.
*
* @param str The string to check.
* @param regex Regex Handle from regex_compile_ex()
* @param ret Error code, if applicable.
*
* @return -2 = Matching error (error code is stored in ret)
* 0 = No match.
* >1 = Number of results.
*/
native regex_match_ex(Handle:regex, const str[], &RegexError:ret = REGEX_ERROR_NONE);
/**
* Matches a string against a pre-compiled regular expression pattern, matching all
* occurances of the pattern inside the string. This is similar to using the "g" flag
* in perl regex.
*
*
* @param pattern The regular expression pattern.
* @param string The string to check.
* @param ret Error code, if applicable, or number of results on success.
*
* @return -2 = Matching error (error code is stored in ret)
* 0 = No match.
* >1 = Number of results.
*
* @note You should free the returned handle (with regex_free())
* when you are done with this pattern.
*
* @note Use the regex handle passed to this function to extract * @note Use the regex handle passed to this function to extract
* matches with regex_substr(). * matches with regex_substr().
*
* @param string The string to check.
* @param pattern The regular expression pattern.
* @param ret Error code, if applicable, or number of results on success. See REGEX_ERROR_* defines.
*
* @return -2 = Matching error (error code is stored in ret)
* 0 = No match.
* >1 = Number of results.
*/ */
native regex_match_all_c(const string[], Regex:pattern, &ret); native regex_match_c(const string[], Regex:pattern, &ret = 0);
/** /**
* Matches a string against a regular expression pattern, matching all occurances of the * Matches a string against a regular expression pattern.
* pattern inside the string. This is similar to using the "g" flag in perl regex.
* *
* @note If you intend on using the same regular expression pattern * @note If you intend on using the same regular expression pattern
* multiple times, consider using regex_compile and regex_match_c * multiple times, consider using regex_compile and regex_match_ex
* instead of making this function reparse the expression each time. * instead of making this function reparse the expression each time.
* *
* @note Flags only exist in amxmodx 1.8 and later.
*
* @note You should free the returned handle with regex_free()
* when you are done extracting all of the substrings.
*
* @param string The string to check. * @param string The string to check.
* @param pattern The regular expression pattern. * @param pattern The regular expression pattern.
* @param ret Error code, or result state of the match. * @param ret Error code, or result state of the match.
@ -314,12 +186,113 @@ native regex_match_all_c(const string[], Regex:pattern, &ret);
* -1 = Error in pattern (error message and offset # in error and ret) * -1 = Error in pattern (error message and offset # in error and ret)
* 0 = No match. * 0 = No match.
* >1 = Handle for getting more information (via regex_substr) * >1 = Handle for getting more information (via regex_substr)
*/
native Regex:regex_match(const string[], const pattern[], &ret = 0, error[] = "", maxLen = 0, const flags[] = "");
/**
* Returns a matched substring from a regex handle.
*
* @note Substring ids start at 0 and end at ret - 1, where ret is from the corresponding
* regex_match or regex_match_c function call.
*
* @param id The regex handle to extract data from.
* @param str_id The index of the expression to get - starts at 0, and ends at ret - 1.
* @param buffer The buffer to set to the matching substring.
* @param maxLen The maximum string length of the buffer.
*
* @return 1 on success, otherwise 0 on failure.
*/
native regex_substr(Regex:id, str_id, buffer[], maxLen);
/**
* Frees the memory associated with a regex result, and sets the handle to 0.
*
* @note This must be called on all results from regex_match() when you are done extracting
* the results with regex_substr().
*
* @note The results of regex_compile() or regex_compile_ex() (and subsequently, regex_match_c() or regex_match_all_c())
* only need to be freed when you are done using the pattern.
*
* @note Do not use the handle again after freeing it!
*
* @param id The regex handle to free.
* @noreturn
*/
native regex_free(&Regex:id);
/**
* The following natives are only available in 1.8.3 and above.
*/
/**
* Precompile a regular expression.
*
* @note Use this if you intend on using the same expression multiple times.
* Pass the regex handle returned here to regex_match_c() to check for matches.
*
* @note Unlike regex_compile(), this allows you to use directly PCRE flags, and
* to get a more complete set of regular expression error codes.
*
* @param pattern The regular expression pattern.
* @param flags General flags for the regular expression, see PCRE_* defines.
* @param error Error message encountered, if applicable.
* @param maxLen Maximum string length of the error buffer.
* @param errcode Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
*
* @return Valid regex handle (> 0) on success, or -1 on failure.
*/
native Regex:regex_compile_ex(const pattern[], flags = 0, error[]= "", maxLen = 0, &errcode = 0);
/**
* Matches a string against a pre-compiled regular expression pattern, matching all
* occurrences of the pattern inside the string. This is similar to using the "g" flag
* in perl regex.
*
*
* @param pattern The regular expression pattern.
* @param string The string to check.
* @param ret Error code, if applicable, or number of results on success.
* See REGEX_ERROR_* defines.
*
* @return -2 = Matching error (error code is stored in ret)
* 0 = No match.
* >1 = Number of results.
*
* @note You should free the returned handle (with regex_free())
* when you are done with this pattern.
*
* @note Use the regex handle passed to this function to extract
* matches with regex_substr().
*/
native regex_match_all_c(const string[], Regex:pattern, &ret = 0);
/**
* Matches a string against a regular expression pattern, matching all occurrences of the
* pattern inside the string. This is similar to using the "g" flag in perl regex.
*
* @note If you intend on using the same regular expression pattern
* multiple times, consider using regex_compile_ex and regex_match_all_c
* instead of making this function reparse the expression each time.
* *
* @note Flags only exist in amxmodx 1.8 and later. * @note Flags only exist in amxmodx 1.8 and later.
* @note You should free the returned handle (with regex_free()) *
* @note You should free the returned handle with regex_free()
* when you are done extracting all of the substrings. * when you are done extracting all of the substrings.
*
* @param string The string to check.
* @param pattern The regular expression pattern.
* @param flags General flags for the regular expression, see PCRE_* defines.
* @param error Error message encountered, if applicable.
* @param maxLen Maximum string length of the error buffer.
* @param errcode Regex error code encountered, if applicable, or number of results on success. See REGEX_ERROR_* defines.
*
* @return -2 = Matching error (error code is stored in errcode)
* -1 = Error in pattern (error message is stored in error, offset in errcode)
* 0 = No match.
* >1 = Handle for getting more information (via regex_substr)
*/ */
native Regex:regex_match_all(const string[], const pattern[], &ret, error[], maxLen, const flags[] = ""); native Regex:regex_match_all(const string[], const pattern[], flags = 0, error[]= "", maxLen = 0, &errcode = 0);
/** /**
* Matches a string against a regular expression pattern. * Matches a string against a regular expression pattern.
@ -339,16 +312,16 @@ native Regex:regex_match_all(const string[], const pattern[], &ret, error[], max
* 0 = No match. * 0 = No match.
* >1 = Number of results. * >1 = Number of results.
*/ */
stock regex_match_simple(const str[], const pattern[], flags = 0, error[]="", maxLen = 0) stock regex_match_simple(const str[], const pattern[], flags = 0, error[]= "", maxLen = 0, &errcode = 0)
{ {
new Regex:regex = regex_compile_ex(pattern, flags, error, maxLen); new Regex:regex = regex_compile_ex(pattern, flags, error, maxLen, errcode);
if (regex < 0) if (regex < 0)
{ {
return -1; return -1;
} }
new substrings = regex_match_ex(regex, str); new substrings = regex_match_c(regex, str);
regex_free(regex); regex_free(regex);