Regex: Add regex_match_all_c and regex_match_all natives (by Nextra)

This commit is contained in:
Arkshine 2014-07-07 20:49:38 +02:00
parent ee4f6b8a89
commit a44d20b26b
4 changed files with 219 additions and 43 deletions

View File

@ -42,7 +42,7 @@ RegEx::RegEx()
re = NULL; re = NULL;
mFree = true; mFree = true;
subject = NULL; subject = NULL;
mSubStrings = 0; mSubStrings.clear();
} }
void RegEx::Clear() void RegEx::Clear()
@ -54,9 +54,9 @@ void RegEx::Clear()
re = NULL; re = NULL;
mFree = true; mFree = true;
if (subject) if (subject)
delete [] subject; delete[] subject;
subject = NULL; subject = NULL;
mSubStrings = 0; mSubStrings.clear();
} }
RegEx::~RegEx() RegEx::~RegEx()
@ -152,11 +152,11 @@ int RegEx::Match(const char *str)
if (mFree || re == NULL) if (mFree || re == NULL)
return -1; return -1;
this->ClearMatch(); this->ClearMatch();
//save str //save str
subject = new char[strlen(str)+1]; subject = new char[strlen(str) + 1];
strcpy(subject, str); strcpy(subject, str);
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30); rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
@ -166,37 +166,110 @@ int RegEx::Match(const char *str)
if (rc == PCRE_ERROR_NOMATCH) if (rc == PCRE_ERROR_NOMATCH)
{ {
return 0; return 0;
} else { }
else {
mErrorOffset = rc; mErrorOffset = rc;
return -1; return -1;
} }
} }
mSubStrings = rc; RegExSub res;
mSubStrings.ensure(rc);
for (int s = 0; s < rc; ++s)
{
res.start = ovector[2 * s];
res.end = ovector[2 * s + 1];
mSubStrings.append(res);
}
return 1; return 1;
} }
int RegEx::MatchAll(const char *str)
{
int rc = 0;
int rr = 0;
int offset = 0;
if (mFree || re == NULL)
return -1;
this->ClearMatch();
//save str
subject = new char[strlen(str) + 1];
strcpy(subject, str);
RegExSub sub, whole;
while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30)))
{
if (rr < 0)
{
if (rr == PCRE_ERROR_NOMATCH)
{
break;
}
else
{
mErrorOffset = rr;
if (rc)
this->ClearMatch();
return -1;
}
}
rc += rr;
mSubStrings.ensure(rc);
for (int s = 1; s < rr; ++s)
{
sub.start = ovector[2 * s];
sub.end = ovector[2 * s + 1];
mSubStrings.append(sub);
}
offset = ovector[1];
}
if (!rc)
return 0;
sub = mSubStrings.at(0);
whole.start = sub.start;
sub = mSubStrings.back();
whole.end = sub.end;
mSubStrings.insert(0, whole);
return 1;
}
void RegEx::ClearMatch() void RegEx::ClearMatch()
{ {
// Clears match results // Clears match results
mErrorOffset = 0; mErrorOffset = 0;
mError = NULL; mError = NULL;
if (subject) if (subject)
delete [] subject; delete[] subject;
subject = NULL; subject = NULL;
mSubStrings = 0; mSubStrings.clear();
} }
const char *RegEx::GetSubstring(int s, char buffer[], int max) const char *RegEx::GetSubstring(int s, char buffer[], int max)
{ {
int i = 0; int i = 0;
if (s >= mSubStrings || s < 0) if ((size_t)s >= mSubStrings.length() || s < 0)
return NULL; return NULL;
char *substr_a = subject + ovector[2*s]; RegExSub sub = mSubStrings.at(s);
int substr_l = ovector[2*s+1] - ovector[2*s];
for (i = 0; i<substr_l; i++) char *substr_a = subject + sub.start;
int substr_l = sub.end - sub.start;
for (i = 0; i < substr_l; i++)
{ {
if (i >= max) if (i >= max)
break; break;
@ -206,5 +279,4 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
buffer[i] = '\0'; buffer[i] = '\0';
return buffer; return buffer;
} }

View File

@ -32,29 +32,40 @@
*/ */
#ifndef _INCLUDE_CREGEX_H #ifndef _INCLUDE_CREGEX_H
#define _INCLUDE_CREGEX_H #define _INCLUDE_CREGEX_H
#include <am-vector.h>
class RegEx class RegEx
{ {
public: public:
struct RegExSub {
int start, end;
};
RegEx(); RegEx();
~RegEx(); ~RegEx();
bool isFree(bool set=false, bool val=false); bool isFree(bool set=false, bool val=false);
void Clear(); void Clear();
int Compile(const char *pattern, const char* flags = NULL); int Compile(const char *pattern, const char* flags = NULL);
int Compile(const char *pattern, int iFlags); int Compile(const char *pattern, int iFlags);
int Match(const char *str); int Match(const char *str);
int MatchAll(const char *str);
void ClearMatch(); void ClearMatch();
const char *GetSubstring(int s, char buffer[], int max); const char *GetSubstring(int s, char buffer[], int max);
public: public:
int mErrorOffset; int mErrorOffset;
const char *mError; const char *mError;
int mSubStrings; int Count() { return mSubStrings.length(); }
private: private:
pcre *re; pcre *re;
bool mFree; bool mFree;
int ovector[30]; int ovector[30];
char *subject; char *subject;
ke::Vector<RegExSub> mSubStrings;
}; };
#endif //_INCLUDE_CREGEX_H #endif //_INCLUDE_CREGEX_H

View File

@ -79,7 +79,7 @@ static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params)
{ {
int len; int len;
const char *regex = MF_GetAmxString(amx, params[1], 0, &len); const char *regex = MF_GetAmxString(amx, params[1], 0, &len);
int id = GetPEL(); int id = GetPEL();
RegEx *x = PEL[id]; RegEx *x = PEL[id];
@ -95,9 +95,7 @@ static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params)
return id + 1; return id + 1;
} }
// 1.8 includes the last parameter cell match(AMX *amx, cell *params, bool all)
// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
{ {
int len; int len;
const char *str = MF_GetAmxString(amx, params[1], 0, &len); const char *str = MF_GetAmxString(amx, params[1], 0, &len);
@ -105,24 +103,29 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
int id = GetPEL(); int id = GetPEL();
RegEx *x = PEL[id]; RegEx *x = PEL[id];
char* flags = NULL; char* flags = NULL;
if ((params[0] / sizeof(cell)) >= 6) // compiled with 1.8's extra parameter if ((params[0] / sizeof(cell)) >= 6) // compiled with 1.8's extra parameter
{ {
flags = MF_GetAmxString(amx, params[6], 2, &len); flags = MF_GetAmxString(amx, params[6], 2, &len);
} }
if (x->Compile(regex, flags) == 0) if (x->Compile(regex, flags) == 0)
{ {
cell *eOff = MF_GetAmxAddr(amx, params[3]); cell *eOff = MF_GetAmxAddr(amx, params[3]);
const char *err = x->mError; const char *err = x->mError;
*eOff = x->mErrorOffset; *eOff = x->mErrorOffset;
MF_SetAmxString(amx, params[4], err?err:"unknown", params[5]); MF_SetAmxString(amx, params[4], err ? err : "unknown", params[5]);
return -1; return -1;
} }
int e = x->Match(str); int e;
if (all)
e = x->MatchAll(str);
else
e = x->Match(str);
if (e == -1) if (e == -1)
{ {
/* there was a match error. destroy this and move on. */ /* there was a match error. destroy this and move on. */
@ -130,24 +133,38 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
*res = x->mErrorOffset; *res = x->mErrorOffset;
x->Clear(); x->Clear();
return -2; return -2;
} else if (e == 0) { }
else if (e == 0) {
cell *res = MF_GetAmxAddr(amx, params[3]); cell *res = MF_GetAmxAddr(amx, params[3]);
*res = 0; *res = 0;
x->Clear(); x->Clear();
return 0; return 0;
} else { }
else {
cell *res = MF_GetAmxAddr(amx, params[3]); cell *res = MF_GetAmxAddr(amx, params[3]);
*res = x->mSubStrings; *res = x->Count();
} }
return id+1; return id + 1;
} }
// native regex_match_c(const string[], Regex:id, &ret); // 1.8 includes the last parameter
static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params) // Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
{
	// Forward to the shared helper with the "all" mode switched off.
	const bool matchAll = false;
	return match(amx, params, matchAll);
}
// Regex:regex_match_all(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
// Same entry point as regex_match, but asks the shared helper to use the
// "all" mode.
static cell AMX_NATIVE_CALL regex_match_all(AMX *amx, cell *params)
{
	const bool matchAll = true;
	return match(amx, params, matchAll);
}
cell match_c(AMX *amx, cell *params, bool all)
{ {
int len; int len;
int id = params[2]-1; int id = params[2] - 1;
const char *str = MF_GetAmxString(amx, params[1], 0, &len); const char *str = MF_GetAmxString(amx, params[1], 0, &len);
if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree()) if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
@ -155,31 +172,50 @@ static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id); MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
return 0; return 0;
} }
RegEx *x = PEL[id]; RegEx *x = PEL[id];
int e = x->Match(str); int e;
if (all)
e = x->MatchAll(str);
else
e = x->Match(str);
if (e == -1) if (e == -1)
{ {
/* there was a match error. move on. */ /* there was a match error. move on. */
cell *res = MF_GetAmxAddr(amx, params[3]); cell *res = MF_GetAmxAddr(amx, params[3]);
*res = x->mErrorOffset; *res = x->mErrorOffset;
/* only clear the match results, since the regex object /* only clear the match results, since the regex object
may still be referenced later */ may still be referenced later */
x->ClearMatch(); x->ClearMatch();
return -2; return -2;
} else if (e == 0) { }
else if (e == 0) {
cell *res = MF_GetAmxAddr(amx, params[3]); cell *res = MF_GetAmxAddr(amx, params[3]);
*res = 0; *res = 0;
/* only clear the match results, since the regex object /* only clear the match results, since the regex object
may still be referenced later */ may still be referenced later */
x->ClearMatch(); x->ClearMatch();
return 0; return 0;
} else {
cell *res = MF_GetAmxAddr(amx, params[3]);
*res = x->mSubStrings;
return x->mSubStrings;
} }
else {
cell *res = MF_GetAmxAddr(amx, params[3]);
*res = x->Count();
return x->Count();
}
}
// native regex_match_c(const string[], Regex:id, &ret);
// Forwards to the shared helper for pre-compiled handles with the "all" mode
// switched off.
static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
{
	const bool matchAll = false;
	return match_c(amx, params, matchAll);
}
// native regex_match_all_c(const string[], Regex:id, &ret);
static cell AMX_NATIVE_CALL regex_match_all_c(AMX *amx, cell *params)
{
return match_c(amx, params, true);
} }
// native regex_match_ex(Regex:id, const string[], &RegexError:ret = REGEX_ERROR_NONE); // native regex_match_ex(Regex:id, const string[], &RegexError:ret = REGEX_ERROR_NONE);
@ -219,7 +255,7 @@ static cell AMX_NATIVE_CALL regex_match_ex(AMX *amx, cell *params)
} }
else else
{ {
return x->mSubStrings; return x->Count();
} }
} }
@ -270,6 +306,8 @@ AMX_NATIVE_INFO regex_Natives[] = {
{"regex_match", regex_match}, {"regex_match", regex_match},
{"regex_match_c", regex_match_c}, {"regex_match_c", regex_match_c},
{"regex_match_ex", regex_match_ex}, {"regex_match_ex", regex_match_ex},
{"regex_match_all", regex_match_all},
{"regex_match_all_c", regex_match_all_c},
{"regex_substr", regex_substr}, {"regex_substr", regex_substr},
{"regex_free", regex_free}, {"regex_free", regex_free},
{NULL, NULL}, {NULL, NULL},

View File

@ -266,6 +266,61 @@ native Regex:regex_compile_ex(const pattern[], flags = 0, error[]= "", maxLen =
*/ */
native regex_match_ex(Handle:regex, const str[], &RegexError:ret = REGEX_ERROR_NONE); native regex_match_ex(Handle:regex, const str[], &RegexError:ret = REGEX_ERROR_NONE);
/**
* Matches a string against a pre-compiled regular expression pattern, matching all
* occurrences of the pattern inside the string. This is similar to using the "g" flag
* in perl regex.
*
*
* @param string The string to check.
* @param pattern Handle of the pre-compiled regular expression pattern.
* @param ret Error code, if applicable, or number of results on success.
*
* @return -2 = Matching error (error code is stored in ret)
* 0 = No match.
* >=1 = Number of results.
*
* @note You should free the pattern handle (with regex_free())
* when you are done with this pattern.
*
* @note Use the regex handle passed to this function to extract
* matches with regex_substr().
*/
native regex_match_all_c(const string[], Regex:pattern, &ret);
/**
* Matches a string against a regular expression pattern, matching all occurrences of the
* pattern inside the string. This is similar to using the "g" flag in perl regex.
*
* @note If you intend to use the same regular expression pattern
* multiple times, consider using regex_compile and regex_match_all_c
* instead of making this function reparse the expression each time.
*
* @param string The string to check.
* @param pattern The regular expression pattern.
* @param ret Error code, or result state of the match.
* @param error Error message, if applicable.
* @param maxLen Maximum length of the error buffer.
* @param flags General flags for the regular expression.
* i = Ignore case
* m = Multilines (affects ^ and $ so that they match
* the start/end of a line rather than matching the
* start/end of the string).
* s = Single line (affects . so that it matches any character,
* even new line characters).
* x = Pattern extension (ignore whitespace and # comments).
*
* @return -2 = Matching error (error code is stored in ret)
* -1 = Error in pattern (error message and offset # in error and ret)
* 0 = No match.
* >=1 = Handle for getting more information (via regex_substr)
*
* @note Flags only exist in amxmodx 1.8 and later.
* @note You should free the returned handle (with regex_free())
* when you are done extracting all of the substrings.
*/
native Regex:regex_match_all(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
/** /**
* Matches a string against a regular expression pattern. * Matches a string against a regular expression pattern.
* *
@ -298,4 +353,4 @@ stock regex_match_simple(const str[], const pattern[], flags = 0, error[]="", ma
regex_free(regex); regex_free(regex);
return substrings; return substrings;
} }