Regex: Add regex_match_all_c and regex_match_all natives (by Nextra)
This commit is contained in:
parent
ee4f6b8a89
commit
a44d20b26b
@ -42,7 +42,7 @@ RegEx::RegEx()
|
|||||||
re = NULL;
|
re = NULL;
|
||||||
mFree = true;
|
mFree = true;
|
||||||
subject = NULL;
|
subject = NULL;
|
||||||
mSubStrings = 0;
|
mSubStrings.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegEx::Clear()
|
void RegEx::Clear()
|
||||||
@ -54,9 +54,9 @@ void RegEx::Clear()
|
|||||||
re = NULL;
|
re = NULL;
|
||||||
mFree = true;
|
mFree = true;
|
||||||
if (subject)
|
if (subject)
|
||||||
delete [] subject;
|
delete[] subject;
|
||||||
subject = NULL;
|
subject = NULL;
|
||||||
mSubStrings = 0;
|
mSubStrings.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
RegEx::~RegEx()
|
RegEx::~RegEx()
|
||||||
@ -156,7 +156,7 @@ int RegEx::Match(const char *str)
|
|||||||
this->ClearMatch();
|
this->ClearMatch();
|
||||||
|
|
||||||
//save str
|
//save str
|
||||||
subject = new char[strlen(str)+1];
|
subject = new char[strlen(str) + 1];
|
||||||
strcpy(subject, str);
|
strcpy(subject, str);
|
||||||
|
|
||||||
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
|
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
|
||||||
@ -166,37 +166,110 @@ int RegEx::Match(const char *str)
|
|||||||
if (rc == PCRE_ERROR_NOMATCH)
|
if (rc == PCRE_ERROR_NOMATCH)
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
mErrorOffset = rc;
|
mErrorOffset = rc;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mSubStrings = rc;
|
RegExSub res;
|
||||||
|
mSubStrings.ensure(rc);
|
||||||
|
|
||||||
|
for (int s = 0; s < rc; ++s)
|
||||||
|
{
|
||||||
|
res.start = ovector[2 * s];
|
||||||
|
res.end = ovector[2 * s + 1];
|
||||||
|
mSubStrings.append(res);
|
||||||
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int RegEx::MatchAll(const char *str)
|
||||||
|
{
|
||||||
|
int rc = 0;
|
||||||
|
int rr = 0;
|
||||||
|
int offset = 0;
|
||||||
|
|
||||||
|
if (mFree || re == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
this->ClearMatch();
|
||||||
|
|
||||||
|
//save str
|
||||||
|
subject = new char[strlen(str) + 1];
|
||||||
|
strcpy(subject, str);
|
||||||
|
|
||||||
|
RegExSub sub, whole;
|
||||||
|
while ((rr = pcre_exec(re, NULL, subject, (int)strlen(subject), offset, 0, ovector, 30)))
|
||||||
|
{
|
||||||
|
if (rr < 0)
|
||||||
|
{
|
||||||
|
if (rr == PCRE_ERROR_NOMATCH)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mErrorOffset = rr;
|
||||||
|
|
||||||
|
if (rc)
|
||||||
|
this->ClearMatch();
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rc += rr;
|
||||||
|
mSubStrings.ensure(rc);
|
||||||
|
|
||||||
|
for (int s = 1; s < rr; ++s)
|
||||||
|
{
|
||||||
|
sub.start = ovector[2 * s];
|
||||||
|
sub.end = ovector[2 * s + 1];
|
||||||
|
mSubStrings.append(sub);
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = ovector[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!rc)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
sub = mSubStrings.at(0);
|
||||||
|
whole.start = sub.start;
|
||||||
|
sub = mSubStrings.back();
|
||||||
|
whole.end = sub.end;
|
||||||
|
|
||||||
|
mSubStrings.insert(0, whole);
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
void RegEx::ClearMatch()
|
void RegEx::ClearMatch()
|
||||||
{
|
{
|
||||||
// Clears match results
|
// Clears match results
|
||||||
mErrorOffset = 0;
|
mErrorOffset = 0;
|
||||||
mError = NULL;
|
mError = NULL;
|
||||||
if (subject)
|
if (subject)
|
||||||
delete [] subject;
|
delete[] subject;
|
||||||
subject = NULL;
|
subject = NULL;
|
||||||
mSubStrings = 0;
|
mSubStrings.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
||||||
{
|
{
|
||||||
int i = 0;
|
int i = 0;
|
||||||
if (s >= mSubStrings || s < 0)
|
if ((size_t)s >= mSubStrings.length() || s < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
char *substr_a = subject + ovector[2*s];
|
RegExSub sub = mSubStrings.at(s);
|
||||||
int substr_l = ovector[2*s+1] - ovector[2*s];
|
|
||||||
|
|
||||||
for (i = 0; i<substr_l; i++)
|
char *substr_a = subject + sub.start;
|
||||||
|
int substr_l = sub.end - sub.start;
|
||||||
|
|
||||||
|
for (i = 0; i < substr_l; i++)
|
||||||
{
|
{
|
||||||
if (i >= max)
|
if (i >= max)
|
||||||
break;
|
break;
|
||||||
@ -207,4 +280,3 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
|||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,28 +33,39 @@
|
|||||||
#ifndef _INCLUDE_CREGEX_H
|
#ifndef _INCLUDE_CREGEX_H
|
||||||
#define _INCLUDE_CREGEX_H
|
#define _INCLUDE_CREGEX_H
|
||||||
|
|
||||||
|
#include <am-vector.h>
|
||||||
|
|
||||||
class RegEx
|
class RegEx
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
struct RegExSub {
|
||||||
|
int start, end;
|
||||||
|
};
|
||||||
|
|
||||||
RegEx();
|
RegEx();
|
||||||
~RegEx();
|
~RegEx();
|
||||||
|
|
||||||
bool isFree(bool set=false, bool val=false);
|
bool isFree(bool set=false, bool val=false);
|
||||||
void Clear();
|
void Clear();
|
||||||
|
|
||||||
int Compile(const char *pattern, const char* flags = NULL);
|
int Compile(const char *pattern, const char* flags = NULL);
|
||||||
int Compile(const char *pattern, int iFlags);
|
int Compile(const char *pattern, int iFlags);
|
||||||
int Match(const char *str);
|
int Match(const char *str);
|
||||||
|
int MatchAll(const char *str);
|
||||||
void ClearMatch();
|
void ClearMatch();
|
||||||
const char *GetSubstring(int s, char buffer[], int max);
|
const char *GetSubstring(int s, char buffer[], int max);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
int mErrorOffset;
|
int mErrorOffset;
|
||||||
const char *mError;
|
const char *mError;
|
||||||
int mSubStrings;
|
int Count() { return mSubStrings.length(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
pcre *re;
|
pcre *re;
|
||||||
bool mFree;
|
bool mFree;
|
||||||
int ovector[30];
|
int ovector[30];
|
||||||
char *subject;
|
char *subject;
|
||||||
|
ke::Vector<RegExSub> mSubStrings;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //_INCLUDE_CREGEX_H
|
#endif //_INCLUDE_CREGEX_H
|
||||||
|
@ -95,9 +95,7 @@ static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params)
|
|||||||
return id + 1;
|
return id + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1.8 includes the last parameter
|
cell match(AMX *amx, cell *params, bool all)
|
||||||
// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
|
|
||||||
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
|
|
||||||
{
|
{
|
||||||
int len;
|
int len;
|
||||||
const char *str = MF_GetAmxString(amx, params[1], 0, &len);
|
const char *str = MF_GetAmxString(amx, params[1], 0, &len);
|
||||||
@ -118,11 +116,16 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
|
|||||||
cell *eOff = MF_GetAmxAddr(amx, params[3]);
|
cell *eOff = MF_GetAmxAddr(amx, params[3]);
|
||||||
const char *err = x->mError;
|
const char *err = x->mError;
|
||||||
*eOff = x->mErrorOffset;
|
*eOff = x->mErrorOffset;
|
||||||
MF_SetAmxString(amx, params[4], err?err:"unknown", params[5]);
|
MF_SetAmxString(amx, params[4], err ? err : "unknown", params[5]);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int e = x->Match(str);
|
int e;
|
||||||
|
if (all)
|
||||||
|
e = x->MatchAll(str);
|
||||||
|
else
|
||||||
|
e = x->Match(str);
|
||||||
|
|
||||||
if (e == -1)
|
if (e == -1)
|
||||||
{
|
{
|
||||||
/* there was a match error. destroy this and move on. */
|
/* there was a match error. destroy this and move on. */
|
||||||
@ -130,24 +133,38 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
|
|||||||
*res = x->mErrorOffset;
|
*res = x->mErrorOffset;
|
||||||
x->Clear();
|
x->Clear();
|
||||||
return -2;
|
return -2;
|
||||||
} else if (e == 0) {
|
}
|
||||||
|
else if (e == 0) {
|
||||||
cell *res = MF_GetAmxAddr(amx, params[3]);
|
cell *res = MF_GetAmxAddr(amx, params[3]);
|
||||||
*res = 0;
|
*res = 0;
|
||||||
x->Clear();
|
x->Clear();
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
cell *res = MF_GetAmxAddr(amx, params[3]);
|
cell *res = MF_GetAmxAddr(amx, params[3]);
|
||||||
*res = x->mSubStrings;
|
*res = x->Count();
|
||||||
}
|
}
|
||||||
|
|
||||||
return id+1;
|
return id + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// native regex_match_c(const string[], Regex:id, &ret);
|
// 1.8 includes the last parameter
|
||||||
static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
|
// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
|
||||||
|
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
|
||||||
|
{
|
||||||
|
return match(amx, params, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regex:regex_match_all(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
|
||||||
|
static cell AMX_NATIVE_CALL regex_match_all(AMX *amx, cell *params)
|
||||||
|
{
|
||||||
|
return match(amx, params, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
cell match_c(AMX *amx, cell *params, bool all)
|
||||||
{
|
{
|
||||||
int len;
|
int len;
|
||||||
int id = params[2]-1;
|
int id = params[2] - 1;
|
||||||
const char *str = MF_GetAmxString(amx, params[1], 0, &len);
|
const char *str = MF_GetAmxString(amx, params[1], 0, &len);
|
||||||
|
|
||||||
if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
|
if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
|
||||||
@ -158,7 +175,12 @@ static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
|
|||||||
|
|
||||||
RegEx *x = PEL[id];
|
RegEx *x = PEL[id];
|
||||||
|
|
||||||
int e = x->Match(str);
|
int e;
|
||||||
|
if (all)
|
||||||
|
e = x->MatchAll(str);
|
||||||
|
else
|
||||||
|
e = x->Match(str);
|
||||||
|
|
||||||
if (e == -1)
|
if (e == -1)
|
||||||
{
|
{
|
||||||
/* there was a match error. move on. */
|
/* there was a match error. move on. */
|
||||||
@ -168,18 +190,32 @@ static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
|
|||||||
may still be referenced later */
|
may still be referenced later */
|
||||||
x->ClearMatch();
|
x->ClearMatch();
|
||||||
return -2;
|
return -2;
|
||||||
} else if (e == 0) {
|
}
|
||||||
|
else if (e == 0) {
|
||||||
cell *res = MF_GetAmxAddr(amx, params[3]);
|
cell *res = MF_GetAmxAddr(amx, params[3]);
|
||||||
*res = 0;
|
*res = 0;
|
||||||
/* only clear the match results, since the regex object
|
/* only clear the match results, since the regex object
|
||||||
may still be referenced later */
|
may still be referenced later */
|
||||||
x->ClearMatch();
|
x->ClearMatch();
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
|
||||||
cell *res = MF_GetAmxAddr(amx, params[3]);
|
|
||||||
*res = x->mSubStrings;
|
|
||||||
return x->mSubStrings;
|
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
cell *res = MF_GetAmxAddr(amx, params[3]);
|
||||||
|
*res = x->Count();
|
||||||
|
return x->Count();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// native regex_match_c(const string[], Regex:id, &ret);
|
||||||
|
static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
|
||||||
|
{
|
||||||
|
return match_c(amx, params, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// native regex_match_all_c(const string[], Regex:id, &ret);
|
||||||
|
static cell AMX_NATIVE_CALL regex_match_all_c(AMX *amx, cell *params)
|
||||||
|
{
|
||||||
|
return match_c(amx, params, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// native regex_match_ex(Regex:id, const string[], &RegexError:ret = REGEX_ERROR_NONE);
|
// native regex_match_ex(Regex:id, const string[], &RegexError:ret = REGEX_ERROR_NONE);
|
||||||
@ -219,7 +255,7 @@ static cell AMX_NATIVE_CALL regex_match_ex(AMX *amx, cell *params)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return x->mSubStrings;
|
return x->Count();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -270,6 +306,8 @@ AMX_NATIVE_INFO regex_Natives[] = {
|
|||||||
{"regex_match", regex_match},
|
{"regex_match", regex_match},
|
||||||
{"regex_match_c", regex_match_c},
|
{"regex_match_c", regex_match_c},
|
||||||
{"regex_match_ex", regex_match_ex},
|
{"regex_match_ex", regex_match_ex},
|
||||||
|
{"regex_match_all", regex_match_all},
|
||||||
|
{"regex_match_all_c", regex_match_all_c},
|
||||||
{"regex_substr", regex_substr},
|
{"regex_substr", regex_substr},
|
||||||
{"regex_free", regex_free},
|
{"regex_free", regex_free},
|
||||||
{NULL, NULL},
|
{NULL, NULL},
|
||||||
|
@ -266,6 +266,61 @@ native Regex:regex_compile_ex(const pattern[], flags = 0, error[]= "", maxLen =
|
|||||||
*/
|
*/
|
||||||
native regex_match_ex(Handle:regex, const str[], &RegexError:ret = REGEX_ERROR_NONE);
|
native regex_match_ex(Handle:regex, const str[], &RegexError:ret = REGEX_ERROR_NONE);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matches a string against a pre-compiled regular expression pattern, matching all
|
||||||
|
* occurrences of the pattern inside the string. This is similar to using the "g" flag
|
||||||
|
* in perl regex.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* @param pattern The regular expression pattern.
|
||||||
|
* @param string The string to check.
|
||||||
|
* @param ret Error code, if applicable, or number of results on success.
|
||||||
|
*
|
||||||
|
* @return -2 = Matching error (error code is stored in ret)
|
||||||
|
* 0 = No match.
|
||||||
|
* >1 = Number of results.
|
||||||
|
*
|
||||||
|
* @note You should free the pattern handle (with regex_free())
|
||||||
|
* when you are done with this pattern.
|
||||||
|
*
|
||||||
|
* @note Use the regex handle passed to this function to extract
|
||||||
|
* matches with regex_substr().
|
||||||
|
*/
|
||||||
|
native regex_match_all_c(const string[], Regex:pattern, &ret);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matches a string against a regular expression pattern, matching all occurrences of the
|
||||||
|
* pattern inside the string. This is similar to using the "g" flag in perl regex.
|
||||||
|
*
|
||||||
|
* @note If you intend on using the same regular expression pattern
|
||||||
|
* multiple times, consider using regex_compile and regex_match_all_c
|
||||||
|
* instead of making this function reparse the expression each time.
|
||||||
|
*
|
||||||
|
* @param string The string to check.
|
||||||
|
* @param pattern The regular expression pattern.
|
||||||
|
* @param ret Error code, or result state of the match.
|
||||||
|
* @param error Error message, if applicable.
|
||||||
|
* @param maxLen Maximum length of the error buffer.
|
||||||
|
* @param flags General flags for the regular expression.
|
||||||
|
* i = Ignore case
|
||||||
|
* m = Multilines (affects ^ and $ so that they match
|
||||||
|
* the start/end of a line rather than matching the
|
||||||
|
* start/end of the string).
|
||||||
|
* s = Single line (affects . so that it matches any character,
|
||||||
|
* even new line characters).
|
||||||
|
* x = Pattern extension (ignore whitespace and # comments).
|
||||||
|
*
|
||||||
|
* @return -2 = Matching error (error code is stored in ret)
|
||||||
|
* -1 = Error in pattern (error message and offset # in error and ret)
|
||||||
|
* 0 = No match.
|
||||||
|
* >1 = Handle for getting more information (via regex_substr)
|
||||||
|
*
|
||||||
|
* @note Flags only exist in amxmodx 1.8 and later.
|
||||||
|
* @note You should free the returned handle (with regex_free())
|
||||||
|
* when you are done extracting all of the substrings.
|
||||||
|
*/
|
||||||
|
native Regex:regex_match_all(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Matches a string against a regular expression pattern.
|
* Matches a string against a regular expression pattern.
|
||||||
*
|
*
|
||||||
|
Loading…
Reference in New Issue
Block a user