Regex: Update PCRE to v8.35.

I was über lazy at first, so I took the libs from SM.
But it's actually quite easy to compile, so let's update to the latest version \o/.
This commit is contained in:
Arkshine
2014-07-05 13:53:30 +02:00
parent d1153b8049
commit d4de0e6f1e
241 changed files with 51074 additions and 15011 deletions

View File

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language (but see
below for why this module is different).
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -120,7 +120,7 @@ static const pcre_uint8 coptable[] = {
0, 0, /* \P, \p */
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
0, /* \X */
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
1, /* Char */
1, /* Chari */
1, /* not */
@@ -151,11 +151,14 @@ static const pcre_uint8 coptable[] = {
/* Character class & ref repeats */
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
0, 0, /* CRRANGE, CRMINRANGE */
0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
0, /* CLASS */
0, /* NCLASS */
0, /* XCLASS - variable length */
0, /* REF */
0, /* REFI */
0, /* DNREF */
0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
0, /* Alt */
@@ -171,8 +174,8 @@ static const pcre_uint8 coptable[] = {
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, NCREF */
0, 0, /* RREF, NRREF */
0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
@@ -194,7 +197,7 @@ static const pcre_uint8 poptable[] = {
1, 1, /* \P, \p */
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
1, /* \X */
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
1, /* Char */
1, /* Chari */
1, /* not */
@@ -220,11 +223,14 @@ static const pcre_uint8 poptable[] = {
/* Character class & ref repeats */
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
1, 1, /* CRRANGE, CRMINRANGE */
1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
1, /* CLASS */
1, /* NCLASS */
1, /* XCLASS - variable length */
0, /* REF */
0, /* REFI */
0, /* DNREF */
0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
0, /* Alt */
@@ -240,8 +246,8 @@ static const pcre_uint8 poptable[] = {
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, NCREF */
0, 0, /* RREF, NRREF */
0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
@@ -636,7 +642,7 @@ for (;;)
const pcre_uchar *code;
int state_offset = current_state->offset;
int codevalue, rrc;
unsigned int count;
int count;
#ifdef PCRE_DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -1094,15 +1100,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
@@ -1120,6 +1134,12 @@ for (;;)
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1249,7 +1269,7 @@ for (;;)
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -1283,7 +1303,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -1338,15 +1358,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
@@ -1364,6 +1392,12 @@ for (;;)
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1439,7 +1473,7 @@ for (;;)
goto ANYNL01;
case CHAR_CR:
if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL01:
@@ -1576,15 +1610,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
@@ -1602,6 +1644,12 @@ for (;;)
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1694,7 +1742,7 @@ for (;;)
goto ANYNL02;
case CHAR_CR:
if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL02:
@@ -1705,7 +1753,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
break;
default:
@@ -1749,7 +1797,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + count), 0, 0);
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}
}
break;
@@ -1790,7 +1838,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + count), 0, 0);
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}
}
break;
@@ -1839,15 +1887,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
@@ -1865,6 +1921,12 @@ for (;;)
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1879,7 +1941,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -1918,7 +1980,7 @@ for (;;)
}
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
{ ADD_NEW_DATA(-state_offset, count, ncount); }
@@ -1950,7 +2012,7 @@ for (;;)
goto ANYNL03;
case CHAR_CR:
if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL03:
@@ -1960,7 +2022,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
{ ADD_NEW_DATA(-state_offset, count, ncount); }
@@ -2000,7 +2062,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else
{ ADD_NEW_DATA(-state_offset, count, 0); }
@@ -2037,7 +2099,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else
{ ADD_NEW_DATA(-state_offset, count, 0); }
@@ -2148,7 +2210,7 @@ for (;;)
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
}
else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
@@ -2407,7 +2469,7 @@ for (;;)
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -2456,7 +2518,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@@ -2509,31 +2571,65 @@ for (;;)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPOSSTAR:
ADD_ACTIVE(next_state_offset + 1, 0);
if (isinclass) { ADD_NEW(state_offset, 0); }
if (isinclass)
{
if (*ecode == OP_CRPOSSTAR)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW(state_offset, 0);
}
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRPOSPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
if (isinclass) { count++; ADD_NEW(state_offset, count); }
if (isinclass)
{
if (count > 0 && *ecode == OP_CRPOSPLUS)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
count++;
ADD_NEW(state_offset, count);
}
break;
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSQUERY:
ADD_ACTIVE(next_state_offset + 1, 0);
if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
if (isinclass)
{
if (*ecode == OP_CRPOSQUERY)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW(next_state_offset + 1, 0);
}
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
count = current_state->count; /* Already matched */
if (count >= GET2(ecode, 1))
if (count >= (int)GET2(ecode, 1))
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
if (isinclass)
{
unsigned int max = GET2(ecode, 1 + IMM2_SIZE);
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
if (*ecode == OP_CRPOSRANGE)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
else
@@ -2633,9 +2729,11 @@ for (;;)
condcode = code[LINK_SIZE+1];
/* Back reference conditions are not supported */
/* Back reference conditions and duplicate named recursion conditions
are not supported */
if (condcode == OP_CREF || condcode == OP_NCREF)
if (condcode == OP_CREF || condcode == OP_DNCREF ||
condcode == OP_DNRREF)
return PCRE_ERROR_DFA_UCOND;
/* The DEFINE condition is always false */
@@ -2647,7 +2745,7 @@ for (;;)
which means "test if in any recursion". We can't test for specifically
recursed groups. */
else if (condcode == OP_RREF || condcode == OP_NRREF)
else if (condcode == OP_RREF)
{
int value = GET2(code, LINK_SIZE + 2);
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
@@ -3023,15 +3121,7 @@ for (;;)
ptr > md->start_used_ptr) /* Inspected non-empty string */
)
)
{
if (offsetcount >= 2)
{
offsets[0] = (int)(md->start_used_ptr - start_subject);
offsets[1] = (int)(end_subject - start_subject);
}
match_count = PCRE_ERROR_PARTIAL;
}
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
rlevel*2-2, SP));
@@ -3376,7 +3466,7 @@ for (;;)
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
{
/* Advance to a known first char. */
/* Advance to a known first pcre_uchar (i.e. data item) */
if (has_first_char)
{
@@ -3384,12 +3474,12 @@ for (;;)
{
pcre_uchar csc;
while (current_subject < end_subject &&
(csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
(csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
current_subject++;
}
else
while (current_subject < end_subject &&
RAWUCHARTEST(current_subject) != first_char)
UCHAR21TEST(current_subject) != first_char)
current_subject++;
}
@@ -3419,36 +3509,26 @@ for (;;)
ANYCRLF, and we are now at a LF, advance the match position by one
more character. */
if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
current_subject < end_subject &&
RAWUCHARTEST(current_subject) == CHAR_NL)
UCHAR21TEST(current_subject) == CHAR_NL)
current_subject++;
}
}
/* Or to a non-unique first char after study */
/* Advance to a non-unique first pcre_uchar after study */
else if (start_bits != NULL)
{
while (current_subject < end_subject)
{
register pcre_uint32 c = RAWUCHARTEST(current_subject);
register pcre_uint32 c = UCHAR21TEST(current_subject);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
if ((start_bits[c/8] & (1 << (c&7))) == 0)
{
current_subject++;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
/* In non 8-bit mode, the iteration will stop for
characters > 255 at the beginning or not stop at all. */
if (utf)
ACROSSCHAR(current_subject < end_subject, *current_subject,
current_subject++);
#endif
}
else break;
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
current_subject++;
}
}
}
@@ -3467,19 +3547,20 @@ for (;;)
/* If the pattern was studied, a minimum subject length may be set. This
is a lower bound; no actual string of that length may actually match the
pattern. Although the value is, strictly, in characters, we treat it as
bytes to avoid spending too much time in this optimization. */
in pcre_uchar units to avoid spending too much time in this optimization.
*/
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
(pcre_uint32)(end_subject - current_subject) < study->minlength)
return PCRE_ERROR_NOMATCH;
/* If req_char is set, we know that that character must appear in the
subject for the match to succeed. If the first character is set, req_char
must be later in the subject; otherwise the test starts at the match
point. This optimization can save a huge amount of work in patterns with
nested unlimited repeats that aren't going to match. Writing separate
code for cased/caseless versions makes it go faster, as does using an
autoincrement and backing off on a match.
/* If req_char is set, we know that that pcre_uchar must appear in the
subject for the match to succeed. If the first pcre_uchar is set,
req_char must be later in the subject; otherwise the test starts at the
match point. This optimization can save a huge amount of work in patterns
with nested unlimited repeats that aren't going to match. Writing
separate code for cased/caseless versions makes it go faster, as does
using an autoincrement and backing off on a match.
HOWEVER: when the subject string is very, very long, searching to its end
can take a long time, and give bad performance on quite ordinary
@@ -3499,7 +3580,7 @@ for (;;)
{
while (p < end_subject)
{
register pcre_uint32 pp = RAWUCHARINCTEST(p);
register pcre_uint32 pp = UCHAR21INCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
@@ -3507,18 +3588,18 @@ for (;;)
{
while (p < end_subject)
{
if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
if (UCHAR21INCTEST(p) == req_char) { p--; break; }
}
}
/* If we can't find the required character, break the matching loop,
/* If we can't find the required pcre_uchar, break the matching loop,
which will cause a return of PCRE_ERROR_NOMATCH. */
if (p >= end_subject) break;
/* If we have found the required character, save the point where we
/* If we have found the required pcre_uchar, save the point where we
found it, so that we don't search again next time round the loop if
the start hasn't passed this character yet. */
the start hasn't passed this point yet. */
req_char_ptr = p;
}
@@ -3545,7 +3626,17 @@ for (;;)
/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */
if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
if (rc != PCRE_ERROR_NOMATCH || anchored)
{
if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
{
offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
if (offsetcount > 2)
offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
}
return rc;
}
/* Advance to the next subject character unless we are at the end of a line
and firstline is set. */
@@ -3565,9 +3656,9 @@ for (;;)
not contain any explicit matches for \r or \n, and the newline option is CRLF
or ANY or ANYCRLF, advance the match position by one more character. */
if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
current_subject < end_subject &&
RAWUCHARTEST(current_subject) == CHAR_NL &&
UCHAR21TEST(current_subject) == CHAR_NL &&
(re->flags & PCRE_HASCRORLF) == 0 &&
(md->nltype == NLTYPE_ANY ||
md->nltype == NLTYPE_ANYCRLF ||