@@ -10,12 +10,15 @@ elif builder.target_platform == 'mac':
 | 
			
		||||
elif builder.target_platform == 'windows':
 | 
			
		||||
  binary.compiler.postlink += [binary.Dep('lib_win\\pcre.lib')]
 | 
			
		||||
 | 
			
		||||
binary.compiler.defines += ['PCRE_STATIC']
 | 
			
		||||
 | 
			
		||||
binary.compiler.defines += [
 | 
			
		||||
  'PCRE_STATIC',
 | 
			
		||||
  'HAVE_STDINT_H',
 | 
			
		||||
]
 | 
			
		||||
binary.sources = [
 | 
			
		||||
  'sdk/amxxmodule.cpp',
 | 
			
		||||
  'module.cpp',
 | 
			
		||||
  'CRegEx.cpp',
 | 
			
		||||
  'utils.cpp',
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
AMXX.modules += [builder.Add(binary)]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,41 @@
 | 
			
		||||
/* AMX Mod X
 | 
			
		||||
 *   Regular Expressions Module
 | 
			
		||||
 *
 | 
			
		||||
 * by the AMX Mod X Development Team
 | 
			
		||||
 *
 | 
			
		||||
 * This file is part of AMX Mod X.
 | 
			
		||||
 *
 | 
			
		||||
 *
 | 
			
		||||
 *  This program is free software; you can redistribute it and/or modify it
 | 
			
		||||
 *  under the terms of the GNU General Public License as published by the
 | 
			
		||||
 *  Free Software Foundation; either version 2 of the License, or (at
 | 
			
		||||
 *  your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 *  This program is distributed in the hope that it will be useful, but
 | 
			
		||||
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 | 
			
		||||
 *  General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 *  You should have received a copy of the GNU General Public License
 | 
			
		||||
 *  along with this program; if not, write to the Free Software Foundation,
 | 
			
		||||
 *  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 | 
			
		||||
 *
 | 
			
		||||
 *  In addition, as a special exception, the author gives permission to
 | 
			
		||||
 *  link the code of this program with the Half-Life Game Engine ("HL
 | 
			
		||||
 *  Engine") and Modified Game Libraries ("MODs") developed by Valve,
 | 
			
		||||
 *  L.L.C ("Valve"). You must obey the GNU General Public License in all
 | 
			
		||||
 *  respects for all of the code used other than the HL Engine and MODs
 | 
			
		||||
 *  from Valve. If you modify this file, you may extend this exception
 | 
			
		||||
 *  to your version of the file, but you are not obligated to do so. If
 | 
			
		||||
 *  you do not wish to do so, delete this exception statement from your
 | 
			
		||||
 *  version.
 | 
			
		||||
 */
 | 
			
		||||
#include "amxxmodule.h"
 | 
			
		||||
#include "pcre.h"
 | 
			
		||||
#include "CRegEx.h"
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include "amxxmodule.h"
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
#include "utils.h"
 | 
			
		||||
 | 
			
		||||
RegEx::RegEx()
 | 
			
		||||
{
 | 
			
		||||
@@ -10,7 +44,10 @@ RegEx::RegEx()
 | 
			
		||||
	re = NULL;
 | 
			
		||||
	mFree = true;
 | 
			
		||||
	subject = NULL;
 | 
			
		||||
	mSubStrings = 0;
 | 
			
		||||
	mSubStrings.clear();
 | 
			
		||||
	mMatchesSubs.clear();
 | 
			
		||||
	mSubsNameTable.clear();
 | 
			
		||||
	mNumSubpatterns = 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RegEx::Clear()
 | 
			
		||||
@@ -22,9 +59,12 @@ void RegEx::Clear()
 | 
			
		||||
	re = NULL;
 | 
			
		||||
	mFree = true;
 | 
			
		||||
	if (subject)
 | 
			
		||||
		delete [] subject;
 | 
			
		||||
		delete[] subject;
 | 
			
		||||
	subject = NULL;
 | 
			
		||||
	mSubStrings = 0;
 | 
			
		||||
	mSubStrings.clear();
 | 
			
		||||
	mMatchesSubs.clear();
 | 
			
		||||
	mSubsNameTable.clear();
 | 
			
		||||
	mNumSubpatterns = 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
RegEx::~RegEx()
 | 
			
		||||
@@ -97,57 +137,198 @@ int RegEx::Compile(const char *pattern, const char* flags)
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int RegEx::Compile(const char *pattern, int iFlags)
 | 
			
		||||
{
 | 
			
		||||
	if (!mFree)
 | 
			
		||||
		Clear();
 | 
			
		||||
 | 
			
		||||
	re = pcre_compile(pattern, iFlags, &mError, &mErrorOffset, NULL);
 | 
			
		||||
 | 
			
		||||
	if (re == NULL)
 | 
			
		||||
	{
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	mFree = false;
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Retrieve the number of captured groups
 | 
			
		||||
	 * including the full match.
 | 
			
		||||
	 */
 | 
			
		||||
	pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns);
 | 
			
		||||
	++mNumSubpatterns;
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Build the table with the named groups,
 | 
			
		||||
	 * which contain an index and a name per group.
 | 
			
		||||
	 */
 | 
			
		||||
	MakeSubpatternsTable(mNumSubpatterns);
 | 
			
		||||
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int RegEx::Match(const char *str)
 | 
			
		||||
{
 | 
			
		||||
	int rc = 0;
 | 
			
		||||
 | 
			
		||||
	if (mFree || re == NULL)
 | 
			
		||||
		return -1;
 | 
			
		||||
		
 | 
			
		||||
	this->ClearMatch();
 | 
			
		||||
 | 
			
		||||
	ClearMatch();
 | 
			
		||||
 | 
			
		||||
	//save str
 | 
			
		||||
	subject = new char[strlen(str)+1];
 | 
			
		||||
	subject = new char[strlen(str) + 1];
 | 
			
		||||
	strcpy(subject, str);
 | 
			
		||||
 | 
			
		||||
	rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
 | 
			
		||||
	rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, REGEX_MAX_SUBPATTERNS);
 | 
			
		||||
 | 
			
		||||
	if (rc < 0)
 | 
			
		||||
	{
 | 
			
		||||
		if (rc == PCRE_ERROR_NOMATCH)
 | 
			
		||||
		{
 | 
			
		||||
			return 0;
 | 
			
		||||
		} else {
 | 
			
		||||
		}
 | 
			
		||||
		else {
 | 
			
		||||
			mErrorOffset = rc;
 | 
			
		||||
			return -1;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	mSubStrings = rc;
 | 
			
		||||
	RegExSub res;
 | 
			
		||||
	mSubStrings.ensure(rc);
 | 
			
		||||
 | 
			
		||||
	for (int s = 0; s < rc; ++s)
 | 
			
		||||
	{
 | 
			
		||||
		res.start = ovector[2 * s];
 | 
			
		||||
		res.end = ovector[2 * s + 1];
 | 
			
		||||
		mSubStrings.append(res);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int RegEx::MatchAll(const char *str)
 | 
			
		||||
{
 | 
			
		||||
	int rr = 0;
 | 
			
		||||
	int rc = 0;
 | 
			
		||||
	int startOffset = 0;
 | 
			
		||||
	int exoptions = 0;
 | 
			
		||||
	int notEmpty = 0;
 | 
			
		||||
	int sizeOffsets = mNumSubpatterns * 3;
 | 
			
		||||
	int subjectLen = strlen(str);
 | 
			
		||||
 | 
			
		||||
	if (mFree || re == NULL)
 | 
			
		||||
	{
 | 
			
		||||
		return -1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ClearMatch();
 | 
			
		||||
	
 | 
			
		||||
	subject = new char[subjectLen + 1];
 | 
			
		||||
	strcpy(subject, str);
 | 
			
		||||
 | 
			
		||||
	RegExSub sub;
 | 
			
		||||
 | 
			
		||||
	while (1)
 | 
			
		||||
	{
 | 
			
		||||
		rr = pcre_exec(re, NULL, subject, (int)subjectLen, startOffset, exoptions | notEmpty, ovector, REGEX_MAX_SUBPATTERNS);
 | 
			
		||||
 | 
			
		||||
		/**
 | 
			
		||||
		 * The string was already proved to be valid UTF-8
 | 
			
		||||
		 */
 | 
			
		||||
		exoptions |= PCRE_NO_UTF8_CHECK;
 | 
			
		||||
 | 
			
		||||
		/**
 | 
			
		||||
		 * Too many substrings
 | 
			
		||||
		 */
 | 
			
		||||
		if (rr == 0)
 | 
			
		||||
		{
 | 
			
		||||
			rr = sizeOffsets / 3;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (rr > 0)
 | 
			
		||||
		{
 | 
			
		||||
			mMatchesSubs.append(rr);
 | 
			
		||||
 | 
			
		||||
			for (int s = 0; s < rr; ++s)
 | 
			
		||||
			{
 | 
			
		||||
				sub.start = ovector[2 * s];
 | 
			
		||||
				sub.end = ovector[2 * s + 1];
 | 
			
		||||
 | 
			
		||||
				mSubStrings.append(sub);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		else if (rr == PCRE_ERROR_NOMATCH)
 | 
			
		||||
		{
 | 
			
		||||
			/**
 | 
			
		||||
			 * If we previously set PCRE_NOTEMPTY after a null match,
 | 
			
		||||
			 * this is not necessarily the end. We need to advance
 | 
			
		||||
			 * the start offset, and continue. Fudge the offset values
 | 
			
		||||
			 * to achieve this, unless we're already at the end of the string. 
 | 
			
		||||
			 */
 | 
			
		||||
			if (notEmpty && startOffset < (int)subjectLen) 
 | 
			
		||||
			{
 | 
			
		||||
				ovector[0] = startOffset;
 | 
			
		||||
				ovector[1] = startOffset + 1;
 | 
			
		||||
			}
 | 
			
		||||
			else
 | 
			
		||||
			{
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			mErrorOffset = rr;
 | 
			
		||||
 | 
			
		||||
			if (mMatchesSubs.length())
 | 
			
		||||
			{
 | 
			
		||||
				ClearMatch();
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			return -1;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/**
 | 
			
		||||
		 * If we have matched an empty string, mimic what Perl's /g options does.
 | 
			
		||||
		 * This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
 | 
			
		||||
		 * the match again at the same point. If this fails (picked up above) we
 | 
			
		||||
		 * advance to the next character. 
 | 
			
		||||
		 */
 | 
			
		||||
		notEmpty = (ovector[1] == ovector[0]) ? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
 | 
			
		||||
 | 
			
		||||
		/** 
 | 
			
		||||
		 * Advance to the next piece. 
 | 
			
		||||
		 */
 | 
			
		||||
		startOffset = ovector[1];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!mMatchesSubs.length())
 | 
			
		||||
	{
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RegEx::ClearMatch()
 | 
			
		||||
{
 | 
			
		||||
	// Clears match results
 | 
			
		||||
	mErrorOffset = 0;
 | 
			
		||||
	mError = NULL;
 | 
			
		||||
	if (subject)
 | 
			
		||||
		delete [] subject;
 | 
			
		||||
		delete[] subject;
 | 
			
		||||
	subject = NULL;
 | 
			
		||||
	mSubStrings = 0;
 | 
			
		||||
	mSubStrings.clear();
 | 
			
		||||
	mMatchesSubs.clear();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char *RegEx::GetSubstring(int s, char buffer[], int max)
 | 
			
		||||
const char *getSubstring(char *subject, size_t start, size_t end, char buffer[], size_t max, size_t *outlen)
 | 
			
		||||
{
 | 
			
		||||
	int i = 0;
 | 
			
		||||
	if (s >= mSubStrings || s < 0)
 | 
			
		||||
		return NULL;
 | 
			
		||||
	size_t i;
 | 
			
		||||
	char * substr_a = subject + start;
 | 
			
		||||
	size_t substr_l = end - start;
 | 
			
		||||
 | 
			
		||||
	char *substr_a = subject + ovector[2*s];
 | 
			
		||||
	int substr_l = ovector[2*s+1] - ovector[2*s];
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i<substr_l; i++)
 | 
			
		||||
	for (i = 0; i < substr_l; i++)
 | 
			
		||||
	{
 | 
			
		||||
		if (i >= max)
 | 
			
		||||
			break;
 | 
			
		||||
@@ -156,6 +337,506 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
 | 
			
		||||
 | 
			
		||||
	buffer[i] = '\0';
 | 
			
		||||
 | 
			
		||||
	if (outlen)
 | 
			
		||||
	{
 | 
			
		||||
		*outlen = i;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return buffer;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char *RegEx::GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen)
 | 
			
		||||
{
 | 
			
		||||
	if (start >= mSubStrings.length())
 | 
			
		||||
	{
 | 
			
		||||
		return NULL;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	RegExSub sub = mSubStrings.at(start);
 | 
			
		||||
 | 
			
		||||
	return getSubstring(subject, sub.start, sub.end, buffer, max, outlen);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RegEx::MakeSubpatternsTable(int numSubpatterns)
 | 
			
		||||
{
 | 
			
		||||
	int nameCount = 0;
 | 
			
		||||
	int rc = pcre_fullinfo(re, NULL, PCRE_INFO_NAMECOUNT, &nameCount);
 | 
			
		||||
	
 | 
			
		||||
	if (rc < 0) 
 | 
			
		||||
	{
 | 
			
		||||
		return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (nameCount > 0) 
 | 
			
		||||
	{
 | 
			
		||||
		const char *nameTable;
 | 
			
		||||
		int nameSize = 0;
 | 
			
		||||
		int i = 0;
 | 
			
		||||
 | 
			
		||||
		int rc1 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMETABLE, &nameTable);
 | 
			
		||||
		int rc2 = pcre_fullinfo(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameSize);
 | 
			
		||||
 | 
			
		||||
		rc = rc2 ? rc2 : rc1;
 | 
			
		||||
 | 
			
		||||
		if (rc < 0)
 | 
			
		||||
		{
 | 
			
		||||
			mSubsNameTable.clear();
 | 
			
		||||
			return;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		NamedGroup data;
 | 
			
		||||
 | 
			
		||||
		while (i++ < nameCount) 
 | 
			
		||||
		{
 | 
			
		||||
			data.index = 0xff * (unsigned char)nameTable[0] + (unsigned char)nameTable[1];
 | 
			
		||||
			data.name = nameTable + 2;
 | 
			
		||||
 | 
			
		||||
			mSubsNameTable.append(ke::Move(data));
 | 
			
		||||
			nameTable += nameSize;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
 | 
			
		||||
{
 | 
			
		||||
	char *output = text;
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Retrieve all matches and store them in 
 | 
			
		||||
	 * mSubStrings list.
 | 
			
		||||
	 */
 | 
			
		||||
	if (MatchAll(output) == -1)
 | 
			
		||||
	{
 | 
			
		||||
		return -1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	size_t subjectLen = strlen(subject);
 | 
			
		||||
	size_t total = 0;
 | 
			
		||||
	size_t baseIndex = 0;
 | 
			
		||||
	size_t diffLength = 0;
 | 
			
		||||
 | 
			
		||||
	char *toReplace = new char[textMaxLen + 1];
 | 
			
		||||
	char *toSearch = NULL;
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * All characters which is not matched are not copied when replacing matches.
 | 
			
		||||
	 * Then original text (output buffer) should be considerated as empty.
 | 
			
		||||
	 */
 | 
			
		||||
	if (flags & REGEX_FORMAT_NOCOPY)
 | 
			
		||||
	{
 | 
			
		||||
		*output = '\0';
 | 
			
		||||
	}
 | 
			
		||||
	else
 | 
			
		||||
	{
 | 
			
		||||
		/**
 | 
			
		||||
		 * This is used only when we do replace matches.
 | 
			
		||||
		 */
 | 
			
		||||
		toSearch  = new char[textMaxLen + 1];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/** 
 | 
			
		||||
	 * Loop over all matches found.
 | 
			
		||||
	 */
 | 
			
		||||
	for (size_t i = 0; i < mMatchesSubs.length(); ++i)
 | 
			
		||||
	{
 | 
			
		||||
		char *ptr = toReplace;
 | 
			
		||||
 | 
			
		||||
		size_t browsed = 0;
 | 
			
		||||
		size_t searchLen = 0;
 | 
			
		||||
		size_t length = 0;
 | 
			
		||||
	
 | 
			
		||||
		/**
 | 
			
		||||
		 * Build the replace string as it can contain backreference
 | 
			
		||||
		 * and this needs to be parsed.
 | 
			
		||||
		 */
 | 
			
		||||
		for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed)
 | 
			
		||||
		{
 | 
			
		||||
			unsigned int c = *s;
 | 
			
		||||
 | 
			
		||||
			/**
 | 
			
		||||
			 * Supported format specifiers:
 | 
			
		||||
			 *
 | 
			
		||||
			 *   $number  : Substitutes the substring matched by group number.
 | 
			
		||||
			 *              n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
 | 
			
		||||
			 *   ${name}  : Substitutes the substring matched by the named group name (a maximum of 32 characters).
 | 
			
		||||
			 *   $&       : Substitutes a copy of the whole match.
 | 
			
		||||
			 *   $`       : Substitutes all the text of the input string before the match.
 | 
			
		||||
			 *   $'       : Substitutes all the text of the input string after the match.
 | 
			
		||||
			 *   $+       : Substitutes the last group that was captured.
 | 
			
		||||
			 *   $_       : Substitutes the entire input string.
 | 
			
		||||
			 *   $$       : Substitutes a literal "$".
 | 
			
		||||
			 */
 | 
			
		||||
			if (c == '$' || c == '\\')
 | 
			
		||||
			{
 | 
			
		||||
				switch (*++s)
 | 
			
		||||
				{
 | 
			
		||||
					case '\0':
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * End of string.
 | 
			
		||||
						 * Copy one character.
 | 
			
		||||
						 */
 | 
			
		||||
						 *(ptr + browsed) = c;
 | 
			
		||||
						 break;
 | 
			
		||||
					}
 | 
			
		||||
					case '&':
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * Concatenate retrieved full match sub-string.
 | 
			
		||||
						 * length - 1 to overwrite EOS.
 | 
			
		||||
						 */
 | 
			
		||||
						GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length);
 | 
			
		||||
						browsed += length - 1;
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
					case '`':
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * Concatenate part of original text up to
 | 
			
		||||
						 * first sub-string position.
 | 
			
		||||
						 */
 | 
			
		||||
						length = mSubStrings.at(baseIndex).start;
 | 
			
		||||
						memcpy(ptr + browsed, subject, length);
 | 
			
		||||
						browsed += length - 1;
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
					case '\'':
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * Concatenate part of original text from
 | 
			
		||||
						 * last sub-string end position to EOS.
 | 
			
		||||
						 */
 | 
			
		||||
						length = mSubStrings.at(baseIndex).end;
 | 
			
		||||
						memcpy(ptr + browsed, subject + length, subjectLen - length);
 | 
			
		||||
						browsed += (subjectLen - length) - 1;
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
					case '+':
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * Copy the last group that was captured.
 | 
			
		||||
						 */
 | 
			
		||||
						GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length);
 | 
			
		||||
						browsed += length - 1;
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
					case '_':
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * Copy the entire input string.
 | 
			
		||||
						 */
 | 
			
		||||
						memcpy(ptr + browsed, subject, subjectLen);
 | 
			
		||||
						browsed += (subjectLen - 1);
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
					case '$':
 | 
			
		||||
					case '\\':
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * Copy the single character $ or \.
 | 
			
		||||
						 */
 | 
			
		||||
						*(ptr + browsed) = c;
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
					case '0': case '1':	case '2': case '3':	case '4': 
 | 
			
		||||
					case '5': case '6': case '7': case '8': case '9':
 | 
			
		||||
					case '{':
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * Checking backreference.
 | 
			
		||||
						 * Which can be either $n, ${n} or ${name}.
 | 
			
		||||
						 */
 | 
			
		||||
						int backref = -1;
 | 
			
		||||
						const char *walk = s;
 | 
			
		||||
						bool inBrace = false;
 | 
			
		||||
						bool nameCheck = false;
 | 
			
		||||
 | 
			
		||||
						/**
 | 
			
		||||
						 * ${nn}.
 | 
			
		||||
						 *  ^
 | 
			
		||||
						 */
 | 
			
		||||
						if (*walk == '{') 
 | 
			
		||||
						{
 | 
			
		||||
							inBrace = true;
 | 
			
		||||
							++walk;
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						/**
 | 
			
		||||
						 * Valid number.
 | 
			
		||||
						 * $nn or ${nn}
 | 
			
		||||
						 *  ^       ^
 | 
			
		||||
						 */
 | 
			
		||||
						if (*walk >= '0' && *walk <= '9')
 | 
			
		||||
						{
 | 
			
		||||
							backref = *walk - '0';
 | 
			
		||||
							++walk;
 | 
			
		||||
						}
 | 
			
		||||
						else if (inBrace)
 | 
			
		||||
						{
 | 
			
		||||
							nameCheck = true;
 | 
			
		||||
 | 
			
		||||
							/**
 | 
			
		||||
							 * Not a valid number.
 | 
			
		||||
							 * Checking as string.
 | 
			
		||||
							 * ${name}
 | 
			
		||||
							 *   ^
 | 
			
		||||
							 */
 | 
			
		||||
							if (*walk)
 | 
			
		||||
							{
 | 
			
		||||
								const char *pch = strchr(walk, '}');
 | 
			
		||||
 | 
			
		||||
								if (pch != NULL)
 | 
			
		||||
								{
 | 
			
		||||
									/**
 | 
			
		||||
									 * A named group maximum character is 32 (PCRE).
 | 
			
		||||
									 */
 | 
			
		||||
									char name[32];
 | 
			
		||||
									size_t nameLength = strncopy(name, walk, pch - walk + 1);
 | 
			
		||||
 | 
			
		||||
									int flags, num = 0;
 | 
			
		||||
									pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &flags);
 | 
			
		||||
 | 
			
		||||
									/**
 | 
			
		||||
									 * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
 | 
			
		||||
									 * as pcre_get_stringnumber output order is not defined.
 | 
			
		||||
									 */
 | 
			
		||||
									if (flags & PCRE_DUPNAMES)
 | 
			
		||||
									{
 | 
			
		||||
										memset(ovector, 0, REGEX_MAX_SUBPATTERNS);
 | 
			
		||||
 | 
			
		||||
										/**
 | 
			
		||||
										 * pcre_copy_named_substring needs a vector containing sub-patterns ranges
 | 
			
		||||
										 * for a given match.
 | 
			
		||||
										 */
 | 
			
		||||
										for (size_t j = 0; j < mMatchesSubs.at(i); ++j)
 | 
			
		||||
										{
 | 
			
		||||
											ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
 | 
			
		||||
											ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
 | 
			
		||||
										}
 | 
			
		||||
 | 
			
		||||
										num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen);
 | 
			
		||||
 | 
			
		||||
										if (num != PCRE_ERROR_NOSUBSTRING)
 | 
			
		||||
										{
 | 
			
		||||
											browsed += num - 1;
 | 
			
		||||
											s = pch;
 | 
			
		||||
											break;
 | 
			
		||||
										}
 | 
			
		||||
										++pch;
 | 
			
		||||
									}
 | 
			
		||||
									else
 | 
			
		||||
									{
 | 
			
		||||
										/**
 | 
			
		||||
										 * Retrieve sub-pattern index from a give name.
 | 
			
		||||
										 */
 | 
			
		||||
										num = pcre_get_stringnumber(re, name);
 | 
			
		||||
										if (num != PCRE_ERROR_NOSUBSTRING)
 | 
			
		||||
										{
 | 
			
		||||
											backref = num;
 | 
			
		||||
											walk = ++pch;
 | 
			
		||||
										}
 | 
			
		||||
									}
 | 
			
		||||
 | 
			
		||||
									if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
 | 
			
		||||
									{
 | 
			
		||||
										/**
 | 
			
		||||
										 * If a sub-string for a given match is not found,  or if > to
 | 
			
		||||
										 * number of sub-patterns we still need to check if this 
 | 
			
		||||
										 * group name is a valid one because if so we want to escape it. 
 | 
			
		||||
										 * Looking at the name table.
 | 
			
		||||
										 */
 | 
			
		||||
										bool found = false;
 | 
			
		||||
										for (size_t i = 0; i < mSubsNameTable.length(); ++i)
 | 
			
		||||
										{
 | 
			
		||||
											if (!mSubsNameTable.at(i).name.compare(name))
 | 
			
		||||
											{
 | 
			
		||||
												--browsed;
 | 
			
		||||
												s = --pch;
 | 
			
		||||
												found = true;
 | 
			
		||||
												break;
 | 
			
		||||
											}
 | 
			
		||||
										}
 | 
			
		||||
 | 
			
		||||
										if (found)
 | 
			
		||||
										{
 | 
			
		||||
											continue;
 | 
			
		||||
										}
 | 
			
		||||
									}
 | 
			
		||||
								}
 | 
			
		||||
							}
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						if (!nameCheck)
 | 
			
		||||
						{
 | 
			
		||||
							/**
 | 
			
		||||
							 * Valid second number.
 | 
			
		||||
							 * $nn or ${nn}
 | 
			
		||||
							 *   ^       ^
 | 
			
		||||
							 */
 | 
			
		||||
							if (*walk && *walk >= '0' && *walk <= '9')
 | 
			
		||||
							{
 | 
			
		||||
								backref = backref * 10 + *walk - '0';
 | 
			
		||||
								++walk;
 | 
			
		||||
							}
 | 
			
		||||
 | 
			
		||||
							if (inBrace)
 | 
			
		||||
							{
 | 
			
		||||
								/**
 | 
			
		||||
								 * Invalid specifier
 | 
			
		||||
								 * Either hit EOS or missing }.
 | 
			
		||||
								 * ${n  or ${nn  or ${nx or ${nnx
 | 
			
		||||
								 *    ^        ^       ^        ^
 | 
			
		||||
								 */
 | 
			
		||||
								if (*walk == '\0' || *walk != '}')
 | 
			
		||||
								{
 | 
			
		||||
									backref = -1;
 | 
			
		||||
								}
 | 
			
		||||
								else
 | 
			
		||||
								{
 | 
			
		||||
									++walk;
 | 
			
		||||
								}
 | 
			
		||||
							}
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						length = walk - s;
 | 
			
		||||
						s = --walk;
 | 
			
		||||
 | 
			
		||||
						/**
 | 
			
		||||
						 * We can't provide a capture number >= to total that pcre_exec has found.
 | 
			
		||||
						 * 0 is implicitly accepted, same behavior as $&.
 | 
			
		||||
						 */
 | 
			
		||||
						if (backref >= 0 && (int)backref < mNumSubpatterns)
 | 
			
		||||
						{
 | 
			
		||||
							/**
 | 
			
		||||
							 * Valid available index for a given match.
 | 
			
		||||
							 */
 | 
			
		||||
							if (backref < mMatchesSubs.at(i))
 | 
			
		||||
							{
 | 
			
		||||
								/**
 | 
			
		||||
								 * Concatenate retrieved sub-string.
 | 
			
		||||
								 * length - 1 to overwrite EOS.
 | 
			
		||||
								 */
 | 
			
		||||
								GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length);
 | 
			
		||||
								browsed += length - 1;
 | 
			
		||||
							}
 | 
			
		||||
							else
 | 
			
		||||
							{
 | 
			
		||||
								/**
 | 
			
		||||
								 * Valid unavailable index for a given match.
 | 
			
		||||
								 */
 | 
			
		||||
								--browsed;
 | 
			
		||||
							}
 | 
			
		||||
						}
 | 
			
		||||
						else
 | 
			
		||||
						{
 | 
			
		||||
							/**
 | 
			
		||||
							 * If we here it means the syntax is valid but sub-pattern doesn't exist. 
 | 
			
		||||
							 * So, copy as it is, including $.
 | 
			
		||||
							 */
 | 
			
		||||
							memcpy(ptr + browsed, s - length, length + 1);
 | 
			
		||||
							browsed += length;
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
					default:
 | 
			
		||||
					{
 | 
			
		||||
						/**
 | 
			
		||||
						 * Not a valid format modifier.
 | 
			
		||||
						 * So we copy characters as it is.
 | 
			
		||||
						 */
 | 
			
		||||
						*(ptr + browsed) = *s;
 | 
			
		||||
						break;
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			else
 | 
			
		||||
			{
 | 
			
		||||
				/**
 | 
			
		||||
				 * At this point, direct copy.
 | 
			
		||||
				 */
 | 
			
		||||
				*(ptr + browsed) = c;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		*(ptr + browsed) = '\0';
 | 
			
		||||
 | 
			
		||||
		/**
 | 
			
		||||
		 * Concatenate only replace string of each match, 
 | 
			
		||||
		 * as we don't want to copy unmatched characters.
 | 
			
		||||
		 */
 | 
			
		||||
		if (flags & REGEX_FORMAT_NOCOPY)
 | 
			
		||||
		{
 | 
			
		||||
			/**
 | 
			
		||||
			 * We want just the first occurrence.
 | 
			
		||||
			 */
 | 
			
		||||
			if (total++ && (flags & REGEX_FORMAT_FIRSTONLY))
 | 
			
		||||
			{
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			strncat(output, toReplace, textMaxLen + 1);
 | 
			
		||||
		}
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			/**
 | 
			
		||||
			 * Retrieves full string of a given match.
 | 
			
		||||
			 */
 | 
			
		||||
			const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);
 | 
			
		||||
 | 
			
		||||
			/**
 | 
			
		||||
			 * We get something to replace, but the sub-pattern to search is empty.
 | 
			
		||||
			 * We insert replacement either a the start end or string.
 | 
			
		||||
			 */
 | 
			
		||||
			if (*toReplace && !searchLen)
 | 
			
		||||
			{
 | 
			
		||||
				if (output - text > 0)
 | 
			
		||||
				{
 | 
			
		||||
					strncat(output, toReplace, textMaxLen);
 | 
			
		||||
				}
 | 
			
		||||
				else
 | 
			
		||||
				{
 | 
			
		||||
					strncat(toReplace, text, textMaxLen);
 | 
			
		||||
					strncopy(text, toReplace, strlen(toReplace) + 1);
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				++total;
 | 
			
		||||
			}
 | 
			
		||||
			else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
 | 
			
		||||
			{
 | 
			
		||||
				/**
 | 
			
		||||
				 * Then we simply do a replace.
 | 
			
		||||
				 * Probably not the most efficient, but this should be at least safe.
 | 
			
		||||
				 * To avoid issue where the function could find a string which is not at the expected index,
 | 
			
		||||
				 * We force the input string to start from index of the full match.
 | 
			
		||||
				 */
 | 
			
		||||
				++total;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if (total && (flags & REGEX_FORMAT_FIRSTONLY))
 | 
			
		||||
			{
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/**
 | 
			
		||||
		 * mMatchesSubs is a flat list containing all sub-patterns of all matches.
 | 
			
		||||
		 * A number of sub-patterns can vary per match. So we calculate the position in the list, 
 | 
			
		||||
		 * from where the first sub-pattern result of current match starts.
 | 
			
		||||
		 */
 | 
			
		||||
		baseIndex  += mMatchesSubs.at(i);
 | 
			
		||||
		diffLength += browsed - searchLen;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	delete[] toReplace;
 | 
			
		||||
	
 | 
			
		||||
	if (toSearch != NULL)
 | 
			
		||||
	{
 | 
			
		||||
		delete[] toSearch;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Return the number of successful replacements.
 | 
			
		||||
	 */
 | 
			
		||||
	return total;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,27 +1,94 @@
 | 
			
		||||
/* AMX Mod X
 | 
			
		||||
 *   Regular Expressions Module
 | 
			
		||||
 *
 | 
			
		||||
 * by the AMX Mod X Development Team
 | 
			
		||||
 *
 | 
			
		||||
 * This file is part of AMX Mod X.
 | 
			
		||||
 *
 | 
			
		||||
 *
 | 
			
		||||
 *  This program is free software; you can redistribute it and/or modify it
 | 
			
		||||
 *  under the terms of the GNU General Public License as published by the
 | 
			
		||||
 *  Free Software Foundation; either version 2 of the License, or (at
 | 
			
		||||
 *  your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 *  This program is distributed in the hope that it will be useful, but
 | 
			
		||||
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 | 
			
		||||
 *  General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 *  You should have received a copy of the GNU General Public License
 | 
			
		||||
 *  along with this program; if not, write to the Free Software Foundation,
 | 
			
		||||
 *  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 | 
			
		||||
 *
 | 
			
		||||
 *  In addition, as a special exception, the author gives permission to
 | 
			
		||||
 *  link the code of this program with the Half-Life Game Engine ("HL
 | 
			
		||||
 *  Engine") and Modified Game Libraries ("MODs") developed by Valve,
 | 
			
		||||
 *  L.L.C ("Valve"). You must obey the GNU General Public License in all
 | 
			
		||||
 *  respects for all of the code used other than the HL Engine and MODs
 | 
			
		||||
 *  from Valve. If you modify this file, you may extend this exception
 | 
			
		||||
 *  to your version of the file, but you are not obligated to do so. If
 | 
			
		||||
 *  you do not wish to do so, delete this exception statement from your
 | 
			
		||||
 *  version.
 | 
			
		||||
 */
 | 
			
		||||
#ifndef _INCLUDE_CREGEX_H
 | 
			
		||||
#define _INCLUDE_CREGEX_H
 | 
			
		||||
 
 | 
			
		||||
#include <am-vector.h>
 | 
			
		||||
#include <am-string.h>
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Maximum number of sub-patterns, here 50 (this should be a multiple of 3).
 | 
			
		||||
 */
 | 
			
		||||
#define REGEX_MAX_SUBPATTERNS 150
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Flags to use with regex_replace, to control the replacement behavior.
 | 
			
		||||
 */
 | 
			
		||||
#define REGEX_FORMAT_DEFAULT   0  // Uses the standard formatting rules to replace matches.
 | 
			
		||||
#define REGEX_FORMAT_NOCOPY    1  // The sections that do not match the regular expression are not copied when replacing matches.
 | 
			
		||||
#define REGEX_FORMAT_FIRSTONLY 2  // Only the first occurrence of a regular expression is replaced.
 | 
			
		||||
 | 
			
		||||
class RegEx
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
	struct RegExSub {
 | 
			
		||||
		int start, end;
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	struct NamedGroup {
 | 
			
		||||
		ke::AString name;
 | 
			
		||||
		size_t index;
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	RegEx();
 | 
			
		||||
	~RegEx();
 | 
			
		||||
 | 
			
		||||
	bool isFree(bool set=false, bool val=false);
 | 
			
		||||
	void Clear();
 | 
			
		||||
 | 
			
		||||
	int Compile(const char *pattern, const char* flags = NULL);
 | 
			
		||||
	int Compile(const char *pattern, int iFlags);
 | 
			
		||||
	int Match(const char *str);
 | 
			
		||||
	int MatchAll(const char *str);
 | 
			
		||||
	int Replace(char *text, size_t text_maxlen, const char *replace, size_t replaceLen, int flags = 0);
 | 
			
		||||
	void ClearMatch();
 | 
			
		||||
	const char *GetSubstring(int s, char buffer[], int max);
 | 
			
		||||
	const char *GetSubstring(size_t start, char buffer[], size_t max, size_t *outlen = NULL);
 | 
			
		||||
	void MakeSubpatternsTable(int numSubpatterns);
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
	int mErrorOffset;
 | 
			
		||||
	const char *mError;
 | 
			
		||||
	int mSubStrings;
 | 
			
		||||
	int Count() { return mSubStrings.length(); }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
	pcre *re;
 | 
			
		||||
	bool mFree;
 | 
			
		||||
	int ovector[30];
 | 
			
		||||
	int ovector[REGEX_MAX_SUBPATTERNS];
 | 
			
		||||
	char *subject;
 | 
			
		||||
	ke::Vector<RegExSub> mSubStrings;
 | 
			
		||||
	ke::Vector<size_t> mMatchesSubs;
 | 
			
		||||
	ke::Vector<NamedGroup> mSubsNameTable;
 | 
			
		||||
	int mNumSubpatterns;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif //_INCLUDE_CREGEX_H
 | 
			
		||||
 
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -1,24 +1,59 @@
 | 
			
		||||
/* AMX Mod X
 | 
			
		||||
 *   Regular Expressions Module
 | 
			
		||||
 *
 | 
			
		||||
 * by the AMX Mod X Development Team
 | 
			
		||||
 *
 | 
			
		||||
 * This file is part of AMX Mod X.
 | 
			
		||||
 *
 | 
			
		||||
 *
 | 
			
		||||
 *  This program is free software; you can redistribute it and/or modify it
 | 
			
		||||
 *  under the terms of the GNU General Public License as published by the
 | 
			
		||||
 *  Free Software Foundation; either version 2 of the License, or (at
 | 
			
		||||
 *  your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 *  This program is distributed in the hope that it will be useful, but
 | 
			
		||||
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 | 
			
		||||
 *  General Public License for more details.
 | 
			
		||||
 * 
 | 
			
		||||
 *  You should have received a copy of the GNU General Public License
 | 
			
		||||
 *  along with this program; if not, write to the Free Software Foundation,
 | 
			
		||||
 *  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 | 
			
		||||
 *
 | 
			
		||||
 *  In addition, as a special exception, the author gives permission to
 | 
			
		||||
 *  link the code of this program with the Half-Life Game Engine ("HL
 | 
			
		||||
 *  Engine") and Modified Game Libraries ("MODs") developed by Valve,
 | 
			
		||||
 *  L.L.C ("Valve"). You must obey the GNU General Public License in all
 | 
			
		||||
 *  respects for all of the code used other than the HL Engine and MODs
 | 
			
		||||
 *  from Valve. If you modify this file, you may extend this exception
 | 
			
		||||
 *  to your version of the file, but you are not obligated to do so. If
 | 
			
		||||
 *  you do not wish to do so, delete this exception statement from your
 | 
			
		||||
 *  version.
 | 
			
		||||
 */
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include "pcre.h"
 | 
			
		||||
#include "amxxmodule.h"
 | 
			
		||||
#include "CVector.h"
 | 
			
		||||
#include <am-vector.h>
 | 
			
		||||
#include <am-utility.h>
 | 
			
		||||
#include "CRegEx.h"
 | 
			
		||||
#include "utils.h"
 | 
			
		||||
 | 
			
		||||
CVector<RegEx *> PEL;
 | 
			
		||||
ke::Vector<RegEx *> PEL;
 | 
			
		||||
 | 
			
		||||
int GetPEL()
 | 
			
		||||
{
 | 
			
		||||
	for (int i=0; i<(int)PEL.size(); i++)
 | 
			
		||||
	for (int i=0; i<(int)PEL.length(); i++)
 | 
			
		||||
	{
 | 
			
		||||
		if (PEL[i]->isFree())
 | 
			
		||||
			return i;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	RegEx *x = new RegEx();
 | 
			
		||||
	PEL.push_back(x);
 | 
			
		||||
	PEL.append(x);
 | 
			
		||||
 | 
			
		||||
	return (int)PEL.size() - 1;
 | 
			
		||||
	return (int)PEL.length() - 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// native Regex:regex_compile(const pattern[], &ret, error[], maxLen, const flags[]="");
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_compile(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
@@ -31,17 +66,36 @@ static cell AMX_NATIVE_CALL regex_compile(AMX *amx, cell *params)
 | 
			
		||||
	
 | 
			
		||||
	if (x->Compile(regex, flags) == 0)
 | 
			
		||||
	{
 | 
			
		||||
		cell *eOff = MF_GetAmxAddr(amx, params[2]);
 | 
			
		||||
		const char *err = x->mError;
 | 
			
		||||
		*eOff = x->mErrorOffset;
 | 
			
		||||
		*MF_GetAmxAddr(amx, params[2]) = x->mErrorOffset;
 | 
			
		||||
		MF_SetAmxString(amx, params[3], err?err:"unknown", params[4]);
 | 
			
		||||
		return -1;
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	return id+1;
 | 
			
		||||
}// 1.8 includes the last parameter
 | 
			
		||||
// Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// native Regex:regex_compile_ex(const pattern[], flags = 0, error[] = "", maxLen = 0, &errcode = 0);
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_compile_ex(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
	int len;
 | 
			
		||||
	const char *regex = MF_GetAmxString(amx, params[1], 0, &len);
 | 
			
		||||
 | 
			
		||||
	int id = GetPEL();
 | 
			
		||||
	RegEx *x = PEL[id];
 | 
			
		||||
 | 
			
		||||
	if (x->Compile(regex, params[2]) == 0)
 | 
			
		||||
	{
 | 
			
		||||
		const char *err = x->mError;
 | 
			
		||||
		*MF_GetAmxAddr(amx, params[5]) = x->mErrorOffset;
 | 
			
		||||
		MF_SetAmxString(amx, params[3], err ? err : "unknown", params[4]);
 | 
			
		||||
		return -1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return id + 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
cell match(AMX *amx, cell *params, bool all)
 | 
			
		||||
{
 | 
			
		||||
	int len;
 | 
			
		||||
	const char *str = MF_GetAmxString(amx, params[1], 0, &len);
 | 
			
		||||
@@ -49,101 +103,166 @@ static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
 | 
			
		||||
 | 
			
		||||
	int id = GetPEL();
 | 
			
		||||
	RegEx *x = PEL[id];
 | 
			
		||||
	
 | 
			
		||||
	char* flags = NULL;
 | 
			
		||||
	
 | 
			
		||||
	if ((params[0] / sizeof(cell)) >= 6) // compiled with 1.8's extra parameter
 | 
			
		||||
 | 
			
		||||
	char *flags = NULL;
 | 
			
		||||
	cell *errorCode;
 | 
			
		||||
	int result = 0;
 | 
			
		||||
 | 
			
		||||
	if (!all)
 | 
			
		||||
	{
 | 
			
		||||
		flags = MF_GetAmxString(amx, params[6], 2, &len);
 | 
			
		||||
		if (*params / sizeof(cell) >= 6) // compiled with 1.8's extra parameter
 | 
			
		||||
		{
 | 
			
		||||
			flags = MF_GetAmxString(amx, params[6], 2, &len);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		result = x->Compile(regex, flags);
 | 
			
		||||
		errorCode = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	if (x->Compile(regex, flags) == 0)
 | 
			
		||||
	else
 | 
			
		||||
	{
 | 
			
		||||
		result = x->Compile(regex, params[3]);
 | 
			
		||||
		errorCode = MF_GetAmxAddr(amx, params[6]);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!result)
 | 
			
		||||
	{
 | 
			
		||||
		cell *eOff = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
		const char *err = x->mError;
 | 
			
		||||
		*eOff = x->mErrorOffset;
 | 
			
		||||
		MF_SetAmxString(amx, params[4], err?err:"unknown", params[5]);
 | 
			
		||||
		*errorCode = x->mErrorOffset;
 | 
			
		||||
		MF_SetAmxString(amx, params[4], err ? err : "unknown", params[5]);
 | 
			
		||||
		return -1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	int e = x->Match(str);
 | 
			
		||||
	int e;
 | 
			
		||||
 | 
			
		||||
	if (all)
 | 
			
		||||
		e = x->MatchAll(str);
 | 
			
		||||
	else
 | 
			
		||||
		e = x->Match(str);
 | 
			
		||||
 | 
			
		||||
	if (e == -1)
 | 
			
		||||
	{
 | 
			
		||||
		/* there was a match error.  destroy this and move on. */
 | 
			
		||||
		cell *res = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
		*res = x->mErrorOffset;
 | 
			
		||||
		*errorCode = x->mErrorOffset;
 | 
			
		||||
		x->Clear();
 | 
			
		||||
		return -2;
 | 
			
		||||
	} else if (e == 0) {
 | 
			
		||||
		cell *res = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
		*res = 0;
 | 
			
		||||
	}
 | 
			
		||||
	else if (e == 0) 
 | 
			
		||||
	{
 | 
			
		||||
		*errorCode = 0;
 | 
			
		||||
		x->Clear();
 | 
			
		||||
		return 0;
 | 
			
		||||
	} else {
 | 
			
		||||
		cell *res = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
		*res = x->mSubStrings;
 | 
			
		||||
	}
 | 
			
		||||
	else 
 | 
			
		||||
	{
 | 
			
		||||
		*errorCode = x->Count();
 | 
			
		||||
		if (all)
 | 
			
		||||
			return x->Count();
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return id+1;
 | 
			
		||||
	return id + 1;
 | 
			
		||||
}
 | 
			
		||||
// native regex_match_c(const string[], Regex:id, &ret);
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
	int len;
 | 
			
		||||
	int id = params[2]-1;
 | 
			
		||||
	const char *str = MF_GetAmxString(amx, params[1], 0, &len);
 | 
			
		||||
 | 
			
		||||
	if (id >= (int)PEL.size() || id < 0 || PEL[id]->isFree())
 | 
			
		||||
// native Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_match(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
	return match(amx, params, false);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// native Regex:regex_match_all(const string[], const pattern[], flags = 0, error[] = "", maxLen = 0, &errcode = 0);
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_match_all(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
	return match(amx, params, true);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
cell match_c(AMX *amx, cell *params, bool all)
 | 
			
		||||
{
 | 
			
		||||
	int id = params[2] - 1;
 | 
			
		||||
 | 
			
		||||
	if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
 | 
			
		||||
	{
 | 
			
		||||
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
 | 
			
		||||
	int len;
 | 
			
		||||
	const char *str = MF_GetAmxString(amx, params[1], 0, &len);
 | 
			
		||||
	cell *errorCode = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
 | 
			
		||||
	RegEx *x = PEL[id];
 | 
			
		||||
 | 
			
		||||
	int e = x->Match(str);
 | 
			
		||||
	int e;
 | 
			
		||||
	if (all)
 | 
			
		||||
		e = x->MatchAll(str);
 | 
			
		||||
	else
 | 
			
		||||
		e = x->Match(str);
 | 
			
		||||
 | 
			
		||||
	if (e == -1)
 | 
			
		||||
	{
 | 
			
		||||
		/* there was a match error.  move on. */
 | 
			
		||||
		cell *res = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
		*res = x->mErrorOffset;
 | 
			
		||||
		*errorCode = x->mErrorOffset;
 | 
			
		||||
 | 
			
		||||
		/* only clear the match results, since the regex object
 | 
			
		||||
		   may still be referenced later */
 | 
			
		||||
		may still be referenced later */
 | 
			
		||||
		x->ClearMatch();
 | 
			
		||||
		return -2;
 | 
			
		||||
	} else if (e == 0) {
 | 
			
		||||
		cell *res = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
		*res = 0;
 | 
			
		||||
	}
 | 
			
		||||
	else if (e == 0) 
 | 
			
		||||
	{
 | 
			
		||||
		*errorCode = 0;
 | 
			
		||||
 | 
			
		||||
		/* only clear the match results, since the regex object
 | 
			
		||||
		   may still be referenced later */
 | 
			
		||||
		may still be referenced later */
 | 
			
		||||
		x->ClearMatch();
 | 
			
		||||
		return 0;
 | 
			
		||||
	} else {
 | 
			
		||||
		cell *res = MF_GetAmxAddr(amx, params[3]);
 | 
			
		||||
		*res = x->mSubStrings;
 | 
			
		||||
		return x->mSubStrings;
 | 
			
		||||
	}
 | 
			
		||||
	else 
 | 
			
		||||
	{
 | 
			
		||||
		*errorCode = x->Count();
 | 
			
		||||
		return x->Count();
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// native regex_match_c(const string[], Regex:id, &ret);
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_match_c(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
	return match_c(amx, params, false);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// native regex_match_all_c(const string[], Regex:id, &ret);
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_match_all_c(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
	return match_c(amx, params, true);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// native regex_substr(Regex:id, str_id, buffer[], maxLen);
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_substr(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
	int id = params[1]-1;
 | 
			
		||||
	if (id >= (int)PEL.size() || id < 0 || PEL[id]->isFree())
 | 
			
		||||
	if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
 | 
			
		||||
	{
 | 
			
		||||
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	RegEx *x = PEL[id];
 | 
			
		||||
	//good idea? probably not.
 | 
			
		||||
	static char buffer[4096];
 | 
			
		||||
	static char buffer[16384]; // Same as AMXX buffer.
 | 
			
		||||
 | 
			
		||||
	const char *ret = x->GetSubstring(params[2], buffer, 4095);
 | 
			
		||||
	size_t length;
 | 
			
		||||
	size_t maxLength = ke::Min<size_t>(params[4], sizeof(buffer) - 1);
 | 
			
		||||
 | 
			
		||||
	const char *ret = x->GetSubstring(params[2], buffer, maxLength, &length);
 | 
			
		||||
 | 
			
		||||
	if (ret == NULL)
 | 
			
		||||
	{
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	MF_SetAmxString(amx, params[3], ret, params[4]);
 | 
			
		||||
	if (length >= maxLength && ret[length - 1] & 1 << 7)
 | 
			
		||||
	{
 | 
			
		||||
		maxLength -= UTIL_CheckValidChar((char *)ret + length - 1);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	MF_SetAmxString(amx, params[3], ret, maxLength);
 | 
			
		||||
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
@@ -154,7 +273,7 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
 | 
			
		||||
	int id = *c;
 | 
			
		||||
	*c = 0;
 | 
			
		||||
	id -= 1;
 | 
			
		||||
	if (id >= (int)PEL.size() || id < 0 || PEL[id]->isFree())
 | 
			
		||||
	if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
 | 
			
		||||
	{
 | 
			
		||||
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
 | 
			
		||||
		return 0;
 | 
			
		||||
@@ -166,11 +285,52 @@ static cell AMX_NATIVE_CALL regex_free(AMX *amx, cell *params)
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);
 | 
			
		||||
static cell AMX_NATIVE_CALL regex_replace(AMX *amx, cell *params)
 | 
			
		||||
{
 | 
			
		||||
	int id = params[1] - 1;
 | 
			
		||||
	if (id >= (int)PEL.length() || id < 0 || PEL[id]->isFree())
 | 
			
		||||
	{
 | 
			
		||||
		MF_LogError(amx, AMX_ERR_NATIVE, "Invalid regex handle %d", id);
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	int textLen, replaceLen;
 | 
			
		||||
	char *text = MF_GetAmxString(amx, params[2], 0, &textLen);
 | 
			
		||||
	const char *replace = MF_GetAmxString(amx, params[4], 1, &replaceLen);
 | 
			
		||||
 | 
			
		||||
	cell *erroCode = MF_GetAmxAddr(amx, params[6]);
 | 
			
		||||
 | 
			
		||||
	RegEx *x = PEL[id]; 
 | 
			
		||||
	int e = x->Replace(text, params[3] + 1, replace, replaceLen, params[5]);
 | 
			
		||||
 | 
			
		||||
	if (e == -1)
 | 
			
		||||
	{
 | 
			
		||||
		*erroCode = x->mErrorOffset;
 | 
			
		||||
		x->ClearMatch();
 | 
			
		||||
		return -2;
 | 
			
		||||
	}
 | 
			
		||||
	else if (e == 0)
 | 
			
		||||
	{
 | 
			
		||||
		*erroCode = 0;
 | 
			
		||||
		x->ClearMatch();
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	MF_SetAmxString(amx, params[2], text, params[3]);
 | 
			
		||||
 | 
			
		||||
	return e;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Table mapping native names to the handlers defined in this file.
// The list ends with a {NULL, NULL} entry.
AMX_NATIVE_INFO regex_Natives[] =
{
	{ "regex_compile",     regex_compile     },
	{ "regex_compile_ex",  regex_compile_ex  },
	{ "regex_match",       regex_match       },
	{ "regex_match_c",     regex_match_c     },
	{ "regex_match_all",   regex_match_all   },
	{ "regex_match_all_c", regex_match_all_c },
	{ "regex_substr",      regex_substr      },
	{ "regex_replace",     regex_replace     },
	{ "regex_free",        regex_free        },
	{ NULL,                NULL              },
};
 | 
			
		||||
@@ -182,7 +342,7 @@ void OnAmxxAttach()
 | 
			
		||||
 | 
			
		||||
void OnAmxxDetach()
 | 
			
		||||
{
 | 
			
		||||
	for (int i = 0; i<(int)PEL.size(); i++)
 | 
			
		||||
	for (int i = 0; i<(int)PEL.length(); i++)
 | 
			
		||||
	{
 | 
			
		||||
		if (PEL[i])
 | 
			
		||||
		{
 | 
			
		||||
 
 | 
			
		||||
@@ -52,8 +52,8 @@
 | 
			
		||||
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 | 
			
		||||
    <ClCompile>
 | 
			
		||||
      <Optimization>Disabled</Optimization>
 | 
			
		||||
      <AdditionalIncludeDirectories>..\;..\sdk;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 | 
			
		||||
      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;REGEX_EXPORTS;PCRE_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 | 
			
		||||
      <AdditionalIncludeDirectories>..\;..\..\..\public\amtl;..\sdk;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 | 
			
		||||
      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;REGEX_EXPORTS;HAVE_STDINT_H;PCRE_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 | 
			
		||||
      <MinimalRebuild>true</MinimalRebuild>
 | 
			
		||||
      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
 | 
			
		||||
      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 | 
			
		||||
@@ -75,8 +75,8 @@
 | 
			
		||||
  </ItemDefinitionGroup>
 | 
			
		||||
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 | 
			
		||||
    <ClCompile>
 | 
			
		||||
      <AdditionalIncludeDirectories>..\;..\sdk;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 | 
			
		||||
      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;REGEX_EXPORTS;PCRE_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 | 
			
		||||
      <AdditionalIncludeDirectories>..\;..\..\..\public\amtl;..\sdk;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 | 
			
		||||
      <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;REGEX_EXPORTS;HAVE_STDINT_H;PCRE_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 | 
			
		||||
      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
 | 
			
		||||
      <RuntimeTypeInfo>false</RuntimeTypeInfo>
 | 
			
		||||
      <PrecompiledHeader>
 | 
			
		||||
@@ -99,6 +99,7 @@
 | 
			
		||||
    <ClCompile Include="..\CRegEx.cpp" />
 | 
			
		||||
    <ClCompile Include="..\module.cpp" />
 | 
			
		||||
    <ClCompile Include="..\sdk\amxxmodule.cpp" />
 | 
			
		||||
    <ClCompile Include="..\utils.cpp" />
 | 
			
		||||
  </ItemGroup>
 | 
			
		||||
  <ItemGroup>
 | 
			
		||||
    <ClInclude Include="..\CRegEx.h" />
 | 
			
		||||
@@ -107,6 +108,7 @@
 | 
			
		||||
    <ClInclude Include="..\sdk\moduleconfig.h" />
 | 
			
		||||
    <ClInclude Include="..\sdk\CVector.h" />
 | 
			
		||||
    <ClInclude Include="..\sdk\amxxmodule.h" />
 | 
			
		||||
    <ClInclude Include="..\utils.h" />
 | 
			
		||||
  </ItemGroup>
 | 
			
		||||
  <ItemGroup>
 | 
			
		||||
    <None Include="..\..\..\plugins\include\regex.inc" />
 | 
			
		||||
 
 | 
			
		||||
@@ -32,6 +32,9 @@
 | 
			
		||||
    <ClCompile Include="..\sdk\amxxmodule.cpp">
 | 
			
		||||
      <Filter>Module SDK\SDK Base</Filter>
 | 
			
		||||
    </ClCompile>
 | 
			
		||||
    <ClCompile Include="..\utils.cpp">
 | 
			
		||||
      <Filter>Source Files</Filter>
 | 
			
		||||
    </ClCompile>
 | 
			
		||||
  </ItemGroup>
 | 
			
		||||
  <ItemGroup>
 | 
			
		||||
    <ClInclude Include="..\CRegEx.h">
 | 
			
		||||
@@ -52,6 +55,9 @@
 | 
			
		||||
    <ClInclude Include="..\sdk\amxxmodule.h">
 | 
			
		||||
      <Filter>Module SDK\SDK Base</Filter>
 | 
			
		||||
    </ClInclude>
 | 
			
		||||
    <ClInclude Include="..\utils.h">
 | 
			
		||||
      <Filter>Header Files</Filter>
 | 
			
		||||
    </ClInclude>
 | 
			
		||||
  </ItemGroup>
 | 
			
		||||
  <ItemGroup>
 | 
			
		||||
    <None Include="..\..\..\plugins\include\regex.inc">
 | 
			
		||||
 
 | 
			
		||||
@@ -2,10 +2,10 @@
 | 
			
		||||
*       Perl-Compatible Regular Expressions      *
 | 
			
		||||
*************************************************/
 | 
			
		||||
 | 
			
		||||
/* In its original form, this is the .in file that is transformed by
 | 
			
		||||
"configure" into pcre.h.
 | 
			
		||||
/* This is the public header file for the PCRE library, to be #included by
 | 
			
		||||
applications that call the PCRE functions.
 | 
			
		||||
 | 
			
		||||
           Copyright (c) 1997-2005 University of Cambridge
 | 
			
		||||
           Copyright (c) 1997-2014 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
@@ -39,34 +39,48 @@ POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#ifndef _PCRE_H
 | 
			
		||||
#define _PCRE_H
 | 
			
		||||
 | 
			
		||||
/* The file pcre.h is build by "configure". Do not edit it; instead
 | 
			
		||||
make changes to pcre.in. */
 | 
			
		||||
/* The current PCRE version information. */
 | 
			
		||||
 | 
			
		||||
#define PCRE_MAJOR          6
 | 
			
		||||
#define PCRE_MINOR          4
 | 
			
		||||
#define PCRE_DATE           05-Sep-2005
 | 
			
		||||
#define PCRE_MAJOR          8
 | 
			
		||||
#define PCRE_MINOR          35
 | 
			
		||||
#define PCRE_PRERELEASE     
 | 
			
		||||
#define PCRE_DATE           2014-04-04
 | 
			
		||||
 | 
			
		||||
/* Win32 uses DLL by default; it needs special stuff for exported functions. */
 | 
			
		||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
 | 
			
		||||
imported have to be identified as such. When building PCRE, the appropriate
 | 
			
		||||
export setting is defined in pcre_internal.h, which includes this file. So we
 | 
			
		||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
 | 
			
		||||
 | 
			
		||||
#ifdef _WIN32
 | 
			
		||||
#  ifdef PCRE_DEFINITION
 | 
			
		||||
#    ifdef DLL_EXPORT
 | 
			
		||||
#      define PCRE_DATA_SCOPE __declspec(dllexport)
 | 
			
		||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
 | 
			
		||||
#  ifndef PCRE_EXP_DECL
 | 
			
		||||
#    define PCRE_EXP_DECL  extern __declspec(dllimport)
 | 
			
		||||
#  endif
 | 
			
		||||
#  ifdef __cplusplus
 | 
			
		||||
#    ifndef PCRECPP_EXP_DECL
 | 
			
		||||
#      define PCRECPP_EXP_DECL  extern __declspec(dllimport)
 | 
			
		||||
#    endif
 | 
			
		||||
#  else
 | 
			
		||||
#    ifndef PCRE_STATIC
 | 
			
		||||
#      define PCRE_DATA_SCOPE extern __declspec(dllimport)
 | 
			
		||||
#    ifndef PCRECPP_EXP_DEFN
 | 
			
		||||
#      define PCRECPP_EXP_DEFN  __declspec(dllimport)
 | 
			
		||||
#    endif
 | 
			
		||||
#  endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* For other operating systems, we use the standard "extern". */
 | 
			
		||||
/* By default, we use the standard "extern" declarations. */
 | 
			
		||||
 | 
			
		||||
#ifndef PCRE_DATA_SCOPE
 | 
			
		||||
#ifndef PCRE_EXP_DECL
 | 
			
		||||
#  ifdef __cplusplus
 | 
			
		||||
#    define PCRE_DATA_SCOPE     extern "C"
 | 
			
		||||
#    define PCRE_EXP_DECL  extern "C"
 | 
			
		||||
#  else
 | 
			
		||||
#    define PCRE_DATA_SCOPE     extern
 | 
			
		||||
#    define PCRE_EXP_DECL  extern
 | 
			
		||||
#  endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
#  ifndef PCRECPP_EXP_DECL
 | 
			
		||||
#    define PCRECPP_EXP_DECL  extern
 | 
			
		||||
#  endif
 | 
			
		||||
#  ifndef PCRECPP_EXP_DEFN
 | 
			
		||||
#    define PCRECPP_EXP_DEFN
 | 
			
		||||
#  endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@@ -81,50 +95,162 @@ it is needed here for malloc. */
 | 
			
		||||
extern "C" {
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Options */
 | 
			
		||||
/* Public options. Some are compile-time only, some are run-time only, and some
 | 
			
		||||
are both. Most of the compile-time options are saved with the compiled regex so
 | 
			
		||||
that they can be inspected during studying (and therefore JIT compiling). Note
 | 
			
		||||
that pcre_study() has its own set of options. Originally, all the options
 | 
			
		||||
defined here used distinct bits. However, almost all the bits in a 32-bit word
 | 
			
		||||
are now used, so in order to conserve them, option bits that were previously
 | 
			
		||||
only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
 | 
			
		||||
also be used for compile-time options that affect only compiling and are not
 | 
			
		||||
relevant for studying or JIT compiling.
 | 
			
		||||
 | 
			
		||||
#define PCRE_CASELESS           0x00000001
 | 
			
		||||
#define PCRE_MULTILINE          0x00000002
 | 
			
		||||
#define PCRE_DOTALL             0x00000004
 | 
			
		||||
#define PCRE_EXTENDED           0x00000008
 | 
			
		||||
#define PCRE_ANCHORED           0x00000010
 | 
			
		||||
#define PCRE_DOLLAR_ENDONLY     0x00000020
 | 
			
		||||
#define PCRE_EXTRA              0x00000040
 | 
			
		||||
#define PCRE_NOTBOL             0x00000080
 | 
			
		||||
#define PCRE_NOTEOL             0x00000100
 | 
			
		||||
#define PCRE_UNGREEDY           0x00000200
 | 
			
		||||
#define PCRE_NOTEMPTY           0x00000400
 | 
			
		||||
#define PCRE_UTF8               0x00000800
 | 
			
		||||
#define PCRE_NO_AUTO_CAPTURE    0x00001000
 | 
			
		||||
#define PCRE_NO_UTF8_CHECK      0x00002000
 | 
			
		||||
#define PCRE_AUTO_CALLOUT       0x00004000
 | 
			
		||||
#define PCRE_PARTIAL            0x00008000
 | 
			
		||||
#define PCRE_DFA_SHORTEST       0x00010000
 | 
			
		||||
#define PCRE_DFA_RESTART        0x00020000
 | 
			
		||||
#define PCRE_FIRSTLINE          0x00040000
 | 
			
		||||
Some options for pcre_compile() change its behaviour but do not affect the
 | 
			
		||||
behaviour of the execution functions. Other options are passed through to the
 | 
			
		||||
execution functions and affect their behaviour, with or without affecting the
 | 
			
		||||
behaviour of pcre_compile().
 | 
			
		||||
 | 
			
		||||
Options that can be passed to pcre_compile() are tagged Cx below, with these
 | 
			
		||||
variants:
 | 
			
		||||
 | 
			
		||||
C1   Affects compile only
 | 
			
		||||
C2   Does not affect compile; affects exec, dfa_exec
 | 
			
		||||
C3   Affects compile, exec, dfa_exec
 | 
			
		||||
C4   Affects compile, exec, dfa_exec, study
 | 
			
		||||
C5   Affects compile, exec, study
 | 
			
		||||
 | 
			
		||||
Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged with
 | 
			
		||||
E and D, respectively. They take precedence over C3, C4, and C5 settings passed
 | 
			
		||||
from pcre_compile(). Those that are compatible with JIT execution are flagged
 | 
			
		||||
with J. */
 | 
			
		||||
 | 
			
		||||
#define PCRE_CASELESS           0x00000001  /* C1       */
 | 
			
		||||
#define PCRE_MULTILINE          0x00000002  /* C1       */
 | 
			
		||||
#define PCRE_DOTALL             0x00000004  /* C1       */
 | 
			
		||||
#define PCRE_EXTENDED           0x00000008  /* C1       */
 | 
			
		||||
#define PCRE_ANCHORED           0x00000010  /* C4 E D   */
 | 
			
		||||
#define PCRE_DOLLAR_ENDONLY     0x00000020  /* C2       */
 | 
			
		||||
#define PCRE_EXTRA              0x00000040  /* C1       */
 | 
			
		||||
#define PCRE_NOTBOL             0x00000080  /*    E D J */
 | 
			
		||||
#define PCRE_NOTEOL             0x00000100  /*    E D J */
 | 
			
		||||
#define PCRE_UNGREEDY           0x00000200  /* C1       */
 | 
			
		||||
#define PCRE_NOTEMPTY           0x00000400  /*    E D J */
 | 
			
		||||
#define PCRE_UTF8               0x00000800  /* C4        )          */
 | 
			
		||||
#define PCRE_UTF16              0x00000800  /* C4        ) Synonyms */
 | 
			
		||||
#define PCRE_UTF32              0x00000800  /* C4        )          */
 | 
			
		||||
#define PCRE_NO_AUTO_CAPTURE    0x00001000  /* C1       */
 | 
			
		||||
#define PCRE_NO_UTF8_CHECK      0x00002000  /* C1 E D J  )          */
 | 
			
		||||
#define PCRE_NO_UTF16_CHECK     0x00002000  /* C1 E D J  ) Synonyms */
 | 
			
		||||
#define PCRE_NO_UTF32_CHECK     0x00002000  /* C1 E D J  )          */
 | 
			
		||||
#define PCRE_AUTO_CALLOUT       0x00004000  /* C1       */
 | 
			
		||||
#define PCRE_PARTIAL_SOFT       0x00008000  /*    E D J  ) Synonyms */
 | 
			
		||||
#define PCRE_PARTIAL            0x00008000  /*    E D J  )          */
 | 
			
		||||
 | 
			
		||||
/* This pair use the same bit. */
 | 
			
		||||
#define PCRE_NEVER_UTF          0x00010000  /* C1        ) Overlaid */
 | 
			
		||||
#define PCRE_DFA_SHORTEST       0x00010000  /*      D    ) Overlaid */
 | 
			
		||||
 | 
			
		||||
/* This pair use the same bit. */
 | 
			
		||||
#define PCRE_NO_AUTO_POSSESS    0x00020000  /* C1        ) Overlaid */
 | 
			
		||||
#define PCRE_DFA_RESTART        0x00020000  /*      D    ) Overlaid */
 | 
			
		||||
 | 
			
		||||
#define PCRE_FIRSTLINE          0x00040000  /* C3       */
 | 
			
		||||
#define PCRE_DUPNAMES           0x00080000  /* C1       */
 | 
			
		||||
#define PCRE_NEWLINE_CR         0x00100000  /* C3 E D   */
 | 
			
		||||
#define PCRE_NEWLINE_LF         0x00200000  /* C3 E D   */
 | 
			
		||||
#define PCRE_NEWLINE_CRLF       0x00300000  /* C3 E D   */
 | 
			
		||||
#define PCRE_NEWLINE_ANY        0x00400000  /* C3 E D   */
 | 
			
		||||
#define PCRE_NEWLINE_ANYCRLF    0x00500000  /* C3 E D   */
 | 
			
		||||
#define PCRE_BSR_ANYCRLF        0x00800000  /* C3 E D   */
 | 
			
		||||
#define PCRE_BSR_UNICODE        0x01000000  /* C3 E D   */
 | 
			
		||||
#define PCRE_JAVASCRIPT_COMPAT  0x02000000  /* C5       */
 | 
			
		||||
#define PCRE_NO_START_OPTIMIZE  0x04000000  /* C2 E D    ) Synonyms */
 | 
			
		||||
#define PCRE_NO_START_OPTIMISE  0x04000000  /* C2 E D    )          */
 | 
			
		||||
#define PCRE_PARTIAL_HARD       0x08000000  /*    E D J */
 | 
			
		||||
#define PCRE_NOTEMPTY_ATSTART   0x10000000  /*    E D J */
 | 
			
		||||
#define PCRE_UCP                0x20000000  /* C3       */
 | 
			
		||||
 | 
			
		||||
/* Exec-time and get/set-time error codes */
 | 
			
		||||
 | 
			
		||||
#define PCRE_ERROR_NOMATCH         (-1)
 | 
			
		||||
#define PCRE_ERROR_NULL            (-2)
 | 
			
		||||
#define PCRE_ERROR_BADOPTION       (-3)
 | 
			
		||||
#define PCRE_ERROR_BADMAGIC        (-4)
 | 
			
		||||
#define PCRE_ERROR_UNKNOWN_NODE    (-5)
 | 
			
		||||
#define PCRE_ERROR_NOMEMORY        (-6)
 | 
			
		||||
#define PCRE_ERROR_NOSUBSTRING     (-7)
 | 
			
		||||
#define PCRE_ERROR_MATCHLIMIT      (-8)
 | 
			
		||||
#define PCRE_ERROR_CALLOUT         (-9)  /* Never used by PCRE itself */
 | 
			
		||||
#define PCRE_ERROR_BADUTF8        (-10)
 | 
			
		||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
 | 
			
		||||
#define PCRE_ERROR_PARTIAL        (-12)
 | 
			
		||||
#define PCRE_ERROR_BADPARTIAL     (-13)
 | 
			
		||||
#define PCRE_ERROR_INTERNAL       (-14)
 | 
			
		||||
#define PCRE_ERROR_BADCOUNT       (-15)
 | 
			
		||||
#define PCRE_ERROR_DFA_UITEM      (-16)
 | 
			
		||||
#define PCRE_ERROR_DFA_UCOND      (-17)
 | 
			
		||||
#define PCRE_ERROR_DFA_UMLIMIT    (-18)
 | 
			
		||||
#define PCRE_ERROR_DFA_WSSIZE     (-19)
 | 
			
		||||
#define PCRE_ERROR_DFA_RECURSE    (-20)
 | 
			
		||||
#define PCRE_ERROR_NOMATCH          (-1)
 | 
			
		||||
#define PCRE_ERROR_NULL             (-2)
 | 
			
		||||
#define PCRE_ERROR_BADOPTION        (-3)
 | 
			
		||||
#define PCRE_ERROR_BADMAGIC         (-4)
 | 
			
		||||
#define PCRE_ERROR_UNKNOWN_OPCODE   (-5)
 | 
			
		||||
#define PCRE_ERROR_UNKNOWN_NODE     (-5)  /* For backward compatibility */
 | 
			
		||||
#define PCRE_ERROR_NOMEMORY         (-6)
 | 
			
		||||
#define PCRE_ERROR_NOSUBSTRING      (-7)
 | 
			
		||||
#define PCRE_ERROR_MATCHLIMIT       (-8)
 | 
			
		||||
#define PCRE_ERROR_CALLOUT          (-9)  /* Never used by PCRE itself */
 | 
			
		||||
#define PCRE_ERROR_BADUTF8         (-10)  /* Same for 8/16/32 */
 | 
			
		||||
#define PCRE_ERROR_BADUTF16        (-10)  /* Same for 8/16/32 */
 | 
			
		||||
#define PCRE_ERROR_BADUTF32        (-10)  /* Same for 8/16/32 */
 | 
			
		||||
#define PCRE_ERROR_BADUTF8_OFFSET  (-11)  /* Same for 8/16 */
 | 
			
		||||
#define PCRE_ERROR_BADUTF16_OFFSET (-11)  /* Same for 8/16 */
 | 
			
		||||
#define PCRE_ERROR_PARTIAL         (-12)
 | 
			
		||||
#define PCRE_ERROR_BADPARTIAL      (-13)
 | 
			
		||||
#define PCRE_ERROR_INTERNAL        (-14)
 | 
			
		||||
#define PCRE_ERROR_BADCOUNT        (-15)
 | 
			
		||||
#define PCRE_ERROR_DFA_UITEM       (-16)
 | 
			
		||||
#define PCRE_ERROR_DFA_UCOND       (-17)
 | 
			
		||||
#define PCRE_ERROR_DFA_UMLIMIT     (-18)
 | 
			
		||||
#define PCRE_ERROR_DFA_WSSIZE      (-19)
 | 
			
		||||
#define PCRE_ERROR_DFA_RECURSE     (-20)
 | 
			
		||||
#define PCRE_ERROR_RECURSIONLIMIT  (-21)
 | 
			
		||||
#define PCRE_ERROR_NULLWSLIMIT     (-22)  /* No longer actually used */
 | 
			
		||||
#define PCRE_ERROR_BADNEWLINE      (-23)
 | 
			
		||||
#define PCRE_ERROR_BADOFFSET       (-24)
 | 
			
		||||
#define PCRE_ERROR_SHORTUTF8       (-25)
 | 
			
		||||
#define PCRE_ERROR_SHORTUTF16      (-25)  /* Same for 8/16 */
 | 
			
		||||
#define PCRE_ERROR_RECURSELOOP     (-26)
 | 
			
		||||
#define PCRE_ERROR_JIT_STACKLIMIT  (-27)
 | 
			
		||||
#define PCRE_ERROR_BADMODE         (-28)
 | 
			
		||||
#define PCRE_ERROR_BADENDIANNESS   (-29)
 | 
			
		||||
#define PCRE_ERROR_DFA_BADRESTART  (-30)
 | 
			
		||||
#define PCRE_ERROR_JIT_BADOPTION   (-31)
 | 
			
		||||
#define PCRE_ERROR_BADLENGTH       (-32)
 | 
			
		||||
#define PCRE_ERROR_UNSET           (-33)
 | 
			
		||||
 | 
			
		||||
/* Specific error codes for UTF-8 validity checks */
 | 
			
		||||
 | 
			
		||||
#define PCRE_UTF8_ERR0               0
 | 
			
		||||
#define PCRE_UTF8_ERR1               1
 | 
			
		||||
#define PCRE_UTF8_ERR2               2
 | 
			
		||||
#define PCRE_UTF8_ERR3               3
 | 
			
		||||
#define PCRE_UTF8_ERR4               4
 | 
			
		||||
#define PCRE_UTF8_ERR5               5
 | 
			
		||||
#define PCRE_UTF8_ERR6               6
 | 
			
		||||
#define PCRE_UTF8_ERR7               7
 | 
			
		||||
#define PCRE_UTF8_ERR8               8
 | 
			
		||||
#define PCRE_UTF8_ERR9               9
 | 
			
		||||
#define PCRE_UTF8_ERR10             10
 | 
			
		||||
#define PCRE_UTF8_ERR11             11
 | 
			
		||||
#define PCRE_UTF8_ERR12             12
 | 
			
		||||
#define PCRE_UTF8_ERR13             13
 | 
			
		||||
#define PCRE_UTF8_ERR14             14
 | 
			
		||||
#define PCRE_UTF8_ERR15             15
 | 
			
		||||
#define PCRE_UTF8_ERR16             16
 | 
			
		||||
#define PCRE_UTF8_ERR17             17
 | 
			
		||||
#define PCRE_UTF8_ERR18             18
 | 
			
		||||
#define PCRE_UTF8_ERR19             19
 | 
			
		||||
#define PCRE_UTF8_ERR20             20
 | 
			
		||||
#define PCRE_UTF8_ERR21             21
 | 
			
		||||
#define PCRE_UTF8_ERR22             22  /* Unused (was non-character) */
 | 
			
		||||
 | 
			
		||||
/* Specific error codes for UTF-16 validity checks */
 | 
			
		||||
 | 
			
		||||
#define PCRE_UTF16_ERR0              0
 | 
			
		||||
#define PCRE_UTF16_ERR1              1
 | 
			
		||||
#define PCRE_UTF16_ERR2              2
 | 
			
		||||
#define PCRE_UTF16_ERR3              3
 | 
			
		||||
#define PCRE_UTF16_ERR4              4  /* Unused (was non-character) */
 | 
			
		||||
 | 
			
		||||
/* Specific error codes for UTF-32 validity checks */
 | 
			
		||||
 | 
			
		||||
#define PCRE_UTF32_ERR0              0
 | 
			
		||||
#define PCRE_UTF32_ERR1              1
 | 
			
		||||
#define PCRE_UTF32_ERR2              2  /* Unused (was non-character) */
 | 
			
		||||
#define PCRE_UTF32_ERR3              3
 | 
			
		||||
 | 
			
		||||
/* Request types for pcre_fullinfo() */
 | 
			
		||||
 | 
			
		||||
@@ -141,8 +267,23 @@ extern "C" {
 | 
			
		||||
#define PCRE_INFO_NAMETABLE          9
 | 
			
		||||
#define PCRE_INFO_STUDYSIZE         10
 | 
			
		||||
#define PCRE_INFO_DEFAULT_TABLES    11
 | 
			
		||||
#define PCRE_INFO_OKPARTIAL         12
 | 
			
		||||
#define PCRE_INFO_JCHANGED          13
 | 
			
		||||
#define PCRE_INFO_HASCRORLF         14
 | 
			
		||||
#define PCRE_INFO_MINLENGTH         15
 | 
			
		||||
#define PCRE_INFO_JIT               16
 | 
			
		||||
#define PCRE_INFO_JITSIZE           17
 | 
			
		||||
#define PCRE_INFO_MAXLOOKBEHIND     18
 | 
			
		||||
#define PCRE_INFO_FIRSTCHARACTER    19
 | 
			
		||||
#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
 | 
			
		||||
#define PCRE_INFO_REQUIREDCHAR      21
 | 
			
		||||
#define PCRE_INFO_REQUIREDCHARFLAGS 22
 | 
			
		||||
#define PCRE_INFO_MATCHLIMIT        23
 | 
			
		||||
#define PCRE_INFO_RECURSIONLIMIT    24
 | 
			
		||||
#define PCRE_INFO_MATCH_EMPTY       25
 | 
			
		||||
 | 
			
		||||
/* Request types for pcre_config() */
 | 
			
		||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
 | 
			
		||||
compatible. */
 | 
			
		||||
 | 
			
		||||
#define PCRE_CONFIG_UTF8                    0
 | 
			
		||||
#define PCRE_CONFIG_NEWLINE                 1
 | 
			
		||||
@@ -151,19 +292,83 @@ extern "C" {
 | 
			
		||||
#define PCRE_CONFIG_MATCH_LIMIT             4
 | 
			
		||||
#define PCRE_CONFIG_STACKRECURSE            5
 | 
			
		||||
#define PCRE_CONFIG_UNICODE_PROPERTIES      6
 | 
			
		||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
 | 
			
		||||
#define PCRE_CONFIG_BSR                     8
 | 
			
		||||
#define PCRE_CONFIG_JIT                     9
 | 
			
		||||
#define PCRE_CONFIG_UTF16                  10
 | 
			
		||||
#define PCRE_CONFIG_JITTARGET              11
 | 
			
		||||
#define PCRE_CONFIG_UTF32                  12
 | 
			
		||||
#define PCRE_CONFIG_PARENS_LIMIT           13
 | 
			
		||||
 | 
			
		||||
/* Bit flags for the pcre_extra structure */
 | 
			
		||||
/* Request types for pcre_study(). Do not re-arrange, in order to remain
 | 
			
		||||
compatible. */
 | 
			
		||||
 | 
			
		||||
#define PCRE_EXTRA_STUDY_DATA          0x0001
 | 
			
		||||
#define PCRE_EXTRA_MATCH_LIMIT         0x0002
 | 
			
		||||
#define PCRE_EXTRA_CALLOUT_DATA        0x0004
 | 
			
		||||
#define PCRE_EXTRA_TABLES              0x0008
 | 
			
		||||
#define PCRE_STUDY_JIT_COMPILE                0x0001
 | 
			
		||||
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE   0x0002
 | 
			
		||||
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE   0x0004
 | 
			
		||||
#define PCRE_STUDY_EXTRA_NEEDED               0x0008
 | 
			
		||||
 | 
			
		||||
/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
 | 
			
		||||
these bits, just add new ones on the end, in order to remain compatible. */
 | 
			
		||||
 | 
			
		||||
#define PCRE_EXTRA_STUDY_DATA             0x0001
 | 
			
		||||
#define PCRE_EXTRA_MATCH_LIMIT            0x0002
 | 
			
		||||
#define PCRE_EXTRA_CALLOUT_DATA           0x0004
 | 
			
		||||
#define PCRE_EXTRA_TABLES                 0x0008
 | 
			
		||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION  0x0010
 | 
			
		||||
#define PCRE_EXTRA_MARK                   0x0020
 | 
			
		||||
#define PCRE_EXTRA_EXECUTABLE_JIT         0x0040
 | 
			
		||||
 | 
			
		||||
/* Types */
 | 
			
		||||
 | 
			
		||||
struct real_pcre;                 /* declaration; the definition is private  */
 | 
			
		||||
typedef struct real_pcre pcre;
 | 
			
		||||
 | 
			
		||||
struct real_pcre16;               /* declaration; the definition is private  */
 | 
			
		||||
typedef struct real_pcre16 pcre16;
 | 
			
		||||
 | 
			
		||||
struct real_pcre32;               /* declaration; the definition is private  */
 | 
			
		||||
typedef struct real_pcre32 pcre32;
 | 
			
		||||
 | 
			
		||||
struct real_pcre_jit_stack;       /* declaration; the definition is private  */
 | 
			
		||||
typedef struct real_pcre_jit_stack pcre_jit_stack;
 | 
			
		||||
 | 
			
		||||
struct real_pcre16_jit_stack;     /* declaration; the definition is private  */
 | 
			
		||||
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
 | 
			
		||||
 | 
			
		||||
struct real_pcre32_jit_stack;     /* declaration; the definition is private  */
 | 
			
		||||
typedef struct real_pcre32_jit_stack pcre32_jit_stack;
 | 
			
		||||
 | 
			
		||||
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
 | 
			
		||||
a 16 bit wide unsigned data type. Otherwise it can be a dummy data type since
 | 
			
		||||
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
 | 
			
		||||
#ifndef PCRE_UCHAR16
 | 
			
		||||
#define PCRE_UCHAR16 unsigned short
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef PCRE_SPTR16
 | 
			
		||||
#define PCRE_SPTR16 const PCRE_UCHAR16 *
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain
 | 
			
		||||
a 32 bit wide unsigned data type. Otherwise it can be a dummy data type since
 | 
			
		||||
pcre32 functions are not implemented. There is a check for this in pcre_internal.h. */
 | 
			
		||||
#ifndef PCRE_UCHAR32
 | 
			
		||||
#define PCRE_UCHAR32 unsigned int
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef PCRE_SPTR32
 | 
			
		||||
#define PCRE_SPTR32 const PCRE_UCHAR32 *
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
 | 
			
		||||
replaced with a custom type. For conventional use, the public interface is a
 | 
			
		||||
const char *. */
 | 
			
		||||
 | 
			
		||||
#ifndef PCRE_SPTR
 | 
			
		||||
#define PCRE_SPTR const char *
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* The structure for passing additional data to pcre_exec(). This is defined in
 | 
			
		||||
such a way as to be extensible. Always add new fields at the end, in order to
 | 
			
		||||
remain compatible. */
 | 
			
		||||
@@ -174,8 +379,37 @@ typedef struct pcre_extra {
 | 
			
		||||
  unsigned long int match_limit;  /* Maximum number of calls to match() */
 | 
			
		||||
  void *callout_data;             /* Data passed back in callouts */
 | 
			
		||||
  const unsigned char *tables;    /* Pointer to character tables */
 | 
			
		||||
  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
 | 
			
		||||
  unsigned char **mark;           /* For passing back a mark pointer */
 | 
			
		||||
  void *executable_jit;           /* Contains a pointer to a compiled jit code */
 | 
			
		||||
} pcre_extra;
 | 
			
		||||
 | 
			
		||||
/* Same structure as above, but with 16 bit char pointers. */
 | 
			
		||||
 | 
			
		||||
typedef struct pcre16_extra {
 | 
			
		||||
  unsigned long int flags;        /* Bits for which fields are set */
 | 
			
		||||
  void *study_data;               /* Opaque data from pcre_study() */
 | 
			
		||||
  unsigned long int match_limit;  /* Maximum number of calls to match() */
 | 
			
		||||
  void *callout_data;             /* Data passed back in callouts */
 | 
			
		||||
  const unsigned char *tables;    /* Pointer to character tables */
 | 
			
		||||
  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
 | 
			
		||||
  PCRE_UCHAR16 **mark;            /* For passing back a mark pointer */
 | 
			
		||||
  void *executable_jit;           /* Contains a pointer to a compiled jit code */
 | 
			
		||||
} pcre16_extra;
 | 
			
		||||
 | 
			
		||||
/* Same structure as above, but with 32 bit char pointers. */
 | 
			
		||||
 | 
			
		||||
typedef struct pcre32_extra {
 | 
			
		||||
  unsigned long int flags;        /* Bits for which fields are set */
 | 
			
		||||
  void *study_data;               /* Opaque data from pcre_study() */
 | 
			
		||||
  unsigned long int match_limit;  /* Maximum number of calls to match() */
 | 
			
		||||
  void *callout_data;             /* Data passed back in callouts */
 | 
			
		||||
  const unsigned char *tables;    /* Pointer to character tables */
 | 
			
		||||
  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
 | 
			
		||||
  PCRE_UCHAR32 **mark;            /* For passing back a mark pointer */
 | 
			
		||||
  void *executable_jit;           /* Contains a pointer to a compiled jit code */
 | 
			
		||||
} pcre32_extra;
 | 
			
		||||
 | 
			
		||||
/* The structure for passing out data via the pcre_callout_function. We use a
 | 
			
		||||
structure so that new fields can be added on the end in future versions,
 | 
			
		||||
without changing the API of the function, thereby allowing old clients to work
 | 
			
		||||
@@ -186,7 +420,7 @@ typedef struct pcre_callout_block {
 | 
			
		||||
  /* ------------------------ Version 0 ------------------------------- */
 | 
			
		||||
  int          callout_number;    /* Number compiled into pattern */
 | 
			
		||||
  int         *offset_vector;     /* The offset vector */
 | 
			
		||||
  const char  *subject;           /* The subject being matched */
 | 
			
		||||
  PCRE_SPTR    subject;           /* The subject being matched */
 | 
			
		||||
  int          subject_length;    /* The length of the subject */
 | 
			
		||||
  int          start_match;       /* Offset to start of this match attempt */
 | 
			
		||||
  int          current_position;  /* Where we currently are in the subject */
 | 
			
		||||
@@ -196,9 +430,55 @@ typedef struct pcre_callout_block {
 | 
			
		||||
  /* ------------------- Added for Version 1 -------------------------- */
 | 
			
		||||
  int          pattern_position;  /* Offset to next item in the pattern */
 | 
			
		||||
  int          next_item_length;  /* Length of next item in the pattern */
 | 
			
		||||
  /* ------------------- Added for Version 2 -------------------------- */
 | 
			
		||||
  const unsigned char *mark;      /* Pointer to current mark or NULL    */
 | 
			
		||||
  /* ------------------------------------------------------------------ */
 | 
			
		||||
} pcre_callout_block;
 | 
			
		||||
 | 
			
		||||
/* Same structure as above, but with 16 bit char pointers. */
 | 
			
		||||
 | 
			
		||||
typedef struct pcre16_callout_block {
 | 
			
		||||
  int          version;           /* Identifies version of block */
 | 
			
		||||
  /* ------------------------ Version 0 ------------------------------- */
 | 
			
		||||
  int          callout_number;    /* Number compiled into pattern */
 | 
			
		||||
  int         *offset_vector;     /* The offset vector */
 | 
			
		||||
  PCRE_SPTR16  subject;           /* The subject being matched */
 | 
			
		||||
  int          subject_length;    /* The length of the subject */
 | 
			
		||||
  int          start_match;       /* Offset to start of this match attempt */
 | 
			
		||||
  int          current_position;  /* Where we currently are in the subject */
 | 
			
		||||
  int          capture_top;       /* Max current capture */
 | 
			
		||||
  int          capture_last;      /* Most recently closed capture */
 | 
			
		||||
  void        *callout_data;      /* Data passed in with the call */
 | 
			
		||||
  /* ------------------- Added for Version 1 -------------------------- */
 | 
			
		||||
  int          pattern_position;  /* Offset to next item in the pattern */
 | 
			
		||||
  int          next_item_length;  /* Length of next item in the pattern */
 | 
			
		||||
  /* ------------------- Added for Version 2 -------------------------- */
 | 
			
		||||
  const PCRE_UCHAR16 *mark;       /* Pointer to current mark or NULL    */
 | 
			
		||||
  /* ------------------------------------------------------------------ */
 | 
			
		||||
} pcre16_callout_block;
 | 
			
		||||
 | 
			
		||||
/* Same structure as above, but with 32 bit char pointers. */
 | 
			
		||||
 | 
			
		||||
typedef struct pcre32_callout_block {
 | 
			
		||||
  int          version;           /* Identifies version of block */
 | 
			
		||||
  /* ------------------------ Version 0 ------------------------------- */
 | 
			
		||||
  int          callout_number;    /* Number compiled into pattern */
 | 
			
		||||
  int         *offset_vector;     /* The offset vector */
 | 
			
		||||
  PCRE_SPTR32  subject;           /* The subject being matched */
 | 
			
		||||
  int          subject_length;    /* The length of the subject */
 | 
			
		||||
  int          start_match;       /* Offset to start of this match attempt */
 | 
			
		||||
  int          current_position;  /* Where we currently are in the subject */
 | 
			
		||||
  int          capture_top;       /* Max current capture */
 | 
			
		||||
  int          capture_last;      /* Most recently closed capture */
 | 
			
		||||
  void        *callout_data;      /* Data passed in with the call */
 | 
			
		||||
  /* ------------------- Added for Version 1 -------------------------- */
 | 
			
		||||
  int          pattern_position;  /* Offset to next item in the pattern */
 | 
			
		||||
  int          next_item_length;  /* Length of next item in the pattern */
 | 
			
		||||
  /* ------------------- Added for Version 2 -------------------------- */
 | 
			
		||||
  const PCRE_UCHAR32 *mark;       /* Pointer to current mark or NULL    */
 | 
			
		||||
  /* ------------------------------------------------------------------ */
 | 
			
		||||
} pcre32_callout_block;
 | 
			
		||||
 | 
			
		||||
/* Indirection for store get and free functions. These can be set to
 | 
			
		||||
alternative malloc/free functions if required. Special ones are used in the
 | 
			
		||||
non-recursive case for "frames". There is also an optional callout function
 | 
			
		||||
@@ -206,50 +486,189 @@ that is triggered by the (?) regex item. For Virtual Pascal, these definitions
 | 
			
		||||
have to take another form. */
 | 
			
		||||
 | 
			
		||||
#ifndef VPCOMPAT
 | 
			
		||||
PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
 | 
			
		||||
PCRE_DATA_SCOPE void  (*pcre_free)(void *);
 | 
			
		||||
PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
 | 
			
		||||
PCRE_DATA_SCOPE void  (*pcre_stack_free)(void *);
 | 
			
		||||
PCRE_DATA_SCOPE int   (*pcre_callout)(pcre_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  (*pcre_free)(void *);
 | 
			
		||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  (*pcre_stack_free)(void *);
 | 
			
		||||
PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL int   (*pcre_stack_guard)(void);
 | 
			
		||||
 | 
			
		||||
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  (*pcre16_free)(void *);
 | 
			
		||||
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  (*pcre16_stack_free)(void *);
 | 
			
		||||
PCRE_EXP_DECL int   (*pcre16_callout)(pcre16_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL int   (*pcre16_stack_guard)(void);
 | 
			
		||||
 | 
			
		||||
PCRE_EXP_DECL void *(*pcre32_malloc)(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  (*pcre32_free)(void *);
 | 
			
		||||
PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  (*pcre32_stack_free)(void *);
 | 
			
		||||
PCRE_EXP_DECL int   (*pcre32_callout)(pcre32_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL int   (*pcre32_stack_guard)(void);
 | 
			
		||||
#else   /* VPCOMPAT */
 | 
			
		||||
PCRE_DATA_SCOPE void *pcre_malloc(size_t);
 | 
			
		||||
PCRE_DATA_SCOPE void  pcre_free(void *);
 | 
			
		||||
PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
 | 
			
		||||
PCRE_DATA_SCOPE void  pcre_stack_free(void *);
 | 
			
		||||
PCRE_DATA_SCOPE int   pcre_callout(pcre_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  pcre_free(void *);
 | 
			
		||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  pcre_stack_free(void *);
 | 
			
		||||
PCRE_EXP_DECL int   pcre_callout(pcre_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL int   pcre_stack_guard(void);
 | 
			
		||||
 | 
			
		||||
PCRE_EXP_DECL void *pcre16_malloc(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  pcre16_free(void *);
 | 
			
		||||
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  pcre16_stack_free(void *);
 | 
			
		||||
PCRE_EXP_DECL int   pcre16_callout(pcre16_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL int   pcre16_stack_guard(void);
 | 
			
		||||
 | 
			
		||||
PCRE_EXP_DECL void *pcre32_malloc(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  pcre32_free(void *);
 | 
			
		||||
PCRE_EXP_DECL void *pcre32_stack_malloc(size_t);
 | 
			
		||||
PCRE_EXP_DECL void  pcre32_stack_free(void *);
 | 
			
		||||
PCRE_EXP_DECL int   pcre32_callout(pcre32_callout_block *);
 | 
			
		||||
PCRE_EXP_DECL int   pcre32_stack_guard(void);
 | 
			
		||||
#endif  /* VPCOMPAT */
 | 
			
		||||
 | 
			
		||||
/* User defined callback which provides a stack just before the match starts. */
 | 
			
		||||
 | 
			
		||||
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
 | 
			
		||||
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
 | 
			
		||||
typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *);
 | 
			
		||||
 | 
			
		||||
/* Exported PCRE functions */
 | 
			
		||||
 | 
			
		||||
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
 | 
			
		||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
 | 
			
		||||
                  const unsigned char *);
 | 
			
		||||
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
 | 
			
		||||
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
 | 
			
		||||
                  const unsigned char *);
 | 
			
		||||
PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *,
 | 
			
		||||
                  const unsigned char *);
 | 
			
		||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
 | 
			
		||||
                  int *, const unsigned char *);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_config(int, void *);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_copy_named_substring(const pcre *, const char *,
 | 
			
		||||
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
 | 
			
		||||
                  int *, const unsigned char *);
 | 
			
		||||
PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **,
 | 
			
		||||
                  int *, const unsigned char *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_config(int, void *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_config(int, void *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_config(int, void *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
 | 
			
		||||
                  int *, int, const char *, char *, int);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_copy_substring(const char *, int *, int, int, char *,
 | 
			
		||||
                  int);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_dfa_exec(const pcre *, const pcre_extra *,
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
 | 
			
		||||
                  int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32,
 | 
			
		||||
                  int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int,
 | 
			
		||||
                  char *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
 | 
			
		||||
                  PCRE_UCHAR16 *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_copy_substring(PCRE_SPTR32, int *, int, int,
 | 
			
		||||
                  PCRE_UCHAR32 *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *,
 | 
			
		||||
                  const char *, int, int, int, int *, int , int *, int);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_exec(const pcre *, const pcre_extra *, const char *,
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
 | 
			
		||||
                  PCRE_SPTR16, int, int, int, int *, int , int *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_dfa_exec(const pcre32 *, const pcre32_extra *,
 | 
			
		||||
                  PCRE_SPTR32, int, int, int, int *, int , int *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
 | 
			
		||||
                   int, int, int, int *, int);
 | 
			
		||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
 | 
			
		||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_exec(const pcre16 *, const pcre16_extra *,
 | 
			
		||||
                   PCRE_SPTR16, int, int, int, int *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_exec(const pcre32 *, const pcre32_extra *,
 | 
			
		||||
                   PCRE_SPTR32, int, int, int, int *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_jit_exec(const pcre *, const pcre_extra *,
 | 
			
		||||
                   PCRE_SPTR, int, int, int, int *, int,
 | 
			
		||||
                   pcre_jit_stack *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_jit_exec(const pcre16 *, const pcre16_extra *,
 | 
			
		||||
                   PCRE_SPTR16, int, int, int, int *, int,
 | 
			
		||||
                   pcre16_jit_stack *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_jit_exec(const pcre32 *, const pcre32_extra *,
 | 
			
		||||
                   PCRE_SPTR32, int, int, int, int *, int,
 | 
			
		||||
                   pcre32_jit_stack *);
 | 
			
		||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
 | 
			
		||||
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
 | 
			
		||||
PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32);
 | 
			
		||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
 | 
			
		||||
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
 | 
			
		||||
PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
 | 
			
		||||
                  void *);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_named_substring(const pcre *, const char *,
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
 | 
			
		||||
                  void *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int,
 | 
			
		||||
                  void *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *,
 | 
			
		||||
                  int *, int, const char *, const char **);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_stringnumber(const pcre *, const char *);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_substring(const char *, int *, int, int,
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
 | 
			
		||||
                  int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32,
 | 
			
		||||
                  int *, int, PCRE_SPTR32, PCRE_SPTR32 *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *,
 | 
			
		||||
                  char **, char **);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
 | 
			
		||||
                  PCRE_UCHAR16 **, PCRE_UCHAR16 **);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32,
 | 
			
		||||
                  PCRE_UCHAR32 **, PCRE_UCHAR32 **);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int,
 | 
			
		||||
                  const char **);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_get_substring_list(const char *, int *, int,
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_get_substring(PCRE_SPTR16, int *, int, int,
 | 
			
		||||
                  PCRE_SPTR16 *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_get_substring(PCRE_SPTR32, int *, int, int,
 | 
			
		||||
                  PCRE_SPTR32 *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int,
 | 
			
		||||
                  const char ***);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_info(const pcre *, int *, int *);
 | 
			
		||||
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
 | 
			
		||||
PCRE_DATA_SCOPE int  pcre_refcount(pcre *, int);
 | 
			
		||||
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
 | 
			
		||||
PCRE_DATA_SCOPE const char *pcre_version(void);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_get_substring_list(PCRE_SPTR16, int *, int,
 | 
			
		||||
                  PCRE_SPTR16 **);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_get_substring_list(PCRE_SPTR32, int *, int,
 | 
			
		||||
                  PCRE_SPTR32 **);
 | 
			
		||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
 | 
			
		||||
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
 | 
			
		||||
PCRE_EXP_DECL const unsigned char *pcre32_maketables(void);
 | 
			
		||||
PCRE_EXP_DECL int  pcre_refcount(pcre *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_refcount(pcre16 *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_refcount(pcre32 *, int);
 | 
			
		||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
 | 
			
		||||
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
 | 
			
		||||
PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **);
 | 
			
		||||
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
 | 
			
		||||
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
 | 
			
		||||
PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *);
 | 
			
		||||
PCRE_EXP_DECL const char *pcre_version(void);
 | 
			
		||||
PCRE_EXP_DECL const char *pcre16_version(void);
 | 
			
		||||
PCRE_EXP_DECL const char *pcre32_version(void);
 | 
			
		||||
 | 
			
		||||
/* Utility functions for byte order swaps. */
 | 
			
		||||
PCRE_EXP_DECL int  pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
 | 
			
		||||
                  const unsigned char *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
 | 
			
		||||
                  const unsigned char *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *,
 | 
			
		||||
                  const unsigned char *);
 | 
			
		||||
PCRE_EXP_DECL int  pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
 | 
			
		||||
                  PCRE_SPTR16, int, int *, int);
 | 
			
		||||
PCRE_EXP_DECL int  pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *,
 | 
			
		||||
                  PCRE_SPTR32, int, int *, int);
 | 
			
		||||
 | 
			
		||||
/* JIT compiler related functions. */
 | 
			
		||||
 | 
			
		||||
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
 | 
			
		||||
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
 | 
			
		||||
PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int);
 | 
			
		||||
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
 | 
			
		||||
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
 | 
			
		||||
PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *);
 | 
			
		||||
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
 | 
			
		||||
                  pcre_jit_callback, void *);
 | 
			
		||||
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
 | 
			
		||||
                  pcre16_jit_callback, void *);
 | 
			
		||||
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
 | 
			
		||||
                  pcre32_jit_callback, void *);
 | 
			
		||||
PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
 | 
			
		||||
PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
 | 
			
		||||
PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
 | 
			
		||||
 | 
			
		||||
#ifdef __cplusplus
 | 
			
		||||
}  /* extern "C" */
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										241
									
								
								dlls/regex/utils.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										241
									
								
								dlls/regex/utils.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,241 @@
 | 
			
		||||
 | 
			
		||||
#include "amxxmodule.h"
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include "utils.h"
 | 
			
		||||
 | 
			
		||||
/* The Windows CRT does not provide the POSIX names strcasecmp/strncasecmp.
 * Map both onto the underscore-prefixed CRT functions so the rest of the
 * file can use the POSIX spelling on every platform.
 */
#if defined(WIN32)
	#define strcasecmp	_stricmp
	#define strncasecmp	_strnicmp
#endif
 | 
			
		||||
 | 
			
		||||
/**
 * Checks whether the UTF-8 sequence that ends at `c` is complete.
 *
 * Starting at `c`, the pointer is rewound over every continuation byte
 * (bit pattern 10xxxxxx) until a non-continuation byte is reached. The
 * number of bytes walked (including that first non-continuation byte) is
 * then compared with the sequence length announced by its high nibble.
 *
 * @param c     Pointer to the last byte to inspect. Bytes before it are read
 *              while they look like continuation bytes, so they must be
 *              readable.
 * @return      0 when the walked byte count equals the announced length,
 *              otherwise the walked byte count (the number of bytes the
 *              caller should drop). A byte whose high nibble is below 0xC0
 *              announces no length, so it always yields a non-zero result.
 */
int UTIL_CheckValidChar(char *c)
{
	/* Rewind to the first non-continuation byte, counting bytes as we go. */
	int walked = 1;
	while ((*c & 0xC0) == 0x80)
	{
		--c;
		++walked;
	}

	/* Sequence length announced by the high nibble of that byte. */
	const int highNibble = *c & 0xF0;
	int announced = 0;

	if (highNibble == 0xC0 || highNibble == 0xD0)
	{
		announced = 2;
	}
	else if (highNibble == 0xE0)
	{
		announced = 3;
	}
	else if (highNibble == 0xF0)
	{
		announced = 4;
	}

	return (announced == walked) ? 0 : walked;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Bounded string copy that always terminates the destination.
 *
 * Copies at most `count - 1` bytes from `src` into `dest` and then writes a
 * null terminator. When `count` is 0 nothing is written at all.
 *
 * @param dest      Destination buffer.
 * @param src       NUL-terminated source string.
 * @param count     Number of bytes that may be written to dest, terminator
 *                  included.
 * @return          Number of bytes copied, not counting the terminator.
 */
unsigned int strncopy(char *dest, const char *src, size_t count)
{
	if (count == 0)
	{
		return 0;
	}

	/* One byte of the budget is reserved for the terminator. */
	const size_t budget = count - 1;
	size_t copied = 0;

	for (; src[copied] != '\0' && copied < budget; ++copied)
	{
		dest[copied] = src[copied];
	}

	dest[copied] = '\0';

	return static_cast<unsigned int>(copied);
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * NOTE: Do not edit this for the love of god unless you have
 | 
			
		||||
 * read the test cases and understand the code behind each one.
 | 
			
		||||
 * While I don't guarantee there aren't mistakes, I do guarantee
 | 
			
		||||
 * that plugins will end up relying on tiny idiosyncrasies of this
 | 
			
		||||
 * function, just like they did with AMX Mod X.
 | 
			
		||||
 *
 | 
			
		||||
 * There are explicitly more cases than the AMX Mod X version because
 | 
			
		||||
 * we're not doing a blind copy.  Each case is specifically optimized
 | 
			
		||||
 * for what needs to be done.  Even better, we don't have to error on
 | 
			
		||||
 * bad buffer sizes.  Instead, this function will smartly cut off the
 | 
			
		||||
 * string in a way that pushes old data out.
 | 
			
		||||
 */
 | 
			
		||||
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive)
 | 
			
		||||
{
 | 
			
		||||
	char *ptr = subject;
 | 
			
		||||
	size_t browsed = 0;
 | 
			
		||||
	size_t textLen = strlen(subject);
 | 
			
		||||
 | 
			
		||||
	/* It's not possible to search or replace */
 | 
			
		||||
	if (searchLen > textLen)
 | 
			
		||||
	{
 | 
			
		||||
		return NULL;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Handle the case of one byte replacement.
 | 
			
		||||
	* It's only valid in one case.
 | 
			
		||||
	*/
 | 
			
		||||
	if (maxLen == 1)
 | 
			
		||||
	{
 | 
			
		||||
		/* If the search matches and the replace length is 0,
 | 
			
		||||
		* we can just terminate the string and be done.
 | 
			
		||||
		*/
 | 
			
		||||
		if ((caseSensitive ? strcmp(subject, search) : strcasecmp(subject, search)) == 0 && replaceLen == 0)
 | 
			
		||||
		{
 | 
			
		||||
			*subject = '\0';
 | 
			
		||||
			return subject;
 | 
			
		||||
		}
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			return NULL;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Subtract one off the maxlength so we can include the null terminator */
 | 
			
		||||
	maxLen--;
 | 
			
		||||
 | 
			
		||||
	while (*ptr != '\0' && (browsed <= textLen - searchLen))
 | 
			
		||||
	{
 | 
			
		||||
		/* See if we get a comparison */
 | 
			
		||||
		if ((caseSensitive ? strncmp(ptr, search, searchLen) : strncasecmp(ptr, search, searchLen)) == 0)
 | 
			
		||||
		{
 | 
			
		||||
			if (replaceLen > searchLen)
 | 
			
		||||
			{
 | 
			
		||||
				/* First, see if we have enough space to do this operation */
 | 
			
		||||
				if (maxLen - textLen < replaceLen - searchLen)
 | 
			
		||||
				{
 | 
			
		||||
					/* First, see if the replacement length goes out of bounds. */
 | 
			
		||||
					if (browsed + replaceLen >= maxLen)
 | 
			
		||||
					{
 | 
			
		||||
						/* EXAMPLE CASE:
 | 
			
		||||
						* Subject: AABBBCCC
 | 
			
		||||
						* Buffer : 12 bytes
 | 
			
		||||
						* Search : BBB
 | 
			
		||||
						* Replace: DDDDDDDDDD
 | 
			
		||||
						* OUTPUT : AADDDDDDDDD
 | 
			
		||||
						* POSITION:           ^
 | 
			
		||||
						*/
 | 
			
		||||
						/* If it does, we'll just bound the length and do a strcpy. */
 | 
			
		||||
						replaceLen = maxLen - browsed;
 | 
			
		||||
 | 
			
		||||
						/* Note, we add one to the final result for the null terminator */
 | 
			
		||||
						strncopy(ptr, replace, replaceLen + 1);
 | 
			
		||||
 | 
			
		||||
						/* Don't truncate a multi-byte character */
 | 
			
		||||
						if (*(ptr + replaceLen - 1) & 1 << 7)
 | 
			
		||||
						{
 | 
			
		||||
							replaceLen -= UTIL_CheckValidChar(ptr + replaceLen - 1);
 | 
			
		||||
							*(ptr + replaceLen) = '\0';
 | 
			
		||||
						}
 | 
			
		||||
					}
 | 
			
		||||
					else
 | 
			
		||||
					{
 | 
			
		||||
						/* EXAMPLE CASE:
 | 
			
		||||
						* Subject: AABBBCCC
 | 
			
		||||
						* Buffer : 12 bytes
 | 
			
		||||
						* Search : BBB
 | 
			
		||||
						* Replace: DDDDDDD
 | 
			
		||||
						* OUTPUT : AADDDDDDDCC
 | 
			
		||||
						* POSITION:         ^
 | 
			
		||||
						*/
 | 
			
		||||
						/* We're going to have some bytes left over... */
 | 
			
		||||
						size_t origBytesToCopy = (textLen - (browsed + searchLen)) + 1;
 | 
			
		||||
						size_t realBytesToCopy = (maxLen - (browsed + replaceLen)) + 1;
 | 
			
		||||
						char *moveFrom = ptr + searchLen + (origBytesToCopy - realBytesToCopy);
 | 
			
		||||
						char *moveTo = ptr + replaceLen;
 | 
			
		||||
 | 
			
		||||
						/* First, move our old data out of the way. */
 | 
			
		||||
						memmove(moveTo, moveFrom, realBytesToCopy);
 | 
			
		||||
 | 
			
		||||
						/* Now, do our replacement. */
 | 
			
		||||
						memcpy(ptr, replace, replaceLen);
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
				else
 | 
			
		||||
				{
 | 
			
		||||
					/* EXAMPLE CASE:
 | 
			
		||||
					* Subject: AABBBCCC
 | 
			
		||||
					* Buffer : 12 bytes
 | 
			
		||||
					* Search : BBB
 | 
			
		||||
					* Replace: DDDD
 | 
			
		||||
					* OUTPUT : AADDDDCCC
 | 
			
		||||
					* POSITION:      ^
 | 
			
		||||
					*/
 | 
			
		||||
					/* Yes, we have enough space.  Do a normal move operation. */
 | 
			
		||||
					char *moveFrom = ptr + searchLen;
 | 
			
		||||
					char *moveTo = ptr + replaceLen;
 | 
			
		||||
 | 
			
		||||
					/* First move our old data out of the way. */
 | 
			
		||||
					size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
 | 
			
		||||
					memmove(moveTo, moveFrom, bytesToCopy);
 | 
			
		||||
 | 
			
		||||
					/* Now do our replacement. */
 | 
			
		||||
					memcpy(ptr, replace, replaceLen);
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			else if (replaceLen < searchLen)
 | 
			
		||||
			{
 | 
			
		||||
				/* EXAMPLE CASE:
 | 
			
		||||
				* Subject: AABBBCCC
 | 
			
		||||
				* Buffer : 12 bytes
 | 
			
		||||
				* Search : BBB
 | 
			
		||||
				* Replace: D
 | 
			
		||||
				* OUTPUT : AADCCC
 | 
			
		||||
				* POSITION:   ^
 | 
			
		||||
				*/
 | 
			
		||||
				/* If the replacement does not grow the string length, we do not
 | 
			
		||||
				* need to do any fancy checking at all.  Yay!
 | 
			
		||||
				*/
 | 
			
		||||
				char *moveFrom = ptr + searchLen;		/* Start after the search pointer */
 | 
			
		||||
				char *moveTo = ptr + replaceLen;		/* Copy to where the replacement ends */
 | 
			
		||||
 | 
			
		||||
				/* Copy our replacement in, if any */
 | 
			
		||||
				if (replaceLen)
 | 
			
		||||
				{
 | 
			
		||||
					memcpy(ptr, replace, replaceLen);
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				/* Figure out how many bytes to move down, including null terminator */
 | 
			
		||||
				size_t bytesToCopy = (textLen - (browsed + searchLen)) + 1;
 | 
			
		||||
 | 
			
		||||
				/* Move the rest of the string down */
 | 
			
		||||
				memmove(moveTo, moveFrom, bytesToCopy);
 | 
			
		||||
			}
 | 
			
		||||
			else
 | 
			
		||||
			{
 | 
			
		||||
				/* EXAMPLE CASE:
 | 
			
		||||
				* Subject: AABBBCCC
 | 
			
		||||
				* Buffer : 12 bytes
 | 
			
		||||
				* Search : BBB
 | 
			
		||||
				* Replace: DDD
 | 
			
		||||
				* OUTPUT : AADDDCCC
 | 
			
		||||
				* POSITION:     ^
 | 
			
		||||
				*/
 | 
			
		||||
				/* We don't have to move anything around, just do a straight copy */
 | 
			
		||||
				memcpy(ptr, replace, replaceLen);
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			return ptr + replaceLen;
 | 
			
		||||
		}
 | 
			
		||||
		ptr++;
 | 
			
		||||
		browsed++;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return NULL;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										8
									
								
								dlls/regex/utils.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								dlls/regex/utils.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
			
		||||
#ifndef UTILS_H
#define UTILS_H

#include <stddef.h>	/* size_t, so this header does not depend on include order */

/**
 * Checks whether the UTF-8 sequence ending at `c` is complete.
 *
 * @return  0 when it is complete, otherwise the number of trailing bytes
 *          that belong to the incomplete sequence.
 */
int UTIL_CheckValidChar(char *c);

/**
 * Replaces the first occurrence of `search` in `subject` with `replace`,
 * in place, within a buffer of `maxLen` bytes.
 *
 * @return  Pointer just past the replacement, or NULL when nothing was replaced.
 */
char *UTIL_ReplaceEx(char *subject, size_t maxLen, const char *search, size_t searchLen, const char *replace, size_t replaceLen, bool caseSensitive);

/**
 * Copies at most `count - 1` bytes from `src` to `dest` and null-terminates.
 *
 * @return  Number of bytes copied, not counting the terminator.
 */
unsigned int strncopy(char *dest, const char *src, size_t count);

#endif // UTILS_H
 | 
			
		||||
@@ -1,134 +1,372 @@
 | 
			
		||||
/* Regular Expression API
 | 
			
		||||
 * (C)2004 by David "BAILOPAN" Anderson
 | 
			
		||||
 * Licensed under the GNU General Public License.
 | 
			
		||||
 * No warranties of any kind.
 | 
			
		||||
/**
 | 
			
		||||
 * Regular Expressions API
 | 
			
		||||
 *  By the AMX Mod X Development Team
 | 
			
		||||
 *
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify it
 | 
			
		||||
 * under the terms of the GNU General Public License as published by the
 | 
			
		||||
 * Free Software Foundation; either version 2 of the License, or (at
 | 
			
		||||
 * your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is distributed in the hope that it will be useful, but
 | 
			
		||||
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 | 
			
		||||
 * General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU General Public License
 | 
			
		||||
 * along with this program; if not, write to the Free Software Foundation,
 | 
			
		||||
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 | 
			
		||||
 *
 | 
			
		||||
 * In addition, as a special exception, the author gives permission to
 | 
			
		||||
 * link the code of this program with the Half-Life Game Engine ("HL
 | 
			
		||||
 * Engine") and Modified Game Libraries ("MODs") developed by Valve,
 | 
			
		||||
 * L.L.C ("Valve"). You must obey the GNU General Public License in all
 | 
			
		||||
 * respects for all of the code used other than the HL Engine and MODs
 | 
			
		||||
 * from Valve. If you modify this file, you may extend this exception
 | 
			
		||||
 * to your version of the file, but you are not obligated to do so. If
 | 
			
		||||
 * you do not wish to do so, delete this exception statement from your
 | 
			
		||||
 * version.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#if defined _regex_included
 | 
			
		||||
  #endinput
 | 
			
		||||
	#endinput
 | 
			
		||||
#endif
 | 
			
		||||
#define _regex_included
 | 
			
		||||
 | 
			
		||||
#if AMXX_VERSION_NUM >= 175
 | 
			
		||||
 #pragma reqlib regex
 | 
			
		||||
 #if !defined AMXMODX_NOAUTOLOAD
 | 
			
		||||
  #pragma loadlib regex
 | 
			
		||||
 #endif
 | 
			
		||||
	#pragma reqlib regex
 | 
			
		||||
	#if !defined AMXMODX_NOAUTOLOAD
 | 
			
		||||
		#pragma loadlib regex
 | 
			
		||||
	#endif
 | 
			
		||||
#else
 | 
			
		||||
 #pragma library regex
 | 
			
		||||
	#pragma library regex
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
enum Regex
 | 
			
		||||
{
 | 
			
		||||
	REGEX_MATCH_FAIL = -2,
 | 
			
		||||
	REGEX_PATTERN_FAIL,
 | 
			
		||||
	REGEX_NO_MATCH,
 | 
			
		||||
	REGEX_OK
 | 
			
		||||
	REGEX_MATCH_FAIL   = -2,
 | 
			
		||||
	REGEX_PATTERN_FAIL = -1,
 | 
			
		||||
	REGEX_NO_MATCH     =  0,
 | 
			
		||||
	REGEX_OK           =  1
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Precompile a regular expression.  Use this if you intend on using the 
 | 
			
		||||
 * same expression multiple times.  Pass the regex handle returned here to
 | 
			
		||||
 * regex_match_c to check for matches.
 | 
			
		||||
 *
 | 
			
		||||
 * @param pattern		The regular expression pattern.
 | 
			
		||||
 * @param errcode		Error code encountered, if applicable.
 | 
			
		||||
 * @param error		Error message encountered, if applicable.
 | 
			
		||||
 * @param maxLen		Maximum string length of the error buffer.
 | 
			
		||||
 * @param flags		General flags for the regular expression.
 | 
			
		||||
 *						i = Ignore case
 | 
			
		||||
 *						m = Multilines (affects ^ and $ so that they match 
 | 
			
		||||
 *							the start/end of a line rather than matching the 
 | 
			
		||||
 *							start/end of the string).
 | 
			
		||||
 *						s = Single line (affects . so that it matches any character, 
 | 
			
		||||
 *							even new line characters).
 | 
			
		||||
 *						x = Pattern extension (ignore whitespace and # comments).
 | 
			
		||||
 *
 | 
			
		||||
 * @return				-1 on error in the pattern, > valid regex handle (> 0) on success.
 | 
			
		||||
 *
 | 
			
		||||
 * @note				This handle is automatically freed on map change.  However,
 | 
			
		||||
 *						if you are completely done with it before then, you should
 | 
			
		||||
 *						call regex_free on this handle.
 | 
			
		||||
 * Flags for compiling regex expressions.
 | 
			
		||||
 * These come directly from the pcre library and can be used in regex_compile_ex.
 | 
			
		||||
 */
 | 
			
		||||
native Regex:regex_compile(const pattern[], &ret, error[], maxLen, const flags[]="");
 | 
			
		||||
#define PCRE_CASELESS           0x00000001  /* Ignore Case */
 | 
			
		||||
#define PCRE_MULTILINE          0x00000002  /* Multilines (affects ^ and $ so that they match the start/end of a line rather than matching the start/end of the string). */
 | 
			
		||||
#define PCRE_DOTALL             0x00000004  /* Single line (affects . so that it matches any character, even new line characters). */
 | 
			
		||||
#define PCRE_EXTENDED           0x00000008  /* Pattern extension (ignore whitespace and # comments). */
 | 
			
		||||
#define PCRE_ANCHORED           0x00000010  /* Force pattern anchoring. */
 | 
			
		||||
#define PCRE_DOLLAR_ENDONLY     0x00000020  /* $ not to match newline at end. */
 | 
			
		||||
#define PCRE_UNGREEDY           0x00000200  /* Invert greediness of quantifiers */
 | 
			
		||||
#define PCRE_NOTEMPTY           0x00000400  /* An empty string is not a valid match. */
 | 
			
		||||
#define PCRE_UTF8               0x00000800  /* Use UTF-8 Chars */
 | 
			
		||||
#define PCRE_NO_UTF8_CHECK      0x00002000  /* Do not check the pattern for UTF-8 validity (only relevant if PCRE_UTF8 is set) */
 | 
			
		||||
#define PCRE_NEVER_UTF          0x00010000  /* Lock out interpretation of the pattern as UTF-8 */
 | 
			
		||||
#define PCRE_FIRSTLINE          0x00040000  /* Force matching to be before newline */
 | 
			
		||||
#define PCRE_DUPNAMES           0x00080000  /* Allow duplicate names for subpattern */
 | 
			
		||||
#define PCRE_NEWLINE_CR         0x00100000  /* Specify that a newline is indicated by a single character CR           )                            */
 | 
			
		||||
#define PCRE_NEWLINE_CRLF       0x00300000  /* specify that a newline is indicated by the two-character CRLF sequence )  Overrides the default     */
 | 
			
		||||
#define PCRE_NEWLINE_ANY        0x00400000  /* Specify that any Unicode newline sequence should be recognized.        )  newline definition (LF)   */
 | 
			
		||||
#define PCRE_NEWLINE_ANYCRLF    0x00500000  /* Specify that any of CR, LF and CRLF sequences should be recognized     )                            */
 | 
			
		||||
#define PCRE_UCP                0x20000000  /* Change the way PCRE processes \B, \b, \D, \d, \S, \s, \W, \w etc. to use Unicode properties */
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Regex expression error codes.
 | 
			
		||||
 * This can be used with regex_compile_ex and regex_match_ex.
 | 
			
		||||
 */
 | 
			
		||||
enum /*RegexError*/
 | 
			
		||||
{
 | 
			
		||||
	REGEX_ERROR_NONE           =  0,    /* No error */
 | 
			
		||||
	REGEX_ERROR_NOMATCH        = -1,    /* No match was found */
 | 
			
		||||
	REGEX_ERROR_NULL           = -2,
 | 
			
		||||
	REGEX_ERROR_BADOPTION      = -3,
 | 
			
		||||
	REGEX_ERROR_BADMAGIC       = -4,
 | 
			
		||||
	REGEX_ERROR_UNKNOWN_OPCODE = -5,
 | 
			
		||||
	REGEX_ERROR_NOMEMORY       = -6,
 | 
			
		||||
	REGEX_ERROR_NOSUBSTRING    = -7,
 | 
			
		||||
	REGEX_ERROR_MATCHLIMIT     = -8,
 | 
			
		||||
	REGEX_ERROR_CALLOUT        = -9,    /* Never used by PCRE itself */
 | 
			
		||||
	REGEX_ERROR_BADUTF8        = -10,
 | 
			
		||||
	REGEX_ERROR_BADUTF8_OFFSET = -11,
 | 
			
		||||
	REGEX_ERROR_PARTIAL        = -12,
 | 
			
		||||
	REGEX_ERROR_BADPARTIAL     = -13,
 | 
			
		||||
	REGEX_ERROR_INTERNAL       = -14,
 | 
			
		||||
	REGEX_ERROR_BADCOUNT       = -15,
 | 
			
		||||
	REGEX_ERROR_DFA_UITEM      = -16,
 | 
			
		||||
	REGEX_ERROR_DFA_UCOND      = -17,
 | 
			
		||||
	REGEX_ERROR_DFA_UMLIMIT    = -18,
 | 
			
		||||
	REGEX_ERROR_DFA_WSSIZE     = -19,
 | 
			
		||||
	REGEX_ERROR_DFA_RECURSE    = -20,
 | 
			
		||||
	REGEX_ERROR_RECURSIONLIMIT = -21,
 | 
			
		||||
	REGEX_ERROR_NULLWSLIMIT    = -22,   /* No longer actually used */
 | 
			
		||||
	REGEX_ERROR_BADNEWLINE     = -23,
 | 
			
		||||
	REGEX_ERROR_BADOFFSET      = -24,
 | 
			
		||||
	REGEX_ERROR_SHORTUTF8      = -25,
 | 
			
		||||
	REGEX_ERROR_RECURSELOOP    = -26,
 | 
			
		||||
	REGEX_ERROR_JIT_STACKLIMIT = -27,
 | 
			
		||||
	REGEX_ERROR_BADMODE        = -28,
 | 
			
		||||
	REGEX_ERROR_BADENDIANNESS  = -29,
 | 
			
		||||
	REGEX_ERROR_DFA_BADRESTART = -30,
 | 
			
		||||
	REGEX_ERROR_JIT_BADOPTION  = -31,
 | 
			
		||||
	REGEX_ERROR_BADLENGTH      = -32,
 | 
			
		||||
	REGEX_ERROR_UNSET          = -33
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Precompile a regular expression.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  Use this if you intend on using the same expression multiple times.
 | 
			
		||||
 *        Pass the regex handle returned here to regex_match_c to check for matches.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  This handle is automatically freed on map change.  However,
 | 
			
		||||
 *        if you are completely done with it before then, you should
 | 
			
		||||
 *        call regex_free on this handle.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  Consider using regex_compile_ex instead if you want to use PCRE_* flags.
 | 
			
		||||
 *
 | 
			
		||||
 * @param pattern       The regular expression pattern.
 | 
			
		||||
 * @param ret           Error code encountered, if applicable.
 | 
			
		||||
 * @param error         Error message encountered, if applicable.
 | 
			
		||||
 * @param maxLen        Maximum string length of the error buffer.
 | 
			
		||||
 * @param flags         General flags for the regular expression.
 | 
			
		||||
 *                      i = Ignore case
 | 
			
		||||
 *                      m = Multilines (affects ^ and $ so that they match
 | 
			
		||||
 *                          the start/end of a line rather than matching the
 | 
			
		||||
 *                          start/end of the string).
 | 
			
		||||
 *                      s = Single line (affects . so that it matches any character,
 | 
			
		||||
 *                          even new line characters).
 | 
			
		||||
 *                      x = Pattern extension (ignore whitespace and # comments).
 | 
			
		||||
 *
 | 
			
		||||
 * @return              -1 on error in the pattern, or a valid regex handle (> 0) on success.
 | 
			
		||||
 */
 | 
			
		||||
native Regex:regex_compile(const pattern[], &ret = 0, error[] = "", maxLen = 0, const flags[]="");
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Matches a string against a pre-compiled regular expression pattern.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  You should free the returned handle with regex_free()
 | 
			
		||||
 *        when you are done with this pattern.
 | 
			
		||||
 *
 | 
			
		||||
 * @param pattern		The regular expression pattern.
 | 
			
		||||
 * @param string		The string to check.
 | 
			
		||||
 * @param ret			Error code, if applicable, or number of results on success.
 | 
			
		||||
 * @note  Use the regex handle passed to this function to extract
 | 
			
		||||
 *        matches with regex_substr().
 | 
			
		||||
 *
 | 
			
		||||
 * @return				-2 = Matching error (error code is stored in ret)
 | 
			
		||||
 *						 0 = No match.
 | 
			
		||||
 *						>1 = Number of results.
 | 
			
		||||
 * @param string        The string to check.
 | 
			
		||||
 * @param pattern       The regular expression pattern.
 | 
			
		||||
 * @param ret           Error code, if applicable, or number of results on success. See REGEX_ERROR_* defines.
 | 
			
		||||
 *
 | 
			
		||||
 * @note				You should free the returned handle (with regex_free())
 | 
			
		||||
 *						when you are done with this pattern.
 | 
			
		||||
 *
 | 
			
		||||
 * @note				Use the regex handle passed to this function to extract
 | 
			
		||||
 *						matches with regex_substr().
 | 
			
		||||
 * @return              -2 = Matching error (error code is stored in ret)
 | 
			
		||||
 *                       0 = No match.
 | 
			
		||||
 *                      >1 = Number of results.
 | 
			
		||||
 */
 | 
			
		||||
native regex_match_c(const string[], Regex:pattern, &ret);
 | 
			
		||||
native regex_match_c(const string[], Regex:pattern, &ret = 0);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Matches a string against a regular expression pattern.
 | 
			
		||||
 *
 | 
			
		||||
 * @note				If you intend on using the same regular expression pattern
 | 
			
		||||
 *						multiple times, consider using regex_compile and regex_match_c 
 | 
			
		||||
 *						instead of making this function reparse the expression each time.
 | 
			
		||||
 * @note  If you intend on using the same regular expression pattern
 | 
			
		||||
 *        multiple times, consider using regex_compile and regex_match_ex
 | 
			
		||||
 *        instead of making this function reparse the expression each time.
 | 
			
		||||
 *
 | 
			
		||||
 * @param string		The string to check.
 | 
			
		||||
 * @param pattern		The regular expression pattern.
 | 
			
		||||
 * @param ret			Error code, or result state of the match.
 | 
			
		||||
 * @param error		Error message, if applicable.
 | 
			
		||||
 * @param maxLen		Maximum length of the error buffer.
 | 
			
		||||
 * @param flags		General flags for the regular expression.
 | 
			
		||||
 *						i = Ignore case
 | 
			
		||||
 *						m = Multilines (affects ^ and $ so that they match 
 | 
			
		||||
 *							the start/end of a line rather than matching the 
 | 
			
		||||
 *							start/end of the string).
 | 
			
		||||
 *						s = Single line (affects . so that it matches any character, 
 | 
			
		||||
 *							even new line characters).
 | 
			
		||||
 *						x = Pattern extension (ignore whitespace and # comments).
 | 
			
		||||
 * @note  Flags only exist in amxmodx 1.8 and later.
 | 
			
		||||
 *
 | 
			
		||||
 * @return				-2 = Matching error (error code is stored in ret)
 | 
			
		||||
 *						-1 = Error in pattern (error message and offset # in error and ret)
 | 
			
		||||
 *						 0 = No match.
 | 
			
		||||
 *						>1 = Handle for getting more information (via regex_substr)
 | 
			
		||||
 * @note  You should free the returned handle with regex_free()
 | 
			
		||||
 *        when you are done extracting all of the substrings.
 | 
			
		||||
 *
 | 
			
		||||
 * @note				Flags only exist in amxmodx 1.8 and later.
 | 
			
		||||
 * @note				You should free the returned handle (with regex_free())
 | 
			
		||||
 *						when you are done extracting all of the substrings.
 | 
			
		||||
 * @param string        The string to check.
 | 
			
		||||
 * @param pattern       The regular expression pattern.
 | 
			
		||||
 * @param ret           Error code, or result state of the match.
 | 
			
		||||
 * @param error         Error message, if applicable.
 | 
			
		||||
 * @param maxLen        Maximum length of the error buffer.
 | 
			
		||||
 * @param flags         General flags for the regular expression.
 | 
			
		||||
 *                      i = Ignore case
 | 
			
		||||
 *                      m = Multilines (affects ^ and $ so that they match
 | 
			
		||||
 *                          the start/end of a line rather than matching the
 | 
			
		||||
 *                          start/end of the string).
 | 
			
		||||
 *                      s = Single line (affects . so that it matches any character,
 | 
			
		||||
 *                          even new line characters).
 | 
			
		||||
 *                      x = Pattern extension (ignore whitespace and # comments).
 | 
			
		||||
 *
 | 
			
		||||
 * @return              -2 = Matching error (error code is stored in ret)
 | 
			
		||||
 *                      -1 = Error in pattern (error message and offset # in error and ret)
 | 
			
		||||
 *                       0 = No match.
 | 
			
		||||
 *                      >1 = Handle for getting more information (via regex_substr)
 | 
			
		||||
 */
 | 
			
		||||
native Regex:regex_match(const string[], const pattern[], &ret, error[], maxLen, const flags[] = "");
 | 
			
		||||
native Regex:regex_match(const string[], const pattern[], &ret = 0, error[] = "", maxLen = 0, const flags[] = "");
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Returns a matched substring from a regex handle.
 | 
			
		||||
 * Substring ids start at 0 and end at ret-1, where ret is from the corresponding
 | 
			
		||||
 * regex_match or regex_match_c function call.
 | 
			
		||||
 *
 | 
			
		||||
 * @param id			The regex handle to extract data from.
 | 
			
		||||
 * @param str_id		The index of the expression to get - starts at 0, and ends at ret - 1.
 | 
			
		||||
 * @param buffer		The buffer to set to the matching substring.
 | 
			
		||||
 * @param maxLen		The maximum string length of the buffer.
 | 
			
		||||
 * @note  Substring ids start at 0 and end at ret - 1, where ret is from the corresponding
 | 
			
		||||
 *        regex_match* function call.
 | 
			
		||||
 *
 | 
			
		||||
 * @param id            The regex handle to extract data from.
 | 
			
		||||
 * @param str_id        The index of the expression to get - starts at 0, and ends at ret - 1.
 | 
			
		||||
 * @param buffer        The buffer to set to the matching substring.
 | 
			
		||||
 * @param maxLen        The maximum string length of the buffer.
 | 
			
		||||
 *
 | 
			
		||||
 * @return              1 on success, otherwise 0 on failure.
 | 
			
		||||
 */
 | 
			
		||||
native regex_substr(Regex:id, str_id, buffer[], maxLen);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Frees the memory associated with a regex result, and sets the handle to 0.
 | 
			
		||||
 * This must be called on all results from regex_match() when you are done extracting
 | 
			
		||||
 * the results with regex_substr().
 | 
			
		||||
 * The results of regex_compile() (and subsequently, regex_match_c()) only need to be freed
 | 
			
		||||
 * when you are done using the pattern.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  This must be called on all results from regex_match() when you are done extracting
 | 
			
		||||
 *        the results with regex_substr().
 | 
			
		||||
 *
 | 
			
		||||
 * @param id			The regex handle to free.
 | 
			
		||||
 * @note  The results of regex_compile() or regex_compile_ex() (and subsequently, regex_match_c())
 | 
			
		||||
 *        only need to be freed when you are done using the pattern.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  Do not use the handle again after freeing it!
 | 
			
		||||
 *
 | 
			
		||||
 * @param id            The regex handle to free.
 | 
			
		||||
 * @noreturn
 | 
			
		||||
 *
 | 
			
		||||
 * @note				Do not use the handle again after freeing it!
 | 
			
		||||
 */ 
 | 
			
		||||
 */
 | 
			
		||||
native regex_free(&Regex:id);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * The following natives are only available in 1.8.3 and above.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Precompile a regular expression.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  Use this if you intend on using the same expression multiple times.
 | 
			
		||||
 *        Pass the regex handle returned here to regex_match_c() to check for matches.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  Unlike regex_compile(), this allows you to use PCRE flags directly.
 | 
			
		||||
 *
 | 
			
		||||
 * @param pattern       The regular expression pattern.
 | 
			
		||||
 * @param flags         General flags for the regular expression, see PCRE_* defines.
 | 
			
		||||
 * @param error         Error message encountered, if applicable.
 | 
			
		||||
 * @param maxLen        Maximum string length of the error buffer.
 | 
			
		||||
 * @param errcode       Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
 | 
			
		||||
 *
 | 
			
		||||
 * @return              Valid regex handle (> 0) on success, or -1 on failure.
 | 
			
		||||
 */
 | 
			
		||||
native Regex:regex_compile_ex(const pattern[], flags = 0, error[]= "", maxLen = 0, &errcode = 0);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Matches a string against a pre-compiled regular expression pattern, matching all
 | 
			
		||||
 * occurrences of the pattern inside the string. This is similar to using the "g" flag
 | 
			
		||||
 * in perl regex.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  You should free the returned handle (with regex_free())
 | 
			
		||||
 *        when you are done with this pattern.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  Use the regex handle passed to this function to extract
 | 
			
		||||
 *        matches with regex_substr().
 | 
			
		||||
 *
 | 
			
		||||
 * @param pattern       The regular expression pattern.
 | 
			
		||||
 * @param string        The string to check.
 | 
			
		||||
 * @param ret           Error code, if applicable, or number of results on success.
 | 
			
		||||
 *                      See REGEX_ERROR_* defines.
 | 
			
		||||
 *
 | 
			
		||||
 * @return              -2 = Matching error (error code is stored in ret)
 | 
			
		||||
 *                       0 = No match.
 | 
			
		||||
 *                      >1 = Number of results.
 | 
			
		||||
 */
 | 
			
		||||
native regex_match_all_c(const string[], Regex:pattern, &ret = 0);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Matches a string against a regular expression pattern, matching all occurrences of the
 | 
			
		||||
 * pattern inside the string. This is similar to using the "g" flag in perl regex.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  If you intend on using the same regular expression pattern
 | 
			
		||||
 *        multiple times, consider using regex_compile_ex and regex_match_all_c
 | 
			
		||||
 *        instead of making this function reparse the expression each time.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  Flags only exist in amxmodx 1.8 and later.
 | 
			
		||||
 *
 | 
			
		||||
 * @note  You should free the returned handle with regex_free()
 | 
			
		||||
 *        when you are done extracting all of the substrings.
 | 
			
		||||
 *
 | 
			
		||||
 * @param string        The string to check.
 | 
			
		||||
 * @param pattern       The regular expression pattern.
 | 
			
		||||
 * @param flags         General flags for the regular expression, see PCRE_* defines.
 | 
			
		||||
 * @param error         Error message encountered, if applicable.
 | 
			
		||||
 * @param maxLen        Maximum string length of the error buffer.
 | 
			
		||||
 * @param errcode       Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
 | 
			
		||||
 *
 | 
			
		||||
 * @return              -2 = Matching error (error code is stored in errcode)
 | 
			
		||||
 *                      -1 = Error in pattern (error message and offset # in error and errcode)
 | 
			
		||||
 *                       0 = No match.
 | 
			
		||||
 *                      >1 = Handle for getting more information (via regex_substr)
 | 
			
		||||
 */
 | 
			
		||||
native Regex:regex_match_all(const string[], const pattern[], flags = 0, error[]= "", maxLen = 0, &errcode = 0);
 | 
			
		||||
 | 
			
		||||
/**
 * Matches a string against a regular expression pattern.
 *
 * @note  If you intend on using the same regular expression pattern
 *        multiple times, consider using regex_compile_ex and regex_match*
 *        instead of making this function reparse the expression each time.
 *
 * @note  The pattern is compiled, matched once and freed again before returning,
 *        so no regex handle is left for the caller to release.
 *
 * @param str           The string to check.
 * @param pattern       The regular expression pattern.
 * @param flags         General flags for the regular expression.
 * @param error         Error message, if applicable.
 * @param maxLen        Maximum length of the error buffer.
 * @param errcode       Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
 *
 * @return              -2 = Matching error.
 *                      -1 = Pattern error (error code is stored in errcode)
 *                       0 = No match.
 *                      >0 = Number of results.
 */
stock regex_match_simple(const str[], const pattern[], flags = 0, error[]= "", maxLen = 0, &errcode = 0)
{
	new Regex:regex = regex_compile_ex(pattern, flags, error, maxLen, errcode);

	// Compare against a Regex-tagged zero so the check does not mix tags.
	if (regex < Regex:0)
	{
		return -1;
	}

	// The subject string comes first and the compiled handle second, matching
	// the (const string[], Regex:pattern, ...) order used by regex_match_all_c.
	new substrings = regex_match_c(str, regex);

	regex_free(regex);

	return substrings;
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Flags used with regex_replace to control the replacement behavior.
 | 
			
		||||
 */
 | 
			
		||||
#define REGEX_FORMAT_DEFAULT   0       /* Uses the standard formatting rules to replace matches */
 | 
			
		||||
#define REGEX_FORMAT_NOCOPY    (1<<0)  /* The sections that do not match the regular expression are not copied when replacing matches. */
 | 
			
		||||
#define REGEX_FORMAT_FIRSTONLY (1<<1)  /* Only the first occurrence of a regular expression is replaced. */
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Perform a regular expression search and replace.
 | 
			
		||||
 *
 | 
			
		||||
 * An optional parameter, flags, allows you to specify options on how the replacement is performed.
 | 
			
		||||
 * Supported format specifiers for replace parameter:
 | 
			
		||||
 *   $number  : Substitutes the substring matched by group number.
 | 
			
		||||
 *              number must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
 | 
			
		||||
 *   ${name}  : Substitutes the substring matched by the named group name (a maximum of 32 characters).
 | 
			
		||||
 *   $&       : Substitutes a copy of the whole match.
 | 
			
		||||
 *   $`       : Substitutes all the text of the input string before the match.
 | 
			
		||||
 *   $'       : Substitutes all the text of the input string after the match.
 | 
			
		||||
 *   $+       : Substitutes the last group that was captured.
 | 
			
		||||
 *   $_       : Substitutes the entire input string.
 | 
			
		||||
 *   $$       : Substitutes a literal "$".
 | 
			
		||||
 * Note: the character \ can also be used to introduce a format specifier; it behaves the same as $.
 | 
			
		||||
 *
 | 
			
		||||
 * @param pattern       The regular expression pattern.
 | 
			
		||||
 * @param string        The string to check.
 | 
			
		||||
 * @param error         Error message, if applicable.
 | 
			
		||||
 * @param maxLen        Maximum length of the string buffer.
 | 
			
		||||
 * @param replace       The string will be used to replace any matches. See above for format specifiers.
 | 
			
		||||
 * @param flags         General flags to control how the string is replaced. See REGEX_FORMAT_* defines.
 | 
			
		||||
 * @param errcode       Regex type error code encountered, if applicable. See REGEX_ERROR_* defines.
 | 
			
		||||
 *
 | 
			
		||||
 * @return              -2 = Matching error (error code is stored in errcode)
 | 
			
		||||
 *                       0 = No match.
 | 
			
		||||
 *                      >0 = Number of matches.
 | 
			
		||||
 */
 | 
			
		||||
native regex_replace(Regex:pattern, string[], maxLen, const replace[], flags = REGEX_FORMAT_DEFAULT, &errcode = 0);
 | 
			
		||||
							
								
								
									
										451
									
								
								plugins/testsuite/regex_test.sma
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										451
									
								
								plugins/testsuite/regex_test.sma
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,451 @@
 | 
			
		||||
#include <amxmodx>
 | 
			
		||||
#include <regex>
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Warning: To get expected result, file encoding must be UTF-8 without BOM.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/**
 * Plugin entry point: registers the plugin and exposes the "regex_test"
 * server command, which is handled by OnServerCommand().
 */
public plugin_init()
{
    // Identify the plugin to AMXX.
    register_plugin("Regex Test", AMXX_VERSION_STR, "AMXX Dev Team");

    // Typing "regex_test" in the server console runs the whole suite.
    register_srvcmd("regex_test", "OnServerCommand");
}
 | 
			
		||||
 | 
			
		||||
new FailedCount;
 | 
			
		||||
new PassedCount;
 | 
			
		||||
 | 
			
		||||
/**
 * Runs a single regex_replace() test case and records the outcome.
 *
 * The pattern is compiled with regex_compile_ex(), the subject is copied into a
 * scratch buffer, the replacement is performed in place and both the returned
 * count and the resulting buffer are compared with the expected values.
 * A failure prints a numbered line to the server console and increments
 * FailedCount; a success increments PassedCount.
 *
 * @param regex             Pattern passed to regex_compile_ex().
 * @param replace           Replacement string passed to regex_replace().
 * @param string            Subject string copied into the scratch buffer.
 * @param expectedString    Expected content of the buffer after the replacement.
 * @param expectedCount     Expected return value of regex_replace(), or -1 to skip the count check.
 * @param regexFlags        Flags passed to regex_compile_ex().
 * @param formatFlags       Flags passed to regex_replace().
 * @param bufferlen         Maximum length passed to regex_replace(), or -1 to use
 *                          the full capacity of the scratch buffer.
 *
 * @noreturn
 */
test(const regex[], const replace[], const string[], const expectedString[], expectedCount = -1, regexFlags = 0, formatFlags = 0, bufferlen = -1)
{
    new errorCode, error[128];
    new Regex:r = regex_compile_ex(regex, regexFlags, error, charsmax(error), errorCode);

    if (r == REGEX_PATTERN_FAIL || errorCode)
    {
        server_print("^t^t#%d. Pattern fail : ^"%s^"(%d)", ++FailedCount + PassedCount, error, errorCode);
    }
    else
    {
        new buffer[512];
        copy(buffer, charsmax(buffer), string);

        // Separate name so the compile-time errorCode above is not shadowed.
        new replaceErrorCode;
        new count = regex_replace(r, buffer, bufferlen != -1 ?  bufferlen : charsmax(buffer), replace, formatFlags, replaceErrorCode);

        if (expectedCount != -1 && count != expectedCount)
        {
            server_print("^t^t#%d. Failed - count = %d, expected count = %d", ++FailedCount + PassedCount, count, expectedCount);
        }
        else if (!equal(buffer, expectedString))
        {
            server_print("^t^t#%d. Failed - output = %s, expected output = %s", ++FailedCount + PassedCount, buffer, expectedString);
        }
        else
        {
            ++PassedCount;
        }

        regex_free(r);
    }
}
 | 
			
		||||
/**
 * Prints the suite summary: the number of passed tests out of all tests run.
 */
end()
{
    new total = PassedCount + FailedCount;

    server_print("Tests successful: %d/%d", PassedCount, total);
}
 | 
			
		||||
 | 
			
		||||
public OnServerCommand()
 | 
			
		||||
{
 | 
			
		||||
    server_print("Testing regex_replace()");
 | 
			
		||||
 | 
			
		||||
    server_print("^tChecking count...");
 | 
			
		||||
    {
 | 
			
		||||
        test( .regex          = "(([0-9a-z]+)-([0-9]+))-(([0-9]+)-([0-9]+))",
 | 
			
		||||
              .replace        = "xxxx",
 | 
			
		||||
              .string         = "1-2-3-4 a-2-3-4 1-a-3-4 1-2-a-4 1-2-3-a a-a-a-a 4-3-2-1 100-200-300-400-500-600-700-800",
 | 
			
		||||
              .expectedString = "xxxx xxxx 1-a-3-4 1-2-a-4 1-2-3-a a-a-a-a xxxx xxxx-xxxx",
 | 
			
		||||
              .expectedCount  = 5
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "([a-z]+)",
 | 
			
		||||
              .replace        = "xxxx",
 | 
			
		||||
              .string         = "Here must only number like 42 and 13 appear",
 | 
			
		||||
              .expectedString = "Hxxxx xxxx xxxx xxxx xxxx 42 xxxx 13 xxxx",
 | 
			
		||||
              .expectedCount  = 7
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "((V(I|1)(4|A)GR(4|A))|(V(I|1)C(0|O)D(I|1)(N|\/\\\/)))", .regexFlags = PCRE_CASELESS,
 | 
			
		||||
              .replace        = "...",
 | 
			
		||||
              .string         = "Viagra V14GR4 Vicodin V1C0D1/\/ v1c0d1/|/",
 | 
			
		||||
              .expectedString = "... ... ... ... v1c0d1/|/",
 | 
			
		||||
              .expectedCount  = 4
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "\[(right)\](((?R)|[^^[]+?|\[)*)\[/\\1\]", .regexFlags = PCRE_CASELESS | PCRE_UNGREEDY,
 | 
			
		||||
              .replace        = "",
 | 
			
		||||
              .string         = "[CODE]<td align="$stylevar[right]">[/CODE]",
 | 
			
		||||
              .expectedString = "[CODE]<td align="$stylevar[right]">[/CODE]",
 | 
			
		||||
              .expectedCount  = 0
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "- This is a string$",
 | 
			
		||||
              .replace        = "This shouldn\'t work",
 | 
			
		||||
              .string         = "123456789 - Hello, world -           This is a string.",
 | 
			
		||||
              .expectedString = "123456789 - Hello, world -           This is a string.",
 | 
			
		||||
              .expectedCount  = 0
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "[0-35-9]",
 | 
			
		||||
              .replace        = "4",
 | 
			
		||||
              .string         = "123456789 - Hello, world -           This is a string.",
 | 
			
		||||
              .expectedString = "444444444 - Hello, world -           This is a string.",
 | 
			
		||||
              .expectedCount  = 8
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "\b[hH]\w{2,4}",
 | 
			
		||||
              .replace        = "Bonjour",
 | 
			
		||||
              .string         = "123456789 - Hello, world -           This is a string.",
 | 
			
		||||
              .expectedString = "123456789 - Bonjour, world -           This is a string.",
 | 
			
		||||
              .expectedCount  = 1
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "(\w)\s*-\s*(\w)",
 | 
			
		||||
              .replace        = "$1. $2",
 | 
			
		||||
              .string         = "123456789 - Hello, world -           This is a string.",
 | 
			
		||||
              .expectedString = "123456789. Hello, world. This is a string.",
 | 
			
		||||
              .expectedCount  = 2
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "([a-z]\w+)@(\w+)\.(\w+)\.([a-z]{2,})",
 | 
			
		||||
              .replace        = "$1 at $2 dot $3 dot $4",
 | 
			
		||||
              .string         = "josmessa@uk.ibm.com",
 | 
			
		||||
              .expectedString = "josmessa at uk dot ibm dot com",
 | 
			
		||||
              .expectedCount  = 1
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "\b\w{1}s",
 | 
			
		||||
              .replace        = "test",
 | 
			
		||||
              .string         = "This is a string. (0-9) as well as parentheses",
 | 
			
		||||
              .expectedString = "This test a string. (0-9) test well test parentheses",
 | 
			
		||||
              .expectedCount  = 3
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "(\d{1})-(\d{1})",
 | 
			
		||||
              .replace        = "$1 to $2",
 | 
			
		||||
              .string         = "This is a string. It contains numbers (0-9) as well as parentheses and some other things!",
 | 
			
		||||
              .expectedString = "This is a string. It contains numbers (0 to 9) as well as parentheses and some other things!",
 | 
			
		||||
              .expectedCount  = 1
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test( .regex          = "[\(!\)]",
 | 
			
		||||
              .replace        = "*",
 | 
			
		||||
              .string         = "This is a string. It contains numbers (0-9) as well as parentheses and some other things!",
 | 
			
		||||
              .expectedString = "This is a string. It contains numbers *0-9* as well as parentheses and some other things*",
 | 
			
		||||
              .expectedCount  = 3
 | 
			
		||||
            );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    server_print("^tChecking edges cases...");
 | 
			
		||||
    {
 | 
			
		||||
        test(.regex = "[0-9]+",    .replace = "*",   .string = "",     .expectedString = "",    .expectedCount  = 0);
 | 
			
		||||
        test(.regex = "([0-9]+)",  .replace = "",    .string = "123",  .expectedString = "",    .expectedCount  = 1);
 | 
			
		||||
        test(.regex = "a",         .replace = "\",   .string = "a",    .expectedString = "\",   .expectedCount  = 1);
 | 
			
		||||
        test(.regex = "^^",        .replace = "x",   .string = "a",    .expectedString = "xa",  .expectedCount  = 1);
 | 
			
		||||
        test(.regex = "b",         .replace = "\",   .string = "b",    .expectedString = "\",   .expectedCount  = 1, .bufferlen = 1);
 | 
			
		||||
        test(.regex = "b",         .replace = "^^",  .string = "b",    .expectedString = "b",   .expectedCount  = 0, .bufferlen = 0);
 | 
			
		||||
        test(.regex = "\w+",       .replace = "123", .string = "abc",  .expectedString = "12",  .expectedCount  = 1, .bufferlen = 2);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    server_print("^tChecking UTF-8 support...");
 | 
			
		||||
    {
 | 
			
		||||
        test(.regex = "(\w+)",  .replace = "*",  .string = "éà@É",  .expectedString = "éà@É",  .expectedCount = 0);
 | 
			
		||||
        test(.regex = "(\w+)",  .replace = "*",  .string = "éà@É",  .expectedString = "*@*",   .expectedCount = 2,  .regexFlags = PCRE_UCP | PCRE_UTF8);
 | 
			
		||||
        test(.regex = "(\w+)",  .replace = "字", .string = "éà@É",  .expectedString = "字@字",.expectedCount = 2,  .regexFlags = PCRE_UCP | PCRE_UTF8);
 | 
			
		||||
        test(.regex = "(\w+)",  .replace = "字", .string = "éà@É",  .expectedString = "字",   .expectedCount = 2,  .regexFlags = PCRE_UCP | PCRE_UTF8, .bufferlen = 3);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    server_print("^tChecking substitutions...");
 | 
			
		||||
    {
 | 
			
		||||
        test(.regex = "x",            .replace = "y",             .string = "text",     .expectedString = "teyt"      );
 | 
			
		||||
        test(.regex = "x",            .replace = "$",             .string = "text",     .expectedString = "te$t"      );
 | 
			
		||||
        test(.regex = "x",            .replace = "$1",            .string = "text",     .expectedString = "te$1t"     );
 | 
			
		||||
        test(.regex = "x",            .replace = "${1",           .string = "text",     .expectedString = "te${1t"    );
 | 
			
		||||
        test(.regex = "x",            .replace = "${",            .string = "text",     .expectedString = "te${t"     );
 | 
			
		||||
        test(.regex = "x",            .replace = "${$0",          .string = "text",     .expectedString = "te${xt"    );
 | 
			
		||||
        test(.regex = "x",            .replace = "${1}",          .string = "text",     .expectedString = "te${1}t"   );
 | 
			
		||||
        test(.regex = "x",            .replace = "${1}",          .string = "text",     .expectedString = "te${1}t"   );
 | 
			
		||||
        test(.regex = "x",            .replace = "$5",            .string = "text",     .expectedString = "te$5t"     );
 | 
			
		||||
        test(.regex = "x",            .replace = "$5",            .string = "te(x)t",   .expectedString = "te($5)t"   );
 | 
			
		||||
        test(.regex = "x",            .replace = "${foo",         .string = "text",     .expectedString = "te${foot"  );
 | 
			
		||||
        test(.regex = "(x)",          .replace = "$5",            .string = "text",     .expectedString = "te$5t"     );
 | 
			
		||||
        test(.regex = "(x)",          .replace = "$1",            .string = "text",     .expectedString = "text"      );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "$1",            .string = "text",     .expectedString = "txt"       );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "$5",            .string = "text",     .expectedString = "t$5t"      );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "$4",            .string = "text",     .expectedString = "t$4t"      );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "$3",            .string = "text",     .expectedString = "t$3t"      );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "${1}",          .string = "text",     .expectedString = "txt"       );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "${3}",          .string = "text",     .expectedString = "t${3}t"    );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "${1}${3}",      .string = "text",     .expectedString = "tx${3}t"   );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "${1}${name}",   .string = "text",     .expectedString = "tx${name}t");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${1}${name}",   .string = "text",     .expectedString = "tx${name}t");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${1}${foo}",    .string = "text",     .expectedString = "txxt"      );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${goll}${foo}", .string = "text",     .expectedString = "t${goll}xt");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${goll${foo}",  .string = "text",     .expectedString = "t${gollxt" );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${goll${foo}}", .string = "text",     .expectedString = "t${gollx}t");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${foo}}",      .string = "text",     .expectedString = "t${foo}}t" );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${${foo}}",     .string = "text",     .expectedString = "t${x}t"    );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${foo}}",      .string = "text",     .expectedString = "t${foo}}t" );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${bfoo}}",     .string = "text",     .expectedString = "t${bfoo}}t");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${foo}}",      .string = "text",     .expectedString = "t${foo}}t" );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${foo}",       .string = "text",     .expectedString = "t${foo}t"  );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$$",            .string = "text",     .expectedString = "t$t"       );
 | 
			
		||||
        test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2",    .string = "text",     .expectedString = "txext"     );
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2",    .string = "text",     .expectedString = "teext"     );
 | 
			
		||||
        test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2$+",  .string = "text",     .expectedString = "txexxt"    );
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$+",  .string = "text",     .expectedString = "teexxt"    );
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$_",  .string = "texts",    .expectedString = "teextextsts");
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$`",  .string = "texts",    .expectedString = "teextts"   ),
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$'",  .string = "texts",    .expectedString = "teextsts"  ),
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$&",  .string = "texts",    .expectedString = "teexexts"  ),
 | 
			
		||||
        test(.regex = "x",            .replace = "y",             .string = "text",     .expectedString = "teyt"      );
 | 
			
		||||
        test(.regex = "x",            .replace = "$",             .string = "text",     .expectedString = "te$t"      );
 | 
			
		||||
        test(.regex = "x",            .replace = "$1",            .string = "text",     .expectedString = "te$1t"     );
 | 
			
		||||
        test(.regex = "x",            .replace = "${1}",          .string = "text",     .expectedString = "te${1}t"   );
 | 
			
		||||
        test(.regex = "x",            .replace = "$5",            .string = "text",     .expectedString = "te$5t"     );
 | 
			
		||||
        test(.regex = "x",            .replace = "$5",            .string = "te(x)t",   .expectedString = "te($5)t"   );
 | 
			
		||||
        test(.regex = "x",            .replace = "${foo",         .string = "text",     .expectedString = "te${foot"  );
 | 
			
		||||
        test(.regex = "(x)",          .replace = "$5",            .string = "text",     .expectedString = "te$5t"     );
 | 
			
		||||
        test(.regex = "(x)",          .replace = "$1",            .string = "text",     .expectedString = "text"      );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "$1",            .string = "text",     .expectedString = "txt"       );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "$5",            .string = "text",     .expectedString = "t$5t"      );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "$4",            .string = "text",     .expectedString = "t$4t"      );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "$3",            .string = "text",     .expectedString = "t$3t"      );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "${1}",          .string = "text",     .expectedString = "txt"       );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "${3}",          .string = "text",     .expectedString = "t${3}t"    );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "${1}${3}",      .string = "text",     .expectedString = "tx${3}t"   );
 | 
			
		||||
        test(.regex = "e(x)",         .replace = "${1}${name}",   .string = "text",     .expectedString = "tx${name}t");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${1}${name}",   .string = "text",     .expectedString = "tx${name}t");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${1}${foo}",    .string = "text",     .expectedString = "txxt"      );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${goll}${foo}", .string = "text",     .expectedString = "t${goll}xt");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${goll${foo}",  .string = "text",     .expectedString = "t${gollxt" );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${goll${foo}}", .string = "text",     .expectedString = "t${gollx}t");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${foo}}",      .string = "text",     .expectedString = "t${foo}}t" );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "${${foo}}",     .string = "text",     .expectedString = "t${x}t"    );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${foo}}",      .string = "text",     .expectedString = "t${foo}}t" );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${bfoo}}",     .string = "text",     .expectedString = "t${bfoo}}t");
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${foo}}",      .string = "text",     .expectedString = "t${foo}}t" );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$${foo}",       .string = "text",     .expectedString = "t${foo}t"  );
 | 
			
		||||
        test(.regex = "e(?<foo>x)",   .replace = "$$",            .string = "text",     .expectedString = "t$t"       );
 | 
			
		||||
        test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2",    .string = "text",     .expectedString = "txext"     );
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2",    .string = "text",     .expectedString = "teext"     );
 | 
			
		||||
        test(.regex = "(e)(?<foo>x)", .replace = "${foo}$1$2$+",  .string = "text",     .expectedString = "txexxt"    );
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$+",  .string = "text",     .expectedString = "teexxt"    );
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$_",  .string = "texts",    .expectedString = "teextextsts");
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$`",  .string = "texts",    .expectedString = "teextts"   );
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$'",  .string = "texts",    .expectedString = "teextsts"  );
 | 
			
		||||
        test(.regex = "(?<foo>e)(x)", .replace = "${foo}$1$2$&",  .string = "texts",    .expectedString = "teexexts"  );
 | 
			
		||||
        test(.regex = "<(.+?)>",                .replace = "[$0:$1]",     .string = "<i>am not</i>", .expectedString = "[<i>:i]am not[</i>:/i]");
 | 
			
		||||
        test(.regex =  "(?<foo>e)(?<foo>x)",    .replace = "${foo}$1$2",  .string = "text",      .expectedString = "teext", .regexFlags = PCRE_DUPNAMES);
 | 
			
		||||
        test(.regex = "\b(\w+)(\s)(\w+)\b",     .replace = "$3$2$1",      .string = "one two",   .expectedString = "two one");
 | 
			
		||||
        test(.regex = "\b(\d+)\s?USD",          .replace = "$$$1",        .string = "103 USD",   .expectedString = "$103"   );
 | 
			
		||||
        test(.regex = "\b(?<w1>\w+)(\s)(?<w2>\w+)\b", .replace = "${w2} ${w1}", .string = "one two", .expectedString = "two one");
 | 
			
		||||
        test(.regex = "(\$*(\d*(\.+\d+)?){1})", .replace = "**$&",        .string = "$1.30",     .expectedString = "**$1.30**");
 | 
			
		||||
        test(.regex = "B+",                     .replace = "$`",          .string = "AABBCC",    .expectedString = "AAAACC");
 | 
			
		||||
        test(.regex = "B+",                     .replace = "$'",          .string = "AABBCC",    .expectedString = "AACCCC");
 | 
			
		||||
        test(.regex = "B+(C+)",                 .replace = "$+",          .string = "AABBCCDD",  .expectedString = "AACCDD");
 | 
			
		||||
        test(.regex = "B+",                     .replace = "$_",          .string = "AABBCC",    .expectedString = "AAAABBCCCC");
 | 
			
		||||
        test(.regex = "(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11",  .replace = "${S}$11$1", .string = "F2345678910L71", .expectedString = "F2345678910L71"),
 | 
			
		||||
        test(.regex = "(F)(2)(3)(4)(5)(6)(7)(8)(9)(10)(L)\11",  .replace = "${S}$11$1", .string = "F2345678910LL1", .expectedString = "${S}LF1");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    server_print("^tChecking moar #1...");
 | 
			
		||||
    {
 | 
			
		||||
        test(.string = "(?(w)a|o)"       , .regex = "\(\?\(\w+\).*\|?.*\)"                              , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(w)|o)"        , .regex = "\(\?\(\w+\).*\|?.*\)"                              , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(w)a)"         , .regex = "\(\?\(\w+\).*\|?.*\)"                              , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(w)a|)"        , .regex = "\(\?\(\w+\).*\|?.*\)"                              , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(w)?|a|o)"     , .regex = "\(\?\(\w+\).*\|?.*\)"                              , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(w)||o)"       , .regex = "\(\?\(\w+\).*\|?.*\)"                              , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(w)(a)"        , .regex = "\(\?\(\w+\).*\|?.*\)"                              , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(w))\a|)"      , .regex = "\(\?\(\w+\).*\|?.*\)"                              , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(2)a|o)"       , .regex = "\(\?\([^^\)]+\).*\|?.*\)"                          , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?(|)a|o)"       , .regex = "\(\?\([^^\)]+\).*\|?.*\)"                          , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "a\3b"            , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)"          , .replace = "\5", .expectedString = "a\5b");
 | 
			
		||||
        test(.string = "\3b"             , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)"          , .replace = "\5", .expectedString = "\5b");
 | 
			
		||||
        test(.string = "\\\3b"           , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\(\d+)"          , .replace = "\5", .expectedString = "\\\5b");
 | 
			
		||||
        test(.string = "\\\k<g>"         , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w+)>"       , .replace = "\5", .expectedString = "\\\5");
 | 
			
		||||
        test(.string = "\\\\k'g'"        , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'"       , .replace = "\5", .expectedString = "\\\\k'g'");
 | 
			
		||||
        test(.string = "a\\\\k'g'"       , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'"       , .replace = "\5", .expectedString = "a\\\\k'g'");
 | 
			
		||||
        test(.string = "\k'g'"           , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w+)'"       , .replace = "\5", .expectedString = "\5");
 | 
			
		||||
        test(.string = "(?<n1-n2>)"      , .regex = "\(\?<[A-Za-z]\w*-[A-Za-z]\w*>.*\)"                 , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?'n1-n2'a)"     , .regex = "\(\?'[A-Za-z]\w*-[A-Za-z]\w*'.*\)"                 , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "\p{Isa}"         , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Ina}");
 | 
			
		||||
        test(.string = "\p{Is}"          , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Is}");
 | 
			
		||||
        test(.string = "\p{Isa"          , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\\[pP]\{)Is(?=\w+\})", .replace = "In", .expectedString = "\p{Isa");
 | 
			
		||||
        test(.string = "a(?#|)"          , .regex = "\(\?#[^^\)]*\)"                                    , .replace = "", .expectedString = "a");
 | 
			
		||||
        test(.string = "(?#|)"           , .regex = "\(\?#[^^\)]*\)"                                    , .replace = "", .expectedString = "");
 | 
			
		||||
        test(.string = "(?#|)"           , .regex = "\#[^^\n\r]*"                                       , .replace = "", .expectedString = "(?");
 | 
			
		||||
        test(.string = "(?inm-xs:\#)"    , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(r");
 | 
			
		||||
        test(.string = "(?ni:())"        , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(r)");
 | 
			
		||||
        test(.string = "(?x-i:)"         , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(?x-i:)");
 | 
			
		||||
        test(.string = "(?n:))"          , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?[imsx]*n[-imsx]*:[^^\)]+\)", .replace = "r", .expectedString = "(?n:))");
 | 
			
		||||
        test(.string = "(?<n1>)"         , .regex = "\(\?<[A-Za-z]\w*>.*\)"                             , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?'n1'y)"        , .regex = "\(\?'[A-Za-z]\w*'.*\)"                             , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?<45>y)"        , .regex = "\(\?<\d+>.*\)"                                     , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?'7'o)"         , .regex = "\(\?'\d+'.*\)"                                     , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "\\\("            , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\("             , .replace = "r", .expectedString = "\\r");
 | 
			
		||||
        test(.string = "a\\\("           , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\("             , .replace = "r", .expectedString = "a\\r");
 | 
			
		||||
        test(.string = "\\("             , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\("             , .replace = "r", .expectedString = "\r");
 | 
			
		||||
        test(.string = "a\\("            , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\("             , .replace = "r", .expectedString = "a\r");
 | 
			
		||||
        test(.string = "\("              , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\("             , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "a\("             , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\\("             , .replace = "r", .expectedString = "ar");
 | 
			
		||||
        test(.string = "?:"              , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])"                    , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "?<!"             , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])"                    , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "?-"              , .regex = "(?:^^\?[:imnsx=!>-]|^^\?<[!=])"                    , .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "\(?<n>"          , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?<[A-Za-z]\w*>", .replace = "r", .expectedString = "\(r");
 | 
			
		||||
        test(.string = "a\(?'n'"         , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?'[A-Za-z]\w*'", .replace = "r", .expectedString = "a\(r");
 | 
			
		||||
        test(.string = "\\(?<2>"         , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?<\d+>"        , .replace = "r", .expectedString = "\\(r");
 | 
			
		||||
        test(.string = "(?'2'"           , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3}\()\?'\d+'"        , .replace = "r", .expectedString = "(r");
 | 
			
		||||
        test(.string = "\[\b]"           , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
 | 
			
		||||
        test(.string = "\[a\bb]"         , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[a\u8b]");
 | 
			
		||||
        test(.string = "\[\b]"           , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
 | 
			
		||||
        test(.string = "\[\b]"           , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\u8]");
 | 
			
		||||
        test(.string = "\[\\b]"          , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\\b(?=[^^\[\]]*\])", .replace = "\\u8", .expectedString = "\[\\u8]");
 | 
			
		||||
        test(.string = "[[]"             , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\\[" , .expectedString = "[\[]");
 | 
			
		||||
        test(.string = "\[[]"            , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\["  , .expectedString = "\[[]");
 | 
			
		||||
        test(.string = "\[\[]"           , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\["  , .expectedString = "\[\[]");
 | 
			
		||||
        test(.string = "\[\[]"           , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2})*(?:\[|\[[^^\[\]]*[^^\[\]\\])(?:[\\]{2})*)\[(?=[^^\[\]]*\])" , .replace = "\["  , .expectedString = "\[\[]");
 | 
			
		||||
        test(.string = "\{"              , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\\{");
 | 
			
		||||
        test(.string = "\{"              , .regex = "(?=(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\\{");
 | 
			
		||||
        test(.string = "\{1,2}"          , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1,2}");
 | 
			
		||||
        test(.string = "\{1}"            , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1}");
 | 
			
		||||
        test(.string = "\{1,}"           , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\{", .expectedString = "\{1,}");
 | 
			
		||||
        test(.string = "\{1"             , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2})*)\{(?!\d\d*(,(\d\d*)?)?\})", .replace = "\\{", .expectedString = "\{1");
 | 
			
		||||
        test(.string = "\\(?!{1}"        , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "\5", .expectedString = "?!");
 | 
			
		||||
        test(.string = "{1}"             , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "({1}"            , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "(?{1}"           , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "(?{1}");
 | 
			
		||||
        test(.string = "(?:{1}"          , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "r");
 | 
			
		||||
        test(.string = "\({1}"           , .regex = "(\A|((\A|[^^\\])([\\]{2})*\((\?([:>=!]|<([=!]|(\w+>))))?))\{\d+(,(\d+)?)?\}", .replace = "r", .expectedString = "\({1}");
 | 
			
		||||
        test(.string = "\p{Isa}"         , .regex = "(?!\\[pP]\{)Is(?=\w+\})"                       , .replace = "In", .expectedString = "\p{Ina}");
 | 
			
		||||
        test(.string = "\p{Is}"          , .regex = "(?!\\[pP]\{)Is(?=\w+\})"                       , .replace = "In", .expectedString = "\p{Is}");
 | 
			
		||||
        test(.string = "\p{Isa"          , .regex = "(?!\\[pP]\{)Is(?=\w+\})"                       , .replace = "In", .expectedString = "\p{Isa");
 | 
			
		||||
        test(.string = "\}"              , .regex = "(?!(\\A|[^^\\])(\\{2})*\\{\\d\\d*(,(\\d\\d*)?)?)\\}", .replace = "\\}", .expectedString = "\}");
 | 
			
		||||
        test(.string = "{\}"             , .regex = "(?!(\A|[^^\^^])(\^^{2})*\{\d\d*(,(\d\d*)?)?)\}", .replace = "\\}", .expectedString = "{\\}");
 | 
			
		||||
        test(.string = "{1,2}"           , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}"  , .replace = "\\}", .expectedString = "{1,2\}");
 | 
			
		||||
        test(.string = "\{1}"            , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}"  , .replace = "\\}", .expectedString = "\{1\}");
 | 
			
		||||
        test(.string = "\{1\}"           , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}"  , .replace = "\\}", .expectedString = "\{1\\}");
 | 
			
		||||
        test(.string = "\{1}"            , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}"  , .replace = "\\}", .expectedString = "\{1\}");
 | 
			
		||||
        test(.string = "{1,}"            , .regex = "(?!(\A|[^^\\])(\\{2})*\{\d\d*(,(\d\d*)?)?)\}"  , .replace = "\\}", .expectedString = "{1,\}");
 | 
			
		||||
        test(.string = "a(?<!b*c)"       , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)"      , .replace = "r",   .expectedString = "ar");
 | 
			
		||||
        test(.string = "a(?<!b+c)"       , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)"      , .replace = "r",   .expectedString = "ar");
 | 
			
		||||
        test(.string = "(?<!b{1}c))"     , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)"      , .replace = "r",   .expectedString = "(?<!b{1}c))");
 | 
			
		||||
        test(.string = "(?<!b{1,}c)"     , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)"      , .replace = "r",   .expectedString = "r");
 | 
			
		||||
        test(.string = "(?<!b{1,4}c)"    , .regex = "\(\?\<[=!][^^\)]*(?:[\*\+]|\{\d+,\}).*\)"      , .replace = "r",   .expectedString = "(?<!b{1,4}c)");
 | 
			
		||||
        test(.string = "a\3b"            , .regex = "\\(\d+)"                                       , .replace = "\5",  .expectedString = "a\5b");
 | 
			
		||||
        test(.string = "\3b"             , .regex = "\\(\d+)"                                       , .replace = "\5",  .expectedString = "\5b");
 | 
			
		||||
        test(.string = "\\3b"            , .regex = "(?!\\\\)\\(\d)"                                , .replace = "\5", . expectedString = "\\5b");
 | 
			
		||||
        test(.string = "a\\3b"           , .regex = "(?:(\\){0,3})\\(\d)"                           , .replace = "\5", . expectedString = "a\5b");
 | 
			
		||||
        test(.string = "\\k<g>"          , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w)>"    , .replace = "\5",  .expectedString = "\\5");
 | 
			
		||||
        test(.string = "a\\k<g>"         , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k<(\w)>"    , .replace = "\5",  .expectedString = "a\\5");
 | 
			
		||||
        test(.string = "\\k'g'"          , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'"    , .replace = "\5",  .expectedString = "\\5");
 | 
			
		||||
        test(.string = "a\\k'g'"         , .regex = "(?!(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'"    , .replace = "\5",  .expectedString = "a\\5");
 | 
			
		||||
        test(.string = "\k'g'"           , .regex = "(?:(?:\A|[^^\\])(?:[\\]{2}){0,3})\\k'(\w)'"    , .replace = "\5",  .expectedString = "\5");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    server_print("^tChecking moar #2...");
 | 
			
		||||
    {
 | 
			
		||||
        test(.regex          = "^^((?>[a-zA-Z\d!#$%&'*+\-\/=?^^_`{|}~]+\x20*|^"((?=[\x01-\x7f])[^^^"\\]|\\[\x01-\x7f])*^"\x20*)*(?<angle><))?((?!\.)(?>\.?[a-zA-Z\d!#$%&'*+\-\/=?^^_`{|}~]+)+|^"((?=[\x01-\x7f])[^^^"\\]|\\[\x01-\x7f])*^")@(((?!-)[a-zA-Z\d\-]+(?<!-)\.)+[a-zA-Z]{2,}|\[(((?(?<!\[)\.)(25[0-5]|2[0-4]\d|[01]?\d?\d)){4}|[a-zA-Z\d\-]*[a-zA-Z\d]:((?=[\x01-\x7f])[^^\\\[\]]|\\[\x01-\x7f])+)\])(?(angle)>)$"    ,
 | 
			
		||||
             .replace        = "$1$4@$7net>",
 | 
			
		||||
             .string         = "Name Surname <name.surname@blah.com>",
 | 
			
		||||
             .expectedString = "Name Surname <name.surname@blah.net>"
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
        test(.regex         = "([A-Z])\w+",
 | 
			
		||||
             .replace       = "*snip*",
 | 
			
		||||
             .string        = "Welcome to RegExr v2.0 by gskinner.com!\
 | 
			
		||||
                                                                   \
 | 
			
		||||
                            Edit the Expression & Text to see matches. Roll over matches or the expression for details. Undo mistakes with ctrl-z. Save & Share expressions with friends or the Community. A full Reference & Help is available in the Library, or watch the video Tutorial.\
 | 
			
		||||
                                                                   \
 | 
			
		||||
                            Sample text for testing:\
 | 
			
		||||
                            abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 | 
			
		||||
                            :0123456789 +-.,!@#$%^^&*();\/|<>^"'\
 | 
			
		||||
                            12345 -98.7 3.141 .6180 9,000 +42\
 | 
			
		||||
                            555.123.4567    +1-(800)-555-2468\
 | 
			
		||||
                            foo@demo.net    bar.ba@test.co.uk\
 | 
			
		||||
                            www.demo.com    http://foo.co.uk/\
 | 
			
		||||
                            http://regexr.com/foo.html?q=bar",
 | 
			
		||||
 | 
			
		||||
             .expectedString = "*snip* to *snip* v2.0 by gskinner.com!\
 | 
			
		||||
                                                                   \
 | 
			
		||||
                            *snip* the *snip* & *snip* to see matches. *snip* over matches or the expression for details. *snip* mistakes with ctrl-z. *snip* & *snip* expressions with friends or the *snip*. A full *snip* & *snip* is available in the *snip*, or watch the video *snip*.\
 | 
			
		||||
                                                                   \
 | 
			
		||||
                            *snip* text for testing:\
 | 
			
		||||
                            abcdefghijklmnopqrstuvwxyz *snip*\
 | 
			
		||||
                            :0123456789 +-.,!@#$%^^&*();\/|<>^"'\
 | 
			
		||||
                            12345 -98.7 3.141 .6180 9,000 +42\
 | 
			
		||||
                            555.123.4567    +1-(800)-555-2468\
 | 
			
		||||
                            foo@demo.net    bar.ba@test.co.uk\
 | 
			
		||||
                            www.de",
 | 
			
		||||
             .regexFlags = PCRE_EXTENDED
 | 
			
		||||
         );
 | 
			
		||||
 | 
			
		||||
        test(.regex          = "/\*(?>[^^*/]+|\*[^^/]|/[^^*]|/\*(?>[^^*/]+|\*[^^/]|/[^^*])*\*/)*\*/",
 | 
			
		||||
             .replace        = "",
 | 
			
		||||
             .string         = "/* comment */\
 | 
			
		||||
                                no comment\
 | 
			
		||||
                                /* comment\
 | 
			
		||||
                                   spanning\
 | 
			
		||||
                                   multiple\
 | 
			
		||||
                                   lines */\
 | 
			
		||||
                                /* comment /* nesting */ of /* two */ levels supported */\
 | 
			
		||||
                                /* comment /* nesting */ of /* /* more than */ two levels */ not supported */",
 | 
			
		||||
             .expectedString = "no comment\
 | 
			
		||||
                                /* comment  of  not supported */"
 | 
			
		||||
         );
 | 
			
		||||
 | 
			
		||||
        test(.regex          = "\b(?<protocol>https?|ftp)://(?<domain>[A-Z0-9.-]+)(?<file>/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(?<parameters>\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
 | 
			
		||||
             .replace        = "${protocol}s://site.com${file}^n",
 | 
			
		||||
             .string         = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
 | 
			
		||||
             .expectedString = "https://site.com^nhttps://site.com/^nhttps://site.com/test.php^nhttps://site.com/index.php^nhttps://site.com^n",
 | 
			
		||||
             .regexFlags     = PCRE_CASELESS | PCRE_EXTENDED,
 | 
			
		||||
             .formatFlags    = REGEX_FORMAT_NOCOPY
 | 
			
		||||
         );
 | 
			
		||||
 | 
			
		||||
        test(.regex          = "\b(https?|ftp)://([A-Z0-9.-]+)(/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
 | 
			
		||||
             .replace        = "$1s://site.com$3^n",
 | 
			
		||||
             .string         = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
 | 
			
		||||
             .expectedString = "https://site.com^nhttps://site.com/^nhttps://site.com/test.php^nhttps://site.com/index.php^nhttps://site.com^n",
 | 
			
		||||
             .regexFlags     = PCRE_CASELESS | PCRE_EXTENDED,
 | 
			
		||||
             .formatFlags    = REGEX_FORMAT_NOCOPY
 | 
			
		||||
         );
 | 
			
		||||
 | 
			
		||||
        test(.regex          = "\b(https?|ftp)://([A-Z0-9.-]+)(/[A-Z0-9+&@#/%=~_|!:,.;-]*)?(\?[A-Z0-9+&@#/%=~_|!:,.;]*)?",
 | 
			
		||||
             .replace        = "$1s://site.com$3^n",
 | 
			
		||||
             .string         = "http://www.alliedmods.net http://www.alliedmods.net/ http://www.alliedmods.net/test.php http://www.alliedmods.net/index.php?secret=x Something interesting at http://www.alliedmods.net.",
 | 
			
		||||
             .expectedString = "https://site.com^n",
 | 
			
		||||
             .regexFlags     = PCRE_CASELESS | PCRE_EXTENDED,
 | 
			
		||||
             .formatFlags    = REGEX_FORMAT_NOCOPY | REGEX_FORMAT_FIRSTONLY
 | 
			
		||||
         );
 | 
			
		||||
 | 
			
		||||
        test(.regex          = "^^(.++)\r?\n(?=(?:^^(?!\1$).*+\r?\n)*+\1$)",
 | 
			
		||||
             .replace        = "",
 | 
			
		||||
             .string         = "one^n\
 | 
			
		||||
                                two^n\
 | 
			
		||||
                                three^n\
 | 
			
		||||
                                four^n\
 | 
			
		||||
                                two^n\
 | 
			
		||||
                                three^n\
 | 
			
		||||
                                four^n\
 | 
			
		||||
                                three^n\
 | 
			
		||||
                                four^n\
 | 
			
		||||
                                four",
 | 
			
		||||
             .expectedString = "one^n\
 | 
			
		||||
                                two^n\
 | 
			
		||||
                                three^n\
 | 
			
		||||
                                four",
 | 
			
		||||
             .regexFlags     = PCRE_EXTENDED | PCRE_MULTILINE
 | 
			
		||||
         );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    end();
 | 
			
		||||
}
 | 
			
		||||
@@ -225,6 +225,7 @@ scripting_files = [
 | 
			
		||||
  'testsuite/menutest.sma',
 | 
			
		||||
  'testsuite/native_test.sma',
 | 
			
		||||
  'testsuite/nvault_test.sma',
 | 
			
		||||
  'testsuite/regex_test.sma',
 | 
			
		||||
  'testsuite/sorttest.sma',
 | 
			
		||||
  'testsuite/strbreak.sma',
 | 
			
		||||
  'testsuite/sqlxtest.sma',
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										313
									
								
								tools/pcre/132html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										313
									
								
								tools/pcre/132html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,313 @@
 | 
			
		||||
#! /usr/bin/perl -w
 | 
			
		||||
 | 
			
		||||
# Script to turn PCRE man pages into HTML
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Subroutine to handle font changes and other escapes.
#
# Argument: the text of one man-page line (or a fragment of one).
# Returns:  the text with HTML metacharacters escaped and the troff font and
#           backslash escapes converted to HTML.

sub do_line {
my($s) = $_[0];

# Escape the angle brackets before any markup is inserted, so that the <i>
# and <b> tags generated below are not themselves escaped.

$s =~ s/</&lt;/g;
$s =~ s/>/&gt;/g;

# \fI...\fR (or \fP) is italic text; \fB...\fR (or \fP) is bold text.

$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;

# The troff escape \e stands for a literal backslash.

$s =~ s"\\e"\\"g;

# Turn "(c)" into the copyright entity, but only directly after "Copyright ".

$s =~ s/(?<=Copyright )\(c\)/&copy;/g;
$s;
}
 | 
			
		||||
 | 
			
		||||
# Subroutine to ensure not in a paragraph.
#
# If a paragraph is open, its closing tags are written to TEMP (preceded by
# the closing tag of a literal section when one is open). In every case the
# paragraph, literal-section and "text written" flags are cleared.

sub end_para {
if ($inpara)
  {
  my $closing = "";
  $closing .= "</PRE>\n" if ($inpre);
  $closing .= "</P>\n";
  print TEMP $closing;
  }
$inpre = 0;
$inpara = 0;
$wrotetext = 0;
}
 | 
			
		||||
 | 
			
		||||
# Subroutine to start a new paragraph.
#
# Any paragraph that is currently open is closed first; then the opening tag
# is written to TEMP and the "in paragraph" flag is set.

sub new_para {
end_para();
print TEMP ("<P>\n");
$inpara = 1;
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Main program

# Conversion state. $innf is set between .nf and .fi; $inpara and $inpre
# record whether a paragraph or a literal section is open; $wrotetext
# records whether text has been written since the last paragraph ended;
# $toc is set by the -toc option; $ref numbers the section anchors.

$innf = $inpara = $inpre = $wrotetext = $toc = 0;
$ref = 1;

# Consume the leading options. Only -toc is recognized; any other argument
# that starts with a hyphen is discarded.

while (@ARGV && $ARGV[0] =~ /^-/)
  {
  my $option = shift @ARGV;
  $toc = 1 if $option eq "-toc";
  }
 | 
			
		||||
 | 
			
		||||
# Initial output to STDOUT: the fixed start of the page, titled with the
# first remaining command-line argument.

print <<"End";
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
End

# With -toc, the contents list is written straight to STDOUT while the body
# of the page is collected in a temporary file named after the process id.

if ($toc)
  {
  print "<ul>\n";
  }

open(TEMP, '>', "/tmp/$$") or die "Can't open /tmp/$$ for output\n";
 | 
			
		||||
 | 
			
		||||
# Read the man page from STDIN one line at a time. Macro lines (those that
# begin with a dot) are interpreted; all other lines are converted by
# do_line() and written to TEMP as paragraph or literal text.

while (<STDIN>)
  {
  # Handle lines beginning with a dot

  if (/^\./)
    {
    # Some of the PCRE man pages used to contain instances of .br. However,
    # they should have all been removed because they cause trouble in some
    # (other) automated systems that translate man pages to HTML. Complain if
    # we find .br or .in (another macro that is deprecated).

    if (/^\.br/ || /^\.in/)
      {
      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
      print STDERR "*** $_\n";
      die "*** Processing abandoned\n";
      }

    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

    elsif (/^\.nf/)
      {
      $innf = 1;
      }

    elsif (/^\.fi/)
      {
      $innf = 0;
      }

    # Handling .sp is subtle. If it is inside a literal section, do nothing if
    # the next line is a non literal text line; similarly, if not inside a
    # literal section, do nothing if a literal follows, unless we are inside
    # a .nf/.fi section. The point being that the <pre> and </pre> that delimit
    # literal sections will do the spacing. Always skip if no previous output.

    elsif (/^\.sp/)
      {
      if ($wrotetext)
        {
        $_ = <STDIN>;
        if ($inpre)
          {
          print TEMP "\n" if (/^[\s.]/);
          }
        else
          {
          print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
          }
        redo;    # Now process the lookahead line we just read
        }
      }
    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
      {
      &new_para();
      }
    elsif (/^\.SH\s*("?)(.*)\1/)
      {
      # Ignore the NAME section, together with the line that follows it

      if ($2 =~ /^NAME\b/)
        {
        <STDIN>;
        next;
        }

      &end_para();
      my($title) = &do_line($2);
      if ($toc)
        {
        # The title is passed as a %s argument rather than interpolated into
        # the format, so that a percent sign in a title is output literally.
        # The contents entry goes to STDOUT and the heading itself to TEMP.

        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
          $ref, $ref, $title);
        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
          $ref, $title);
        $ref++;
        }
      else
        {
        print TEMP "<br><b>\n$title\n</b><br>\n";
        }
      }
    elsif (/^\.SS\s*("?)(.*)\1/)
      {
      &end_para();
      my($title) = &do_line($2);
      print TEMP "<br><b>\n$title\n</b><br>\n";
      }
    elsif (/^\.B\s*(.*)/)
      {
      &new_para() if (!$inpara);
      $_ = &do_line($1);
      s/"(.*?)"/$1/g;         # Remove the quotes around quoted arguments
      print TEMP "<b>$_</b>\n";
      $wrotetext = 1;
      }
    elsif (/^\.I\s*(.*)/)
      {
      &new_para() if (!$inpara);
      $_ = &do_line($1);
      s/"(.*?)"/$1/g;         # Remove the quotes around quoted arguments
      print TEMP "<i>$_</i>\n";
      $wrotetext = 1;
      }

    # A comment that starts "HREF" takes the next line as a name that
    # is turned into a hyperlink, using the text given, which might be
    # in a special font. If it ends in () or (digits) or punctuation, they
    # aren't part of the link.

    elsif (/^\.\\"\s*HREF/)
      {
      $_=<STDIN>;
      chomp;
      $_ = &do_line($_);
      $_ =~ s/\s+$//;
      $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
      print TEMP "<a href=\"$1.html\">$_</a>\n";
      }

    # A comment that starts "HTML" inserts literal HTML

    elsif (/^\.\\"\s*HTML\s*(.*)/)
      {
      print TEMP $1;
      }

    # A comment that starts < inserts that HTML at the end of the
    # *next* input line - so as not to get a newline between them.

    elsif (/^\.\\"\s*(<.*>)/)
      {
      my($markup) = $1;
      $_=<STDIN>;
      chomp;
      $_ = &do_line($_);
      $_ =~ s/\s+$//;
      print TEMP "$_$markup\n";
      }

    # A comment that starts JOIN joins the next two lines together, with one
    # space between them. Then that line is processed. This is used in some
    # displays where two lines are needed for the "man" version. JOINSH works
    # the same, except that it assumes this is a shell command, so removes
    # continuation backslashes.

    elsif (/^\.\\"\s*JOIN(SH)?/)
      {
      my($one,$two);
      $one = <STDIN>;
      $two = <STDIN>;
      $one =~ s/\s*\\e\s*$// if (defined($1));
      chomp($one);
      $two =~ s/^\s+//;
      $_ = "$one $two";
      redo;            # Process the joined lines
      }

    # .EX/.EE are used in the pcredemo page to bracket the entire program,
    # which is unmodified except for turning backslash into "\e". The text
    # is copied with \e turned back into a backslash and with the HTML
    # metacharacters escaped; the ampersand must be escaped first so that
    # the entities produced for < and > are not escaped again.

    elsif (/^\.EX\s*$/)
      {
      print TEMP "<PRE>\n";
      while (<STDIN>)
        {
        last if /^\.EE\s*$/;
        s/\\e/\\/g;
        s/&/&amp;/g;
        s/</&lt;/g;
        s/>/&gt;/g;
        print TEMP;
        }
      }

    # Ignore anything not recognized

    next;
    }

  # Line does not begin with a dot. Replace blank lines with new paragraphs

  if (/^\s*$/)
    {
    &end_para() if ($wrotetext);
    next;
    }

  # Convert fonts changes and output an ordinary line. Ensure that indented
  # lines are marked as literal.

  $_ = &do_line($_);
  &new_para() if (!$inpara);

  if (/^\s/)
    {
    if (!$inpre)
      {
      print TEMP "<pre>\n";
      $inpre = 1;
      }
    }
  elsif ($inpre)
    {
    print TEMP "</pre>\n";
    $inpre = 0;
    }

  # Add <br> to the end of a non-literal line if we are within .nf/.fi

  $_ .= "<br>\n" if (!$inpre && $innf);

  print TEMP;
  $wrotetext = 1;
  }
 | 
			
		||||
 | 
			
		||||
# The TOC, if present, will have been written - terminate it
 | 
			
		||||
 | 
			
		||||
print "</ul>\n" if ($toc);
 | 
			
		||||
 | 
			
		||||
# Copy the remainder to the standard output
 | 
			
		||||
 | 
			
		||||
# Close the temporary file that was being written and reopen it for reading.
# The three-argument form of open() states the read mode explicitly instead
# of leaving it to be parsed out of the file name string.
close(TEMP);
open(TEMP, '<', "/tmp/$$") || die "Can't open /tmp/$$ for input\n";

# Echo the temporary file to standard output.
print while (<TEMP>);

# Append the fixed footer that links back to the index page.
print <<End ;
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
End

# Finished with the temporary file: close it and remove it.
close(TEMP);
unlink("/tmp/$$");
 | 
			
		||||
 | 
			
		||||
# End
 | 
			
		||||
							
								
								
									
										45
									
								
								tools/pcre/AUTHORS
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								tools/pcre/AUTHORS
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,45 @@
 | 
			
		||||
THE MAIN PCRE LIBRARY
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
Written by:       Philip Hazel
 | 
			
		||||
Email local part: ph10
 | 
			
		||||
Email domain:     cam.ac.uk
 | 
			
		||||
 | 
			
		||||
University of Cambridge Computing Service,
 | 
			
		||||
Cambridge, England.
 | 
			
		||||
 | 
			
		||||
Copyright (c) 1997-2014 University of Cambridge
 | 
			
		||||
All rights reserved
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
PCRE JUST-IN-TIME COMPILATION SUPPORT
 | 
			
		||||
-------------------------------------
 | 
			
		||||
 | 
			
		||||
Written by:       Zoltan Herczeg
 | 
			
		||||
Email local part: hzmester
 | 
			
		||||
Email domain:     freemail.hu
 | 
			
		||||
 | 
			
		||||
Copyright(c) 2010-2014 Zoltan Herczeg
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
STACK-LESS JUST-IN-TIME COMPILER
 | 
			
		||||
--------------------------------
 | 
			
		||||
 | 
			
		||||
Written by:       Zoltan Herczeg
 | 
			
		||||
Email local part: hzmester
 | 
			
		||||
Email domain:     freemail.hu
 | 
			
		||||
 | 
			
		||||
Copyright(c) 2009-2014 Zoltan Herczeg
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE C++ WRAPPER LIBRARY
 | 
			
		||||
-----------------------
 | 
			
		||||
 | 
			
		||||
Written by:       Google Inc.
 | 
			
		||||
 | 
			
		||||
Copyright (c) 2007-2012 Google Inc
 | 
			
		||||
All rights reserved
 | 
			
		||||
 | 
			
		||||
####
 | 
			
		||||
							
								
								
									
										991
									
								
								tools/pcre/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										991
									
								
								tools/pcre/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,991 @@
 | 
			
		||||
# CMakeLists.txt
 | 
			
		||||
#
 | 
			
		||||
#
 | 
			
		||||
# This file allows building PCRE with the CMake configuration and build
 | 
			
		||||
# tool. Download CMake in source or binary form from http://www.cmake.org/
 | 
			
		||||
#
 | 
			
		||||
# Original listfile by Christian Ehrlicher <Ch.Ehrlicher@gmx.de>
 | 
			
		||||
# Refined and expanded by Daniel Richard G. <skunk@iSKUNK.ORG>
 | 
			
		||||
# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered
 | 
			
		||||
# 2007-09-19 Adjusted by PH to retain previous default settings
 | 
			
		||||
# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre
 | 
			
		||||
#            (b) Ensure pcretest and pcregrep link with the local library,
 | 
			
		||||
#                not a previously-installed one.
 | 
			
		||||
#            (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and
 | 
			
		||||
#                PCRE_SUPPORT_LIBBZ2.
 | 
			
		||||
# 2008-01-20 Brought up to date to include several new features by Christian
 | 
			
		||||
#            Ehrlicher.
 | 
			
		||||
# 2008-01-22 Sheri added options for backward compatibility of library names
 | 
			
		||||
#            when building with minGW:
 | 
			
		||||
#            if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to
 | 
			
		||||
#            be built without "lib" as prefix. (The libraries will be named
 | 
			
		||||
#            pcre.dll, pcreposix.dll and pcrecpp.dll).
 | 
			
		||||
#            if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to
 | 
			
		||||
#            be built with suffix of "-0.dll". (The libraries will be named
 | 
			
		||||
#            libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names
 | 
			
		||||
#            built by default with Configure and Make).
 | 
			
		||||
# 2008-01-23 PH removed the automatic build of pcredemo.
 | 
			
		||||
# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed.
 | 
			
		||||
# 2008-07-03 PH updated for revised UCP property support (change of files)
 | 
			
		||||
# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name
 | 
			
		||||
#            CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE
 | 
			
		||||
#            is included within another project.
 | 
			
		||||
# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to
 | 
			
		||||
#            add options to stop the building of pcregrep and the tests, and
 | 
			
		||||
#            to disable the final configuration report.
 | 
			
		||||
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
 | 
			
		||||
#            are set by specifying a release type.
 | 
			
		||||
# 2010-01-02 PH added test for stdint.h
 | 
			
		||||
# 2010-03-02 PH added test for inttypes.h
 | 
			
		||||
# 2011-08-01 PH added PCREGREP_BUFSIZE
 | 
			
		||||
# 2011-08-22 PH added PCRE_SUPPORT_JIT
 | 
			
		||||
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
 | 
			
		||||
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
 | 
			
		||||
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
 | 
			
		||||
#            compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
 | 
			
		||||
#            the source dir by the user prior to building
 | 
			
		||||
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
 | 
			
		||||
#            of DEBUG location only (likely only matters in MSVC)
 | 
			
		||||
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
 | 
			
		||||
#            RunGrepTest (used for UNIX and Msys)
 | 
			
		||||
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
 | 
			
		||||
#            RunTest.bat in Win32 (for effortless testing with "make test")
 | 
			
		||||
# 2011-10-04 Sheri Increased minimum required cmake version
 | 
			
		||||
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
 | 
			
		||||
# 2012-01-10 Zoltan Herczeg added libpcre16 support
 | 
			
		||||
# 2012-01-13 Stephen Kelly added out of source build support
 | 
			
		||||
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
 | 
			
		||||
#            of the configure.ac file
 | 
			
		||||
# 2012-02-26 PH added support for libedit
 | 
			
		||||
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
 | 
			
		||||
# 2012-09-08 ChPe added PCRE32 support
 | 
			
		||||
# 2012-10-23 PH added support for VALGRIND and GCOV
 | 
			
		||||
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
 | 
			
		||||
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
 | 
			
		||||
#            so it has been removed.
 | 
			
		||||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
 | 
			
		||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
 | 
			
		||||
 | 
			
		||||
PROJECT(PCRE C CXX)
 | 
			
		||||
 | 
			
		||||
# Increased minimum to 2.8.0 to support newer add_test features
 | 
			
		||||
 | 
			
		||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
 | 
			
		||||
 | 
			
		||||
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
 | 
			
		||||
 | 
			
		||||
# external packages
 | 
			
		||||
FIND_PACKAGE( BZip2 )
 | 
			
		||||
FIND_PACKAGE( ZLIB )
 | 
			
		||||
FIND_PACKAGE( Readline )
 | 
			
		||||
FIND_PACKAGE( Editline )
 | 
			
		||||
 | 
			
		||||
# Configuration checks
 | 
			
		||||
 | 
			
		||||
INCLUDE(CheckIncludeFile)
 | 
			
		||||
INCLUDE(CheckIncludeFileCXX)
 | 
			
		||||
INCLUDE(CheckFunctionExists)
 | 
			
		||||
INCLUDE(CheckTypeSize)
 | 
			
		||||
 | 
			
		||||
CHECK_INCLUDE_FILE(dirent.h     HAVE_DIRENT_H)
 | 
			
		||||
CHECK_INCLUDE_FILE(stdint.h     HAVE_STDINT_H)
 | 
			
		||||
CHECK_INCLUDE_FILE(inttypes.h   HAVE_INTTYPES_H)
 | 
			
		||||
CHECK_INCLUDE_FILE(sys/stat.h   HAVE_SYS_STAT_H)
 | 
			
		||||
CHECK_INCLUDE_FILE(sys/types.h  HAVE_SYS_TYPES_H)
 | 
			
		||||
CHECK_INCLUDE_FILE(unistd.h     HAVE_UNISTD_H)
 | 
			
		||||
CHECK_INCLUDE_FILE(windows.h    HAVE_WINDOWS_H)
 | 
			
		||||
 | 
			
		||||
CHECK_INCLUDE_FILE_CXX(type_traits.h            HAVE_TYPE_TRAITS_H)
 | 
			
		||||
CHECK_INCLUDE_FILE_CXX(bits/type_traits.h       HAVE_BITS_TYPE_TRAITS_H)
 | 
			
		||||
 | 
			
		||||
CHECK_FUNCTION_EXISTS(bcopy     HAVE_BCOPY)
 | 
			
		||||
CHECK_FUNCTION_EXISTS(memmove   HAVE_MEMMOVE)
 | 
			
		||||
CHECK_FUNCTION_EXISTS(strerror  HAVE_STRERROR)
 | 
			
		||||
CHECK_FUNCTION_EXISTS(strtoll   HAVE_STRTOLL)
 | 
			
		||||
CHECK_FUNCTION_EXISTS(strtoq    HAVE_STRTOQ)
 | 
			
		||||
CHECK_FUNCTION_EXISTS(_strtoi64 HAVE__STRTOI64)
 | 
			
		||||
 | 
			
		||||
CHECK_TYPE_SIZE("long long"             LONG_LONG)
 | 
			
		||||
CHECK_TYPE_SIZE("unsigned long long"    UNSIGNED_LONG_LONG)
 | 
			
		||||
 | 
			
		||||
# User-configurable options
 | 
			
		||||
#
 | 
			
		||||
# (Note: CMakeSetup displays these in alphabetical order, regardless of
 | 
			
		||||
# the order we use here)
 | 
			
		||||
 | 
			
		||||
SET(BUILD_SHARED_LIBS OFF CACHE BOOL
 | 
			
		||||
    "Build shared libraries instead of static ones.")
 | 
			
		||||
 | 
			
		||||
OPTION(PCRE_BUILD_PCRE8 "Build 8 bit PCRE library" ON)
 | 
			
		||||
 | 
			
		||||
OPTION(PCRE_BUILD_PCRE16 "Build 16 bit PCRE library" OFF)
 | 
			
		||||
 | 
			
		||||
OPTION(PCRE_BUILD_PCRE32 "Build 32 bit PCRE library" OFF)
 | 
			
		||||
 | 
			
		||||
OPTION(PCRE_BUILD_PCRECPP "Build the PCRE C++ library (pcrecpp)." ON)
 | 
			
		||||
 | 
			
		||||
SET(PCRE_EBCDIC OFF CACHE BOOL
 | 
			
		||||
    "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_EBCDIC_NL25 OFF CACHE BOOL
 | 
			
		||||
    "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_LINK_SIZE "2" CACHE STRING
 | 
			
		||||
    "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_PARENS_NEST_LIMIT "250" CACHE STRING
 | 
			
		||||
    "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
 | 
			
		||||
    "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
 | 
			
		||||
    "Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
 | 
			
		||||
 | 
			
		||||
SET(PCREGREP_BUFSIZE "20480" CACHE STRING
 | 
			
		||||
    "Buffer size parameter for pcregrep. See PCREGREP_BUFSIZE in config.h.in for details.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_NEWLINE "LF" CACHE STRING
 | 
			
		||||
    "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_NO_RECURSE OFF CACHE BOOL
 | 
			
		||||
    "If ON, then don't use stack recursion when matching. See NO_RECURSE in config.h.in for details.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_POSIX_MALLOC_THRESHOLD "10" CACHE STRING
 | 
			
		||||
    "Threshold for malloc() usage. See POSIX_MALLOC_THRESHOLD in config.h.in for details.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_SUPPORT_JIT OFF CACHE BOOL
 | 
			
		||||
    "Enable support for Just-in-time compiling.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_SUPPORT_PCREGREP_JIT ON CACHE BOOL
 | 
			
		||||
    "Enable use of Just-in-time compiling in pcregrep.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_SUPPORT_UTF OFF CACHE BOOL
 | 
			
		||||
    "Enable support for Unicode Transformation Format (UTF-8/UTF-16/UTF-32) encoding.")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
 | 
			
		||||
    "Enable support for Unicode properties (if set, UTF support will be enabled as well).")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
 | 
			
		||||
    "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
 | 
			
		||||
 | 
			
		||||
SET(PCRE_SUPPORT_VALGRIND OFF CACHE BOOL
 | 
			
		||||
    "Enable Valgrind support.")
 | 
			
		||||
 | 
			
		||||
OPTION(PCRE_SHOW_REPORT    "Show the final configuration report" ON)
 | 
			
		||||
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
 | 
			
		||||
OPTION(PCRE_BUILD_TESTS    "Build the tests" ON)
 | 
			
		||||
 | 
			
		||||
IF (MINGW)
 | 
			
		||||
  OPTION(NON_STANDARD_LIB_PREFIX
 | 
			
		||||
         "ON=Shared libraries built in mingw will be named pcre.dll, etc., instead of libpcre.dll, etc."
 | 
			
		||||
         OFF)
 | 
			
		||||
 | 
			
		||||
  OPTION(NON_STANDARD_LIB_SUFFIX
 | 
			
		||||
         "ON=Shared libraries built in mingw will be named libpcre-0.dll, etc., instead of libpcre.dll, etc."
 | 
			
		||||
         OFF)
 | 
			
		||||
ENDIF(MINGW)
 | 
			
		||||
 | 
			
		||||
IF(MSVC)
 | 
			
		||||
  OPTION(INSTALL_MSVC_PDB
 | 
			
		||||
         "ON=Install .pdb files built by MSVC, if generated"
 | 
			
		||||
         OFF)
 | 
			
		||||
ENDIF(MSVC)
 | 
			
		||||
 | 
			
		||||
# bzip2 lib
 | 
			
		||||
IF(BZIP2_FOUND)
 | 
			
		||||
  OPTION (PCRE_SUPPORT_LIBBZ2 "Enable support for linking pcregrep with libbz2." ON)
 | 
			
		||||
ENDIF(BZIP2_FOUND)
 | 
			
		||||
IF(PCRE_SUPPORT_LIBBZ2)
 | 
			
		||||
  INCLUDE_DIRECTORIES(${BZIP2_INCLUDE_DIR})
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBBZ2)
 | 
			
		||||
 | 
			
		||||
# zlib
 | 
			
		||||
IF(ZLIB_FOUND)
 | 
			
		||||
  OPTION (PCRE_SUPPORT_LIBZ "Enable support for linking pcregrep with libz." ON)
 | 
			
		||||
ENDIF(ZLIB_FOUND)
 | 
			
		||||
IF(PCRE_SUPPORT_LIBZ)
 | 
			
		||||
  INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBZ)
 | 
			
		||||
 | 
			
		||||
# editline lib
 | 
			
		||||
IF(EDITLINE_FOUND)
 | 
			
		||||
  OPTION (PCRE_SUPPORT_LIBEDIT  "Enable support for linking pcretest with libedit." OFF)
 | 
			
		||||
ENDIF(EDITLINE_FOUND)
 | 
			
		||||
IF(PCRE_SUPPORT_LIBEDIT)
 | 
			
		||||
  INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBEDIT)
 | 
			
		||||
 | 
			
		||||
# readline lib
 | 
			
		||||
IF(READLINE_FOUND)
 | 
			
		||||
  OPTION (PCRE_SUPPORT_LIBREADLINE  "Enable support for linking pcretest with libreadline." ON)
 | 
			
		||||
ENDIF(READLINE_FOUND)
 | 
			
		||||
IF(PCRE_SUPPORT_LIBREADLINE)
 | 
			
		||||
  INCLUDE_DIRECTORIES(${READLINE_INCLUDE_DIR})
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBREADLINE)
 | 
			
		||||
 | 
			
		||||
# Prepare build configuration
 | 
			
		||||
 | 
			
		||||
SET(pcre_have_type_traits 0)
 | 
			
		||||
SET(pcre_have_bits_type_traits 0)
 | 
			
		||||
 | 
			
		||||
IF(HAVE_TYPE_TRAITS_H)
 | 
			
		||||
        SET(pcre_have_type_traits 1)
 | 
			
		||||
ENDIF(HAVE_TYPE_TRAITS_H)
 | 
			
		||||
 | 
			
		||||
IF(HAVE_BITS_TYPE_TRAITS_H)
 | 
			
		||||
        SET(pcre_have_bits_type_traits 1)
 | 
			
		||||
ENDIF(HAVE_BITS_TYPE_TRAITS_H)
 | 
			
		||||
 | 
			
		||||
SET(pcre_have_long_long 0)
 | 
			
		||||
SET(pcre_have_ulong_long 0)
 | 
			
		||||
 | 
			
		||||
IF(HAVE_LONG_LONG)
 | 
			
		||||
        SET(pcre_have_long_long 1)
 | 
			
		||||
ENDIF(HAVE_LONG_LONG)
 | 
			
		||||
 | 
			
		||||
IF(HAVE_UNSIGNED_LONG_LONG)
 | 
			
		||||
        SET(pcre_have_ulong_long 1)
 | 
			
		||||
ENDIF(HAVE_UNSIGNED_LONG_LONG)
 | 
			
		||||
 | 
			
		||||
IF(NOT BUILD_SHARED_LIBS)
 | 
			
		||||
        SET(PCRE_STATIC 1)
 | 
			
		||||
ENDIF(NOT BUILD_SHARED_LIBS)
 | 
			
		||||
 | 
			
		||||
IF(NOT PCRE_BUILD_PCRE8 AND NOT PCRE_BUILD_PCRE16 AND NOT PCRE_BUILD_PCRE32)
 | 
			
		||||
        MESSAGE(FATAL_ERROR "At least one of PCRE_BUILD_PCRE8, PCRE_BUILD_PCRE16 or PCRE_BUILD_PCRE32 must be enabled")
 | 
			
		||||
ENDIF(NOT PCRE_BUILD_PCRE8 AND NOT PCRE_BUILD_PCRE16 AND NOT PCRE_BUILD_PCRE32)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRE8)
 | 
			
		||||
        SET(SUPPORT_PCRE8 1)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE8)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRE16)
 | 
			
		||||
        SET(SUPPORT_PCRE16 1)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE16)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRE32)
 | 
			
		||||
        SET(SUPPORT_PCRE32 1)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE32)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRECPP AND NOT PCRE_BUILD_PCRE8)
 | 
			
		||||
        MESSAGE(STATUS "** PCRE_BUILD_PCRE8 must be enabled for the C++ library support")
 | 
			
		||||
        SET(PCRE_BUILD_PCRECPP OFF)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRECPP AND NOT PCRE_BUILD_PCRE8)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCREGREP AND NOT PCRE_BUILD_PCRE8)
 | 
			
		||||
        MESSAGE(STATUS "** PCRE_BUILD_PCRE8 must be enabled for the pcregrep program")
 | 
			
		||||
        SET(PCRE_BUILD_PCREGREP OFF)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCREGREP AND NOT PCRE_BUILD_PCRE8)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_LIBREADLINE AND PCRE_SUPPORT_LIBEDIT)
 | 
			
		||||
        MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBREADLINE AND PCRE_SUPPORT_LIBEDIT)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_BSR_ANYCRLF)
 | 
			
		||||
        SET(BSR_ANYCRLF 1)
 | 
			
		||||
ENDIF(PCRE_SUPPORT_BSR_ANYCRLF)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_UTF OR PCRE_SUPPORT_UNICODE_PROPERTIES)
 | 
			
		||||
        SET(SUPPORT_UTF 1)
 | 
			
		||||
        SET(PCRE_SUPPORT_UTF ON)
 | 
			
		||||
ENDIF(PCRE_SUPPORT_UTF OR PCRE_SUPPORT_UNICODE_PROPERTIES)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_UNICODE_PROPERTIES)
 | 
			
		||||
        SET(SUPPORT_UCP 1)
 | 
			
		||||
ENDIF(PCRE_SUPPORT_UNICODE_PROPERTIES)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_JIT)
 | 
			
		||||
        SET(SUPPORT_JIT 1)
 | 
			
		||||
ENDIF(PCRE_SUPPORT_JIT)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_PCREGREP_JIT)
 | 
			
		||||
        SET(SUPPORT_PCREGREP_JIT 1)
 | 
			
		||||
ENDIF(PCRE_SUPPORT_PCREGREP_JIT)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_VALGRIND)
 | 
			
		||||
        SET(SUPPORT_VALGRIND 1)
 | 
			
		||||
ENDIF(PCRE_SUPPORT_VALGRIND)
 | 
			
		||||
 | 
			
		||||
# This next one used to contain
 | 
			
		||||
#       SET(PCRETEST_LIBS ${READLINE_LIBRARY})
 | 
			
		||||
# but I was advised to add the NCURSES test as well, along with
 | 
			
		||||
# some modifications to cmake/FindReadline.cmake which should
 | 
			
		||||
# make it possible to override the default if necessary. PH
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_LIBREADLINE)
 | 
			
		||||
        SET(SUPPORT_LIBREADLINE 1)
 | 
			
		||||
        SET(PCRETEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBREADLINE)
 | 
			
		||||
 | 
			
		||||
# libedit is a plug-compatible alternative to libreadline
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_LIBEDIT)
 | 
			
		||||
        SET(SUPPORT_LIBEDIT 1)
 | 
			
		||||
        SET(PCRETEST_LIBS ${EDITLINE_LIBRARY} ${NCURSES_LIBRARY})
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBEDIT)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_LIBZ)
 | 
			
		||||
        SET(SUPPORT_LIBZ 1)
 | 
			
		||||
        SET(PCREGREP_LIBS ${PCREGREP_LIBS} ${ZLIB_LIBRARIES})
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBZ)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SUPPORT_LIBBZ2)
 | 
			
		||||
        SET(SUPPORT_LIBBZ2 1)
 | 
			
		||||
        SET(PCREGREP_LIBS ${PCREGREP_LIBS} ${BZIP2_LIBRARIES})
 | 
			
		||||
ENDIF(PCRE_SUPPORT_LIBBZ2)
 | 
			
		||||
 | 
			
		||||
# Translate the symbolic PCRE_NEWLINE setting into the numeric NEWLINE value.
# Exactly one of the recognized names can match; any other value aborts the
# configuration with an error.
SET(NEWLINE "")

IF(PCRE_NEWLINE STREQUAL "LF")
  SET(NEWLINE "10")
ELSEIF(PCRE_NEWLINE STREQUAL "CR")
  SET(NEWLINE "13")
ELSEIF(PCRE_NEWLINE STREQUAL "CRLF")
  SET(NEWLINE "3338")
ELSEIF(PCRE_NEWLINE STREQUAL "ANY")
  SET(NEWLINE "-1")
ELSEIF(PCRE_NEWLINE STREQUAL "ANYCRLF")
  SET(NEWLINE "-2")
ELSE()
  MESSAGE(FATAL_ERROR "The PCRE_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".")
ENDIF()
 | 
			
		||||
 | 
			
		||||
IF(PCRE_EBCDIC)
 | 
			
		||||
        SET(EBCDIC 1)
 | 
			
		||||
IF(PCRE_NEWLINE STREQUAL "LF")
 | 
			
		||||
        SET(NEWLINE "21")
 | 
			
		||||
ENDIF(PCRE_NEWLINE STREQUAL "LF")
 | 
			
		||||
IF(PCRE_NEWLINE STREQUAL "CRLF")
 | 
			
		||||
        SET(NEWLINE "3349")
 | 
			
		||||
ENDIF(PCRE_NEWLINE STREQUAL "CRLF")
 | 
			
		||||
ENDIF(PCRE_EBCDIC)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_EBCDIC_NL25)
 | 
			
		||||
        SET(EBCDIC 1)
 | 
			
		||||
        SET(EBCDIC_NL25 1)
 | 
			
		||||
IF(PCRE_NEWLINE STREQUAL "LF")
 | 
			
		||||
        SET(NEWLINE "37")
 | 
			
		||||
ENDIF(PCRE_NEWLINE STREQUAL "LF")
 | 
			
		||||
IF(PCRE_NEWLINE STREQUAL "CRLF")
 | 
			
		||||
        SET(NEWLINE "3365")
 | 
			
		||||
ENDIF(PCRE_NEWLINE STREQUAL "CRLF")
 | 
			
		||||
ENDIF(PCRE_EBCDIC_NL25)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_NO_RECURSE)
 | 
			
		||||
        SET(NO_RECURSE 1)
 | 
			
		||||
ENDIF(PCRE_NO_RECURSE)
 | 
			
		||||
 | 
			
		||||
# Output files
 | 
			
		||||
CONFIGURE_FILE(config-cmake.h.in
 | 
			
		||||
               ${PROJECT_BINARY_DIR}/config.h
 | 
			
		||||
               @ONLY)
 | 
			
		||||
 | 
			
		||||
# Parse version numbers and date out of configure.ac
 | 
			
		||||
 | 
			
		||||
file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
 | 
			
		||||
  configure_lines
 | 
			
		||||
  LIMIT_COUNT 50 # Read only the first 50 lines of the file
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
# Look through the lines held in configure_lines for entries of the form
#   m4_define(<name>, [<value>])
# for each name in SEARCHED_VARIABLES, and store the bracketed value in the
# variable with the upper-cased name (PCRE_MAJOR, PCRE_MINOR,
# PCRE_PRERELEASE, PCRE_DATE). A variable that already holds a value that
# if() treats as true is not searched for again.
set(SEARCHED_VARIABLES "pcre_major" "pcre_minor" "pcre_prerelease" "pcre_date")
foreach(configure_line ${configure_lines})
    foreach(_substitution_variable ${SEARCHED_VARIABLES})
        string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
        if (NOT ${_substitution_variable_upper})
            # Only the capture group is used; MATCHED_STRING just receives
            # the whole match. The input is quoted so that it is always
            # passed as a single argument.
            string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MATCHED_STRING "${configure_line}")
            if (CMAKE_MATCH_1)
                set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
            endif()
        endif()
    endforeach()
endforeach()
 | 
			
		||||
 | 
			
		||||
CONFIGURE_FILE(pcre.h.in
 | 
			
		||||
               ${PROJECT_BINARY_DIR}/pcre.h
 | 
			
		||||
               @ONLY)
 | 
			
		||||
 | 
			
		||||
# What about pcre-config and libpcre.pc?
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
        CONFIGURE_FILE(pcre_stringpiece.h.in
 | 
			
		||||
                       ${PROJECT_BINARY_DIR}/pcre_stringpiece.h
 | 
			
		||||
                       @ONLY)
 | 
			
		||||
 | 
			
		||||
        CONFIGURE_FILE(pcrecpparg.h.in
 | 
			
		||||
                       ${PROJECT_BINARY_DIR}/pcrecpparg.h
 | 
			
		||||
                       @ONLY)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
 | 
			
		||||
# Character table generation
 | 
			
		||||
 | 
			
		||||
# Character tables: when PCRE_REBUILD_CHARTABLES is ON, build the dftables
# helper and run it to generate pcre_chartables.c in the binary directory;
# otherwise copy pcre_chartables.c.dist from the source directory unchanged.
OPTION(PCRE_REBUILD_CHARTABLES "Rebuild char tables" OFF)
IF(PCRE_REBUILD_CHARTABLES)
  ADD_EXECUTABLE(dftables dftables.c)

  # The executable target is named directly in COMMAND, so CMake substitutes
  # the path of the built binary itself. Reading the LOCATION target
  # property for this purpose is deprecated (policy CMP0026).
  ADD_CUSTOM_COMMAND(
    COMMENT "Generating character tables (pcre_chartables.c) for current locale"
    DEPENDS dftables
    COMMAND dftables
    ARGS        ${PROJECT_BINARY_DIR}/pcre_chartables.c
    OUTPUT      ${PROJECT_BINARY_DIR}/pcre_chartables.c
  )
ELSE(PCRE_REBUILD_CHARTABLES)
  CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/pcre_chartables.c.dist
                    ${PROJECT_BINARY_DIR}/pcre_chartables.c
                    COPYONLY)
ENDIF(PCRE_REBUILD_CHARTABLES)
 | 
			
		||||
 | 
			
		||||
# Source code
 | 
			
		||||
 | 
			
		||||
SET(PCRE_HEADERS ${PROJECT_BINARY_DIR}/pcre.h)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRE8)
 | 
			
		||||
SET(PCRE_SOURCES
 | 
			
		||||
  pcre_byte_order.c
 | 
			
		||||
  pcre_chartables.c
 | 
			
		||||
  pcre_compile.c
 | 
			
		||||
  pcre_config.c
 | 
			
		||||
  pcre_dfa_exec.c
 | 
			
		||||
  pcre_exec.c
 | 
			
		||||
  pcre_fullinfo.c
 | 
			
		||||
  pcre_get.c
 | 
			
		||||
  pcre_globals.c
 | 
			
		||||
  pcre_jit_compile.c
 | 
			
		||||
  pcre_maketables.c
 | 
			
		||||
  pcre_newline.c
 | 
			
		||||
  pcre_ord2utf8.c
 | 
			
		||||
  pcre_refcount.c
 | 
			
		||||
  pcre_string_utils.c
 | 
			
		||||
  pcre_study.c
 | 
			
		||||
  pcre_tables.c
 | 
			
		||||
  pcre_ucd.c
 | 
			
		||||
  pcre_valid_utf8.c
 | 
			
		||||
  pcre_version.c
 | 
			
		||||
  pcre_xclass.c
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
SET(PCREPOSIX_HEADERS pcreposix.h)
 | 
			
		||||
 | 
			
		||||
SET(PCREPOSIX_SOURCES pcreposix.c)
 | 
			
		||||
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE8)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRE16)
 | 
			
		||||
SET(PCRE16_SOURCES
 | 
			
		||||
  pcre16_byte_order.c
 | 
			
		||||
  pcre16_chartables.c
 | 
			
		||||
  pcre16_compile.c
 | 
			
		||||
  pcre16_config.c
 | 
			
		||||
  pcre16_dfa_exec.c
 | 
			
		||||
  pcre16_exec.c
 | 
			
		||||
  pcre16_fullinfo.c
 | 
			
		||||
  pcre16_get.c
 | 
			
		||||
  pcre16_globals.c
 | 
			
		||||
  pcre16_jit_compile.c
 | 
			
		||||
  pcre16_maketables.c
 | 
			
		||||
  pcre16_newline.c
 | 
			
		||||
  pcre16_ord2utf16.c
 | 
			
		||||
  pcre16_refcount.c
 | 
			
		||||
  pcre16_string_utils.c
 | 
			
		||||
  pcre16_study.c
 | 
			
		||||
  pcre16_tables.c
 | 
			
		||||
  pcre16_ucd.c
 | 
			
		||||
  pcre16_utf16_utils.c
 | 
			
		||||
  pcre16_valid_utf16.c
 | 
			
		||||
  pcre16_version.c
 | 
			
		||||
  pcre16_xclass.c
 | 
			
		||||
)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE16)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRE32)
 | 
			
		||||
SET(PCRE32_SOURCES
 | 
			
		||||
  pcre32_byte_order.c
 | 
			
		||||
  pcre32_chartables.c
 | 
			
		||||
  pcre32_compile.c
 | 
			
		||||
  pcre32_config.c
 | 
			
		||||
  pcre32_dfa_exec.c
 | 
			
		||||
  pcre32_exec.c
 | 
			
		||||
  pcre32_fullinfo.c
 | 
			
		||||
  pcre32_get.c
 | 
			
		||||
  pcre32_globals.c
 | 
			
		||||
  pcre32_jit_compile.c
 | 
			
		||||
  pcre32_maketables.c
 | 
			
		||||
  pcre32_newline.c
 | 
			
		||||
  pcre32_ord2utf32.c
 | 
			
		||||
  pcre32_refcount.c
 | 
			
		||||
  pcre32_string_utils.c
 | 
			
		||||
  pcre32_study.c
 | 
			
		||||
  pcre32_tables.c
 | 
			
		||||
  pcre32_ucd.c
 | 
			
		||||
  pcre32_utf32_utils.c
 | 
			
		||||
  pcre32_valid_utf32.c
 | 
			
		||||
  pcre32_version.c
 | 
			
		||||
  pcre32_xclass.c
 | 
			
		||||
)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE32)
 | 
			
		||||
 | 
			
		||||
IF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
 | 
			
		||||
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre.o
 | 
			
		||||
PRE-LINK
 | 
			
		||||
COMMAND windres ARGS pcre.rc pcre.o
 | 
			
		||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
 | 
			
		||||
COMMENT Using pcre coff info in mingw build)
 | 
			
		||||
SET(PCRE_SOURCES
 | 
			
		||||
  ${PCRE_SOURCES} ${PROJECT_SOURCE_DIR}/pcre.o
 | 
			
		||||
)
 | 
			
		||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
 | 
			
		||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
 | 
			
		||||
ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcreposix.o
 | 
			
		||||
PRE-LINK
 | 
			
		||||
COMMAND windres ARGS pcreposix.rc pcreposix.o
 | 
			
		||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
 | 
			
		||||
COMMENT Using pcreposix coff info in mingw build)
 | 
			
		||||
SET(PCREPOSIX_SOURCES
 | 
			
		||||
  ${PCREPOSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcreposix.o
 | 
			
		||||
)
 | 
			
		||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
 | 
			
		||||
ENDIF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
 | 
			
		||||
IF(MSVC AND NOT PCRE_STATIC)
 | 
			
		||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
 | 
			
		||||
SET(PCRE_SOURCES
 | 
			
		||||
  ${PCRE_SOURCES} pcre.rc)
 | 
			
		||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
 | 
			
		||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
 | 
			
		||||
SET(PCREPOSIX_SOURCES
 | 
			
		||||
  ${PCREPOSIX_SOURCES} pcreposix.rc)
 | 
			
		||||
ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
 | 
			
		||||
ENDIF(MSVC AND NOT PCRE_STATIC)
 | 
			
		||||
 | 
			
		||||
SET(PCRECPP_HEADERS
 | 
			
		||||
  pcrecpp.h
 | 
			
		||||
  pcre_scanner.h
 | 
			
		||||
  ${PROJECT_BINARY_DIR}/pcrecpparg.h
 | 
			
		||||
  ${PROJECT_BINARY_DIR}/pcre_stringpiece.h
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
SET(PCRECPP_SOURCES
 | 
			
		||||
        pcrecpp.cc
 | 
			
		||||
        pcre_scanner.cc
 | 
			
		||||
        pcre_stringpiece.cc
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
# Build setup
 | 
			
		||||
 | 
			
		||||
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
 | 
			
		||||
 | 
			
		||||
IF(MSVC)
 | 
			
		||||
        ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS)
 | 
			
		||||
ENDIF(MSVC)
 | 
			
		||||
 | 
			
		||||
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
 | 
			
		||||
# needed to make sure to not link debug libs
 | 
			
		||||
# against release libs and vice versa
 | 
			
		||||
IF(WIN32)
 | 
			
		||||
  SET(CMAKE_DEBUG_POSTFIX "d")
 | 
			
		||||
ENDIF(WIN32)
 | 
			
		||||
 | 
			
		||||
SET(targets)
 | 
			
		||||
 | 
			
		||||
# Libraries
 | 
			
		||||
# pcre
 | 
			
		||||
IF(PCRE_BUILD_PCRE8)
 | 
			
		||||
ADD_LIBRARY(pcre ${PCRE_HEADERS} ${PCRE_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
 | 
			
		||||
SET(targets ${targets} pcre)
 | 
			
		||||
ADD_LIBRARY(pcreposix ${PCREPOSIX_HEADERS} ${PCREPOSIX_SOURCES})
 | 
			
		||||
SET(targets ${targets} pcreposix)
 | 
			
		||||
TARGET_LINK_LIBRARIES(pcreposix pcre)
 | 
			
		||||
 | 
			
		||||
IF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
  IF(NON_STANDARD_LIB_PREFIX)
 | 
			
		||||
    SET_TARGET_PROPERTIES(pcre pcreposix
 | 
			
		||||
                        PROPERTIES PREFIX ""
 | 
			
		||||
    )
 | 
			
		||||
  ENDIF(NON_STANDARD_LIB_PREFIX)
 | 
			
		||||
 | 
			
		||||
  IF(NON_STANDARD_LIB_SUFFIX)
 | 
			
		||||
    SET_TARGET_PROPERTIES(pcre pcreposix
 | 
			
		||||
                        PROPERTIES SUFFIX "-0.dll"
 | 
			
		||||
    )
 | 
			
		||||
  ENDIF(NON_STANDARD_LIB_SUFFIX)
 | 
			
		||||
ENDIF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE8)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRE16)
 | 
			
		||||
ADD_LIBRARY(pcre16 ${PCRE_HEADERS} ${PCRE16_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
 | 
			
		||||
SET(targets ${targets} pcre16)
 | 
			
		||||
 | 
			
		||||
IF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
  IF(NON_STANDARD_LIB_PREFIX)
 | 
			
		||||
    SET_TARGET_PROPERTIES(pcre16
 | 
			
		||||
                        PROPERTIES PREFIX ""
 | 
			
		||||
    )
 | 
			
		||||
  ENDIF(NON_STANDARD_LIB_PREFIX)
 | 
			
		||||
 | 
			
		||||
  IF(NON_STANDARD_LIB_SUFFIX)
 | 
			
		||||
    SET_TARGET_PROPERTIES(pcre16
 | 
			
		||||
                        PROPERTIES SUFFIX "-0.dll"
 | 
			
		||||
    )
 | 
			
		||||
  ENDIF(NON_STANDARD_LIB_SUFFIX)
 | 
			
		||||
ENDIF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE16)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRE32)
 | 
			
		||||
ADD_LIBRARY(pcre32 ${PCRE_HEADERS} ${PCRE32_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
 | 
			
		||||
SET(targets ${targets} pcre32)
 | 
			
		||||
 | 
			
		||||
IF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
  IF(NON_STANDARD_LIB_PREFIX)
 | 
			
		||||
    SET_TARGET_PROPERTIES(pcre32
 | 
			
		||||
                        PROPERTIES PREFIX ""
 | 
			
		||||
    )
 | 
			
		||||
  ENDIF(NON_STANDARD_LIB_PREFIX)
 | 
			
		||||
 | 
			
		||||
  IF(NON_STANDARD_LIB_SUFFIX)
 | 
			
		||||
    SET_TARGET_PROPERTIES(pcre32
 | 
			
		||||
                        PROPERTIES SUFFIX "-0.dll"
 | 
			
		||||
    )
 | 
			
		||||
  ENDIF(NON_STANDARD_LIB_SUFFIX)
 | 
			
		||||
ENDIF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRE32)
 | 
			
		||||
 | 
			
		||||
# pcrecpp
 | 
			
		||||
IF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
ADD_LIBRARY(pcrecpp ${PCRECPP_HEADERS} ${PCRECPP_SOURCES})
 | 
			
		||||
SET(targets ${targets} pcrecpp)
 | 
			
		||||
TARGET_LINK_LIBRARIES(pcrecpp pcre)
 | 
			
		||||
 | 
			
		||||
  IF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
    IF(NON_STANDARD_LIB_PREFIX)
 | 
			
		||||
      SET_TARGET_PROPERTIES(pcrecpp
 | 
			
		||||
                            PROPERTIES PREFIX ""
 | 
			
		||||
      )
 | 
			
		||||
    ENDIF(NON_STANDARD_LIB_PREFIX)
 | 
			
		||||
 | 
			
		||||
    IF(NON_STANDARD_LIB_SUFFIX)
 | 
			
		||||
      SET_TARGET_PROPERTIES(pcrecpp
 | 
			
		||||
                          PROPERTIES SUFFIX "-0.dll"
 | 
			
		||||
      )
 | 
			
		||||
    ENDIF(NON_STANDARD_LIB_SUFFIX)
 | 
			
		||||
  ENDIF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Executables
 | 
			
		||||
 | 
			
		||||
# Removed by PH (2008-01-23) because pcredemo shouldn't really be built
 | 
			
		||||
# automatically, and it gave trouble in some environments anyway.
 | 
			
		||||
# ADD_EXECUTABLE(pcredemo pcredemo.c)
 | 
			
		||||
# TARGET_LINK_LIBRARIES(pcredemo pcreposix)
 | 
			
		||||
# IF(NOT BUILD_SHARED_LIBS)
 | 
			
		||||
#     # make sure to not use declspec(dllimport) in static mode on windows
 | 
			
		||||
#         SET_TARGET_PROPERTIES(pcredemo PROPERTIES COMPILE_FLAGS "-DPCRE_STATIC")
 | 
			
		||||
# ENDIF(NOT BUILD_SHARED_LIBS)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCREGREP)
 | 
			
		||||
  ADD_EXECUTABLE(pcregrep pcregrep.c)
 | 
			
		||||
  SET(targets ${targets} pcregrep)
 | 
			
		||||
  TARGET_LINK_LIBRARIES(pcregrep pcreposix ${PCREGREP_LIBS})
 | 
			
		||||
ENDIF(PCRE_BUILD_PCREGREP)
 | 
			
		||||
 | 
			
		||||
# Testing
 | 
			
		||||
IF(PCRE_BUILD_TESTS)
 | 
			
		||||
  ENABLE_TESTING()
 | 
			
		||||
 | 
			
		||||
  SET(PCRETEST_SOURCES pcretest.c)
 | 
			
		||||
  IF(PCRE_BUILD_PCRE8)
 | 
			
		||||
    LIST(APPEND PCRETEST_SOURCES pcre_printint.c)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCRE8)
 | 
			
		||||
  IF(PCRE_BUILD_PCRE16)
 | 
			
		||||
    LIST(APPEND PCRETEST_SOURCES pcre16_printint.c)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCRE16)
 | 
			
		||||
  IF(PCRE_BUILD_PCRE32)
 | 
			
		||||
    LIST(APPEND PCRETEST_SOURCES pcre32_printint.c)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCRE32)
 | 
			
		||||
 | 
			
		||||
  ADD_EXECUTABLE(pcretest ${PCRETEST_SOURCES})
 | 
			
		||||
  SET(targets ${targets} pcretest)
 | 
			
		||||
  IF(PCRE_BUILD_PCRE8)
 | 
			
		||||
    LIST(APPEND PCRETEST_LIBS pcreposix pcre)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCRE8)
 | 
			
		||||
  IF(PCRE_BUILD_PCRE16)
 | 
			
		||||
    LIST(APPEND PCRETEST_LIBS pcre16)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCRE16)
 | 
			
		||||
  IF(PCRE_BUILD_PCRE32)
 | 
			
		||||
    LIST(APPEND PCRETEST_LIBS pcre32)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCRE32)
 | 
			
		||||
  TARGET_LINK_LIBRARIES(pcretest ${PCRETEST_LIBS})
 | 
			
		||||
 | 
			
		||||
  IF(PCRE_SUPPORT_JIT)
 | 
			
		||||
    ADD_EXECUTABLE(pcre_jit_test pcre_jit_test.c)
 | 
			
		||||
    SET(targets ${targets} pcre_jit_test)
 | 
			
		||||
    SET(PCRE_JIT_TEST_LIBS )
 | 
			
		||||
    IF(PCRE_BUILD_PCRE8)
 | 
			
		||||
      LIST(APPEND PCRE_JIT_TEST_LIBS pcre)
 | 
			
		||||
    ENDIF(PCRE_BUILD_PCRE8)
 | 
			
		||||
    IF(PCRE_BUILD_PCRE16)
 | 
			
		||||
      LIST(APPEND PCRE_JIT_TEST_LIBS pcre16)
 | 
			
		||||
    ENDIF(PCRE_BUILD_PCRE16)
 | 
			
		||||
    IF(PCRE_BUILD_PCRE32)
 | 
			
		||||
      LIST(APPEND PCRE_JIT_TEST_LIBS pcre32)
 | 
			
		||||
    ENDIF(PCRE_BUILD_PCRE32)
 | 
			
		||||
    TARGET_LINK_LIBRARIES(pcre_jit_test ${PCRE_JIT_TEST_LIBS})
 | 
			
		||||
  ENDIF(PCRE_SUPPORT_JIT)
 | 
			
		||||
 | 
			
		||||
  IF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
    ADD_EXECUTABLE(pcrecpp_unittest pcrecpp_unittest.cc)
 | 
			
		||||
    SET(targets ${targets} pcrecpp_unittest)
 | 
			
		||||
    TARGET_LINK_LIBRARIES(pcrecpp_unittest pcrecpp)
 | 
			
		||||
    IF(MINGW AND NON_STANDARD_LIB_NAMES AND NOT PCRE_STATIC)
 | 
			
		||||
      SET_TARGET_PROPERTIES(pcrecpp
 | 
			
		||||
                        PROPERTIES PREFIX ""
 | 
			
		||||
      )
 | 
			
		||||
    ENDIF(MINGW AND NON_STANDARD_LIB_NAMES AND NOT PCRE_STATIC)
 | 
			
		||||
 | 
			
		||||
    ADD_EXECUTABLE(pcre_scanner_unittest pcre_scanner_unittest.cc)
 | 
			
		||||
    SET(targets ${targets} pcre_scanner_unittest)
 | 
			
		||||
    TARGET_LINK_LIBRARIES(pcre_scanner_unittest pcrecpp)
 | 
			
		||||
 | 
			
		||||
    ADD_EXECUTABLE(pcre_stringpiece_unittest pcre_stringpiece_unittest.cc)
 | 
			
		||||
    SET(targets ${targets} pcre_stringpiece_unittest)
 | 
			
		||||
    TARGET_LINK_LIBRARIES(pcre_stringpiece_unittest pcrecpp)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
 | 
			
		||||
  # exes in Debug location tested by the RunTest shell script
 | 
			
		||||
  # via "make test"
 | 
			
		||||
  IF(PCRE_BUILD_PCREGREP)
 | 
			
		||||
    GET_TARGET_PROPERTY(PCREGREP_EXE pcregrep DEBUG_LOCATION)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCREGREP)
 | 
			
		||||
 | 
			
		||||
  GET_TARGET_PROPERTY(PCRETEST_EXE pcretest DEBUG_LOCATION)
 | 
			
		||||
 | 
			
		||||
# =================================================
 | 
			
		||||
  # Write out a CTest configuration file
 | 
			
		||||
  #
 | 
			
		||||
  FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest
 | 
			
		||||
  "# This is a generated file.
 | 
			
		||||
MESSAGE(\"When testing is complete, review test output in the
 | 
			
		||||
\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
 | 
			
		||||
MESSAGE(\" \")
 | 
			
		||||
")
 | 
			
		||||
 | 
			
		||||
  FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_test.sh
 | 
			
		||||
  "#! /bin/sh
 | 
			
		||||
# This is a generated file.
 | 
			
		||||
srcdir=${PROJECT_SOURCE_DIR}
 | 
			
		||||
pcretest=${PCRETEST_EXE}
 | 
			
		||||
. ${PROJECT_SOURCE_DIR}/RunTest
 | 
			
		||||
if test \"$?\" != \"0\"; then exit 1; fi
 | 
			
		||||
# End
 | 
			
		||||
")
 | 
			
		||||
 | 
			
		||||
  IF(UNIX)
 | 
			
		||||
    ADD_TEST(pcre_test      sh ${PROJECT_BINARY_DIR}/pcre_test.sh)
 | 
			
		||||
  ENDIF(UNIX)
 | 
			
		||||
 | 
			
		||||
  IF(PCRE_BUILD_PCREGREP)
 | 
			
		||||
    FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_grep_test.sh
 | 
			
		||||
    "#! /bin/sh
 | 
			
		||||
# This is a generated file.
 | 
			
		||||
srcdir=${PROJECT_SOURCE_DIR}
 | 
			
		||||
pcregrep=${PCREGREP_EXE}
 | 
			
		||||
pcretest=${PCRETEST_EXE}
 | 
			
		||||
. ${PROJECT_SOURCE_DIR}/RunGrepTest
 | 
			
		||||
if test \"$?\" != \"0\"; then exit 1; fi
 | 
			
		||||
# End
 | 
			
		||||
")
 | 
			
		||||
 | 
			
		||||
    IF(UNIX)
 | 
			
		||||
      ADD_TEST(pcre_grep_test sh ${PROJECT_BINARY_DIR}/pcre_grep_test.sh)
 | 
			
		||||
    ENDIF(UNIX)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCREGREP)
 | 
			
		||||
 | 
			
		||||
  IF(WIN32)
 | 
			
		||||
    # Provide environment for executing the bat file version of RunTest
 | 
			
		||||
    FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc)
 | 
			
		||||
    FILE(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin)
 | 
			
		||||
    FILE(TO_NATIVE_PATH ${PCRETEST_EXE} winexe)
 | 
			
		||||
 | 
			
		||||
    FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_test.bat
 | 
			
		||||
    "\@REM This is a generated file.
 | 
			
		||||
\@echo off
 | 
			
		||||
setlocal
 | 
			
		||||
SET srcdir=\"${winsrc}\"
 | 
			
		||||
SET pcretest=\"${winexe}\"
 | 
			
		||||
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcretest=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcretest.exe\"
 | 
			
		||||
call %srcdir%\\RunTest.Bat
 | 
			
		||||
if errorlevel 1 exit /b 1
 | 
			
		||||
echo RunTest.bat tests successfully completed
 | 
			
		||||
")
 | 
			
		||||
 | 
			
		||||
  ADD_TEST(NAME pcre_test_bat
 | 
			
		||||
  COMMAND pcre_test.bat)
 | 
			
		||||
  SET_TESTS_PROPERTIES(pcre_test_bat PROPERTIES
 | 
			
		||||
  PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed")
 | 
			
		||||
 | 
			
		||||
    IF("$ENV{OSTYPE}" STREQUAL "msys")
 | 
			
		||||
      # Both the sh and bat file versions of RunTest are run if make test is used
 | 
			
		||||
      # in msys
 | 
			
		||||
      ADD_TEST(pcre_test_sh    sh.exe ${PROJECT_BINARY_DIR}/pcre_test.sh)
 | 
			
		||||
      IF(PCRE_BUILD_PCREGREP)
 | 
			
		||||
        ADD_TEST(pcre_grep_test  sh.exe ${PROJECT_BINARY_DIR}/pcre_grep_test.sh)
 | 
			
		||||
      ENDIF(PCRE_BUILD_PCREGREP)
 | 
			
		||||
    ENDIF("$ENV{OSTYPE}" STREQUAL "msys")
 | 
			
		||||
 | 
			
		||||
  ENDIF(WIN32)
 | 
			
		||||
 | 
			
		||||
  # Changed to accommodate testing whichever location was just built
 | 
			
		||||
 | 
			
		||||
  IF(PCRE_SUPPORT_JIT)
 | 
			
		||||
    ADD_TEST(pcre_jit_test         pcre_jit_test)
 | 
			
		||||
  ENDIF(PCRE_SUPPORT_JIT)
 | 
			
		||||
 | 
			
		||||
  IF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
    ADD_TEST(pcrecpp_test          pcrecpp_unittest)
 | 
			
		||||
    ADD_TEST(pcre_scanner_test     pcre_scanner_unittest)
 | 
			
		||||
    ADD_TEST(pcre_stringpiece_test pcre_stringpiece_unittest)
 | 
			
		||||
  ENDIF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
 | 
			
		||||
ENDIF(PCRE_BUILD_TESTS)
 | 
			
		||||
 | 
			
		||||
# Installation
 | 
			
		||||
SET(CMAKE_INSTALL_ALWAYS 1)
 | 
			
		||||
 | 
			
		||||
INSTALL(TARGETS ${targets}
 | 
			
		||||
        RUNTIME DESTINATION bin
 | 
			
		||||
        LIBRARY DESTINATION lib
 | 
			
		||||
        ARCHIVE DESTINATION lib)
 | 
			
		||||
 | 
			
		||||
INSTALL(FILES ${PCRE_HEADERS} ${PCREPOSIX_HEADERS} DESTINATION include)
 | 
			
		||||
 | 
			
		||||
FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
 | 
			
		||||
FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
 | 
			
		||||
FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
        INSTALL(FILES ${PCRECPP_HEADERS} DESTINATION include)
 | 
			
		||||
ELSE(PCRE_BUILD_PCRECPP)
 | 
			
		||||
        # Remove pcrecpp.3
 | 
			
		||||
        FOREACH(man ${man3})
 | 
			
		||||
                GET_FILENAME_COMPONENT(man_tmp ${man} NAME)
 | 
			
		||||
                IF(NOT man_tmp STREQUAL "pcrecpp.3")
 | 
			
		||||
                        # Accumulate into man3_new itself. Seeding from ${man3} on every
                        # iteration kept the whole original list, so pcrecpp.3 was never
                        # filtered out and the last page was listed twice.
                        LIST(APPEND man3_new ${man})
 | 
			
		||||
                ENDIF(NOT man_tmp STREQUAL "pcrecpp.3")
 | 
			
		||||
        ENDFOREACH(man ${man3})
 | 
			
		||||
        SET(man3 ${man3_new})
 | 
			
		||||
ENDIF(PCRE_BUILD_PCRECPP)
 | 
			
		||||
 | 
			
		||||
INSTALL(FILES ${man1} DESTINATION man/man1)
 | 
			
		||||
INSTALL(FILES ${man3} DESTINATION man/man3)
 | 
			
		||||
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
 | 
			
		||||
 | 
			
		||||
IF(MSVC AND INSTALL_MSVC_PDB)
 | 
			
		||||
    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre.pdb
 | 
			
		||||
                  ${PROJECT_BINARY_DIR}/pcreposix.pdb
 | 
			
		||||
            DESTINATION bin
 | 
			
		||||
            CONFIGURATIONS RelWithDebInfo)
 | 
			
		||||
    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcred.pdb
 | 
			
		||||
                  ${PROJECT_BINARY_DIR}/pcreposixd.pdb
 | 
			
		||||
            DESTINATION bin
 | 
			
		||||
            CONFIGURATIONS Debug)
 | 
			
		||||
ENDIF(MSVC AND INSTALL_MSVC_PDB)
 | 
			
		||||
 | 
			
		||||
# help, only for nice output
 | 
			
		||||
IF(BUILD_SHARED_LIBS)
 | 
			
		||||
  SET(BUILD_STATIC_LIBS OFF)
 | 
			
		||||
ELSE(BUILD_SHARED_LIBS)
 | 
			
		||||
  SET(BUILD_STATIC_LIBS ON)
 | 
			
		||||
ENDIF(BUILD_SHARED_LIBS)
 | 
			
		||||
 | 
			
		||||
IF(PCRE_SHOW_REPORT)
 | 
			
		||||
  STRING(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype)
 | 
			
		||||
  IF (CMAKE_C_FLAGS)
 | 
			
		||||
    SET(cfsp " ")
 | 
			
		||||
  ENDIF(CMAKE_C_FLAGS)
 | 
			
		||||
  IF (CMAKE_CXX_FLAGS)
 | 
			
		||||
    SET(cxxfsp " ")
 | 
			
		||||
  ENDIF(CMAKE_CXX_FLAGS)
 | 
			
		||||
  MESSAGE(STATUS "")
 | 
			
		||||
  MESSAGE(STATUS "")
 | 
			
		||||
  MESSAGE(STATUS "PCRE configuration summary:")
 | 
			
		||||
  MESSAGE(STATUS "")
 | 
			
		||||
  MESSAGE(STATUS "  Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
 | 
			
		||||
  MESSAGE(STATUS "  C compiler ...................... : ${CMAKE_C_COMPILER}")
 | 
			
		||||
  MESSAGE(STATUS "  C++ compiler .................... : ${CMAKE_CXX_COMPILER}")
 | 
			
		||||
  MESSAGE(STATUS "  C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
 | 
			
		||||
  MESSAGE(STATUS "  C++ compiler flags .............. : ${CMAKE_CXX_FLAGS}${cxxfsp}${CMAKE_CXX_FLAGS_${buildtype}}")
 | 
			
		||||
  MESSAGE(STATUS "")
 | 
			
		||||
  MESSAGE(STATUS "  Build 8 bit PCRE library ........ : ${PCRE_BUILD_PCRE8}")
 | 
			
		||||
  MESSAGE(STATUS "  Build 16 bit PCRE library ....... : ${PCRE_BUILD_PCRE16}")
 | 
			
		||||
  MESSAGE(STATUS "  Build 32 bit PCRE library ....... : ${PCRE_BUILD_PCRE32}")
 | 
			
		||||
  MESSAGE(STATUS "  Build C++ library ............... : ${PCRE_BUILD_PCRECPP}")
 | 
			
		||||
  MESSAGE(STATUS "  Enable JIT compiling support .... : ${PCRE_SUPPORT_JIT}")
 | 
			
		||||
  MESSAGE(STATUS "  Enable UTF support .............. : ${PCRE_SUPPORT_UTF}")
 | 
			
		||||
  MESSAGE(STATUS "  Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
 | 
			
		||||
  MESSAGE(STATUS "  Newline char/sequence ........... : ${PCRE_NEWLINE}")
 | 
			
		||||
  MESSAGE(STATUS "  \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}")
 | 
			
		||||
  MESSAGE(STATUS "  EBCDIC coding ................... : ${PCRE_EBCDIC}")
 | 
			
		||||
  MESSAGE(STATUS "  EBCDIC coding with NL=0x25 ...... : ${PCRE_EBCDIC_NL25}")
 | 
			
		||||
  MESSAGE(STATUS "  Rebuild char tables ............. : ${PCRE_REBUILD_CHARTABLES}")
 | 
			
		||||
  MESSAGE(STATUS "  No stack recursion .............. : ${PCRE_NO_RECURSE}")
 | 
			
		||||
  MESSAGE(STATUS "  POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
 | 
			
		||||
  MESSAGE(STATUS "  Internal link size .............. : ${PCRE_LINK_SIZE}")
 | 
			
		||||
  MESSAGE(STATUS "  Parentheses nest limit .......... : ${PCRE_PARENS_NEST_LIMIT}")
 | 
			
		||||
  MESSAGE(STATUS "  Match limit ..................... : ${PCRE_MATCH_LIMIT}")
 | 
			
		||||
  MESSAGE(STATUS "  Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
 | 
			
		||||
  MESSAGE(STATUS "  Build shared libs ............... : ${BUILD_SHARED_LIBS}")
 | 
			
		||||
  MESSAGE(STATUS "  Build static libs ............... : ${BUILD_STATIC_LIBS}")
 | 
			
		||||
  MESSAGE(STATUS "  Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
 | 
			
		||||
  MESSAGE(STATUS "  Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
 | 
			
		||||
  MESSAGE(STATUS "  Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
 | 
			
		||||
  MESSAGE(STATUS "  Build tests (implies pcretest  .. : ${PCRE_BUILD_TESTS}")
 | 
			
		||||
  MESSAGE(STATUS "               and pcregrep)")
 | 
			
		||||
  IF(ZLIB_FOUND)
 | 
			
		||||
    MESSAGE(STATUS "  Link pcregrep with libz ......... : ${PCRE_SUPPORT_LIBZ}")
 | 
			
		||||
  ELSE(ZLIB_FOUND)
 | 
			
		||||
    MESSAGE(STATUS "  Link pcregrep with libz ......... : Library not found" )
 | 
			
		||||
  ENDIF(ZLIB_FOUND)
 | 
			
		||||
  IF(BZIP2_FOUND)
 | 
			
		||||
    MESSAGE(STATUS "  Link pcregrep with libbz2 ....... : ${PCRE_SUPPORT_LIBBZ2}")
 | 
			
		||||
  ELSE(BZIP2_FOUND)
 | 
			
		||||
    MESSAGE(STATUS "  Link pcregrep with libbz2 ....... : Library not found" )
 | 
			
		||||
  ENDIF(BZIP2_FOUND)
 | 
			
		||||
  IF(EDITLINE_FOUND)
 | 
			
		||||
    MESSAGE(STATUS "  Link pcretest with libeditline .. : ${PCRE_SUPPORT_LIBEDIT}")
 | 
			
		||||
  ELSE(EDITLINE_FOUND)
 | 
			
		||||
    MESSAGE(STATUS "  Link pcretest with libeditline .. : Library not found" )
 | 
			
		||||
  ENDIF(EDITLINE_FOUND)
 | 
			
		||||
  IF(READLINE_FOUND)
 | 
			
		||||
    MESSAGE(STATUS "  Link pcretest with libreadline .. : ${PCRE_SUPPORT_LIBREADLINE}")
 | 
			
		||||
  ELSE(READLINE_FOUND)
 | 
			
		||||
    MESSAGE(STATUS "  Link pcretest with libreadline .. : Library not found" )
 | 
			
		||||
  ENDIF(READLINE_FOUND)
 | 
			
		||||
  MESSAGE(STATUS "  Support Valgrind .................: ${PCRE_SUPPORT_VALGRIND}")
 | 
			
		||||
  MESSAGE(STATUS "  Support coverage .................: ${PCRE_SUPPORT_COVERAGE}")
 | 
			
		||||
 | 
			
		||||
  IF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
    MESSAGE(STATUS "  Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
 | 
			
		||||
    MESSAGE(STATUS "  Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
 | 
			
		||||
  ENDIF(MINGW AND NOT PCRE_STATIC)
 | 
			
		||||
 | 
			
		||||
  IF(MSVC)
 | 
			
		||||
    MESSAGE(STATUS "  Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
 | 
			
		||||
  ENDIF(MSVC)
 | 
			
		||||
 | 
			
		||||
  MESSAGE(STATUS "")
 | 
			
		||||
ENDIF(PCRE_SHOW_REPORT)
 | 
			
		||||
 | 
			
		||||
# end CMakeLists.txt
 | 
			
		||||
							
								
								
									
										5
									
								
								tools/pcre/COPYING
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								tools/pcre/COPYING
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,5 @@
 | 
			
		||||
PCRE LICENCE
 | 
			
		||||
 | 
			
		||||
Please see the file LICENCE in the PCRE distribution for licensing details.
 | 
			
		||||
 | 
			
		||||
End
 | 
			
		||||
							
								
								
									
										5477
									
								
								tools/pcre/ChangeLog
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5477
									
								
								tools/pcre/ChangeLog
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										67
									
								
								tools/pcre/CheckMan
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								tools/pcre/CheckMan
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,67 @@
 | 
			
		||||
#! /usr/bin/perl
 | 
			
		||||
 | 
			
		||||
# A script to scan PCRE's man pages to check for typos in the control
 | 
			
		||||
# sequences. I use only a small set of the available repertoire, so it is 
 | 
			
		||||
# straightforward to check that nothing else has slipped in by mistake. This
 | 
			
		||||
# script should be called in the doc directory.
 | 
			
		||||
 | 
			
		||||
$yield = 0;
 | 
			
		||||
 | 
			
		||||
while (scalar(@ARGV) > 0)
 | 
			
		||||
  {
 | 
			
		||||
  $line = 0; 
 | 
			
		||||
  $file = shift @ARGV;
 | 
			
		||||
    
 | 
			
		||||
  # Three-argument open: the command-line name is taken literally and is
  # never interpreted as a mode prefix or a pipe.
  open (IN, "<", $file) || die "Failed to open $file\n";
 | 
			
		||||
  
 | 
			
		||||
  while (<IN>)
 | 
			
		||||
    {  
 | 
			
		||||
    $line++; 
 | 
			
		||||
    if (/^\s*$/)
 | 
			
		||||
      {
 | 
			
		||||
      printf "Empty line $line of $file\n";
 | 
			
		||||
      $yield = 1;  
 | 
			
		||||
      }   
 | 
			
		||||
    elsif (/^\./)
 | 
			
		||||
      {
 | 
			
		||||
      if (!/^\.\s*$|
 | 
			
		||||
            ^\.B\s+\S| 
 | 
			
		||||
            ^\.TH\s\S|
 | 
			
		||||
            ^\.SH\s\S|
 | 
			
		||||
            ^\.SS\s\S|
 | 
			
		||||
            ^\.TP(?:\s?\d+)?\s*$|
 | 
			
		||||
            ^\.SM\s*$|
 | 
			
		||||
            ^\.br\s*$| 
 | 
			
		||||
            ^\.rs\s*$| 
 | 
			
		||||
            ^\.sp\s*$| 
 | 
			
		||||
            ^\.nf\s*$| 
 | 
			
		||||
            ^\.fi\s*$| 
 | 
			
		||||
            ^\.P\s*$| 
 | 
			
		||||
            ^\.PP\s*$| 
 | 
			
		||||
            ^\.\\"(?:\ HREF)?\s*$|
 | 
			
		||||
            ^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
 | 
			
		||||
            ^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
 | 
			
		||||
            ^\.\\"\s<\/a>\s*$|
 | 
			
		||||
            ^\.\\"\sJOINSH\s*$|
 | 
			
		||||
            ^\.\\"\sJOIN\s*$/x  
 | 
			
		||||
         )
 | 
			
		||||
        {
 | 
			
		||||
        printf "Bad control line $line of $file\n";
 | 
			
		||||
        $yield = 1;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    else
 | 
			
		||||
      {
 | 
			
		||||
      if (/\\[^ef]|\\f[^IBP]/)
 | 
			
		||||
        {
 | 
			
		||||
        printf "Bad backslash in line $line of $file\n";  
 | 
			
		||||
        $yield = 1; 
 | 
			
		||||
        } 
 | 
			
		||||
      }   
 | 
			
		||||
    }
 | 
			
		||||
     
 | 
			
		||||
  close(IN);   
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
exit $yield;
 | 
			
		||||
# End  
 | 
			
		||||
							
								
								
									
										113
									
								
								tools/pcre/CleanTxt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								tools/pcre/CleanTxt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,113 @@
 | 
			
		||||
#! /usr/bin/perl -w
 | 
			
		||||
 | 
			
		||||
# Script to take the output of nroff -man and remove all the backspacing and
 | 
			
		||||
# the page footers and the screen commands etc so that it is more usefully
 | 
			
		||||
# readable online. In fact, in the latest nroff, intermediate footers don't
 | 
			
		||||
# seem to be generated any more.
 | 
			
		||||
 | 
			
		||||
$blankcount = 0;
 | 
			
		||||
$lastwascut = 0;
 | 
			
		||||
$firstheader = 1;
 | 
			
		||||
 | 
			
		||||
# Input on STDIN; output to STDOUT.
 | 
			
		||||
 | 
			
		||||
while (<STDIN>)
 | 
			
		||||
  {
 | 
			
		||||
  s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
 | 
			
		||||
  s/.\x8//g;         # Remove "char, backspace"
 | 
			
		||||
 | 
			
		||||
  # Handle header lines. Retain only the first one we encounter, but remove
 | 
			
		||||
  # the blank line that follows. Any others (e.g. at end of document) and the
 | 
			
		||||
  # following blank line are dropped.
 | 
			
		||||
 | 
			
		||||
  if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
 | 
			
		||||
    {
 | 
			
		||||
    if ($firstheader)
 | 
			
		||||
      {
 | 
			
		||||
      $firstheader = 0;
 | 
			
		||||
      print;
 | 
			
		||||
      $lastprinted = $_;
 | 
			
		||||
      $lastwascut = 0;
 | 
			
		||||
      }
 | 
			
		||||
    $_=<STDIN>;       # Remove a blank that follows
 | 
			
		||||
    next;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  # Count runs of empty lines
 | 
			
		||||
 | 
			
		||||
  if (/^\s*$/)
 | 
			
		||||
    {
 | 
			
		||||
    $blankcount++;
 | 
			
		||||
    $lastwascut = 0;
 | 
			
		||||
    next;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  # If a chunk of lines has been cut out (page footer) and the next line
 | 
			
		||||
  # has a different indentation, put back one blank line.
 | 
			
		||||
 | 
			
		||||
  if ($lastwascut && $blankcount < 1 && defined($lastprinted))
 | 
			
		||||
    {
 | 
			
		||||
    ($a) = $lastprinted =~ /^(\s*)/;
 | 
			
		||||
    ($b) = $_ =~ /^(\s*)/;
 | 
			
		||||
    $blankcount++ if ($a ne $b);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  # We get here only when we have a non-blank line in hand. If it was preceded
 | 
			
		||||
  # by 3 or more blank lines, read the next 3 lines and see if they are blank.
 | 
			
		||||
  # If so, remove all 7 lines, and remember that we have just done a cut.
 | 
			
		||||
 | 
			
		||||
  if ($blankcount >= 3)
 | 
			
		||||
    {
 | 
			
		||||
    for ($i = 0; $i < 3; $i++)
 | 
			
		||||
      {
 | 
			
		||||
      $next[$i] = <STDIN>;
 | 
			
		||||
      $next[$i] = "" if !defined $next[$i];
 | 
			
		||||
      $next[$i] =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
 | 
			
		||||
      $next[$i] =~ s/.\x8//g;         # Remove "char, backspace"
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    # Cut out chunks of the form <3 blanks><non-blank><3 blanks>
 | 
			
		||||
 | 
			
		||||
    if ($next[0] =~ /^\s*$/ &&
 | 
			
		||||
        $next[1] =~ /^\s*$/ &&
 | 
			
		||||
        $next[2] =~ /^\s*$/)
 | 
			
		||||
      {
 | 
			
		||||
      $blankcount -= 3;
 | 
			
		||||
      $lastwascut = 1;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    # Otherwise output the saved blanks, the current, and the next three
 | 
			
		||||
    # lines. Remember the last printed line.
 | 
			
		||||
 | 
			
		||||
    else
 | 
			
		||||
      {
 | 
			
		||||
      for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
 | 
			
		||||
      print;
 | 
			
		||||
      for ($i = 0; $i < 3; $i++)
 | 
			
		||||
        {
 | 
			
		||||
        $next[$i] =~ s/.\x8//g;
 | 
			
		||||
        print $next[$i];
 | 
			
		||||
        $lastprinted = $_;
 | 
			
		||||
        }
 | 
			
		||||
      $lastwascut = 0;
 | 
			
		||||
      $blankcount = 0;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  # This non-blank line is not preceded by 3 or more blank lines. Output
 | 
			
		||||
  # any blanks there are, and the line. Remember it. Force two blank lines
 | 
			
		||||
  # before headings.
 | 
			
		||||
 | 
			
		||||
  else
 | 
			
		||||
    {
 | 
			
		||||
    $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ &&
 | 
			
		||||
      defined($lastprinted);
 | 
			
		||||
    for ($i = 0; $i < $blankcount; $i++) { print "\n"; }
 | 
			
		||||
    print;
 | 
			
		||||
    $lastprinted = $_;
 | 
			
		||||
    $lastwascut = 0;
 | 
			
		||||
    $blankcount = 0;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
# End
 | 
			
		||||
							
								
								
									
										35
									
								
								tools/pcre/Detrail
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								tools/pcre/Detrail
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
			
		||||
#!/usr/bin/perl
 | 
			
		||||
 | 
			
		||||
# This is a script for removing trailing whitespace from lines in files that
 | 
			
		||||
# are listed on the command line.
 | 
			
		||||
 | 
			
		||||
# This subroutine does the work for one file.
 | 
			
		||||
 | 
			
		||||
sub detrail {
 | 
			
		||||
my($file) = $_[0];
 | 
			
		||||
my($changed) = 0;
 | 
			
		||||
# Three-argument open: the file name is used literally, so names that begin
# or end with characters such as '>' or '|' cannot change the open mode.
open(IN, "<", $file) || die "Can't open $file for input";
 | 
			
		||||
@lines = <IN>;
 | 
			
		||||
close(IN);
 | 
			
		||||
foreach (@lines)
 | 
			
		||||
  {
 | 
			
		||||
  if (/\s+\n$/)
 | 
			
		||||
    {
 | 
			
		||||
    s/\s+\n$/\n/;
 | 
			
		||||
    $changed = 1;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
if ($changed)
 | 
			
		||||
  {
 | 
			
		||||
  open(OUT, ">$file") || die "Can't open $file for output";
 | 
			
		||||
  print OUT @lines;
 | 
			
		||||
  close(OUT);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
# This is the main program
 | 
			
		||||
 | 
			
		||||
# Print list elements with nothing between them, so that the lines of each
# file are written back exactly as they are held in memory.

$, = "";

# Process each file named on the command line in turn.

foreach my $path (@ARGV) { detrail($path); }
 | 
			
		||||
 | 
			
		||||
# End
 | 
			
		||||
							
								
								
									
										528
									
								
								tools/pcre/HACKING
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										528
									
								
								tools/pcre/HACKING
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,528 @@
 | 
			
		||||
Technical Notes about PCRE
 | 
			
		||||
--------------------------
 | 
			
		||||
 | 
			
		||||
These are very rough technical notes that record potentially useful information 
 | 
			
		||||
about PCRE internals. For information about testing PCRE, see the pcretest 
 | 
			
		||||
documentation and the comment at the head of the RunTest file.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Historical note 1
 | 
			
		||||
-----------------
 | 
			
		||||
 | 
			
		||||
Many years ago I implemented some regular expression functions to an algorithm
 | 
			
		||||
suggested by Martin Richards. These were not Unix-like in form, and were quite
 | 
			
		||||
restricted in what they could do by comparison with Perl. The interesting part
 | 
			
		||||
about the algorithm was that the amount of space required to hold the compiled
 | 
			
		||||
form of an expression was known in advance. The code to apply an expression did
 | 
			
		||||
not operate by backtracking, as the original Henry Spencer code and current
 | 
			
		||||
Perl code do, but instead checked all possibilities simultaneously by keeping
 | 
			
		||||
a list of current states and checking all of them as it advanced through the
 | 
			
		||||
subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
 | 
			
		||||
algorithm", though it was not a traditional Finite State Machine (FSM). When
 | 
			
		||||
the pattern was all used up, all remaining states were possible matches, and
 | 
			
		||||
the one matching the longest subset of the subject string was chosen. This did
 | 
			
		||||
not necessarily maximize the individual wild portions of the pattern, as is
 | 
			
		||||
expected in Unix and Perl-style regular expressions.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Historical note 2
 | 
			
		||||
-----------------
 | 
			
		||||
 | 
			
		||||
By contrast, the code originally written by Henry Spencer (which was
 | 
			
		||||
subsequently heavily modified for Perl) compiles the expression twice: once in
 | 
			
		||||
a dummy mode in order to find out how much store will be needed, and then for
 | 
			
		||||
real. (The Perl version probably doesn't do this any more; I'm talking about
 | 
			
		||||
the original library.) The execution function operates by backtracking and
 | 
			
		||||
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
 | 
			
		||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
 | 
			
		||||
Friedl's terminology.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
OK, here's the real stuff
 | 
			
		||||
-------------------------
 | 
			
		||||
 | 
			
		||||
For the set of functions that form the "basic" PCRE library (which are
 | 
			
		||||
unrelated to those mentioned above), I tried at first to invent an algorithm
 | 
			
		||||
that used an amount of store bounded by a multiple of the number of characters
 | 
			
		||||
in the pattern, to save on compiling time. However, because of the greater
 | 
			
		||||
complexity in Perl regular expressions, I couldn't do this. In any case, a
 | 
			
		||||
first pass through the pattern is helpful for other reasons. 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Support for 16-bit and 32-bit data strings
 | 
			
		||||
-------------------------------------------
 | 
			
		||||
 | 
			
		||||
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
 | 
			
		||||
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
 | 
			
		||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
 | 
			
		||||
different libraries. In the description that follows, the word "short" is used
 | 
			
		||||
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
 | 
			
		||||
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
 | 
			
		||||
However, so as not to over-complicate the text, the names of PCRE functions are
 | 
			
		||||
given in 8-bit form only.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Computing the memory requirement: how it was
 | 
			
		||||
--------------------------------------------
 | 
			
		||||
 | 
			
		||||
Up to and including release 6.7, PCRE worked by running a very degenerate first
 | 
			
		||||
pass to calculate a maximum store size, and then a second pass to do the real
 | 
			
		||||
compile - which might use a bit less than the predicted amount of memory. The
 | 
			
		||||
idea was that this would turn out faster than the Henry Spencer code because
 | 
			
		||||
the first pass is degenerate and the second pass can just store stuff straight
 | 
			
		||||
into the vector, which it knows is big enough.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Computing the memory requirement: how it is
 | 
			
		||||
-------------------------------------------
 | 
			
		||||
 | 
			
		||||
By the time I was working on a potential 6.8 release, the degenerate first pass
 | 
			
		||||
had become very complicated and hard to maintain. Indeed one of the early
 | 
			
		||||
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
 | 
			
		||||
I had a flash of inspiration as to how I could run the real compile function in
 | 
			
		||||
a "fake" mode that enables it to compute how much memory it would need, while
 | 
			
		||||
actually only ever using a few hundred bytes of working memory, and without too
 | 
			
		||||
many tests of the mode that might slow it down. So I refactored the compiling
 | 
			
		||||
functions to work this way. This got rid of about 600 lines of source. It
 | 
			
		||||
should make future maintenance and development easier. As this was such a major 
 | 
			
		||||
change, I never released 6.8, instead upping the number to 7.0 (other quite 
 | 
			
		||||
major changes were also present in the 7.0 release).
 | 
			
		||||
 | 
			
		||||
A side effect of this work was that the previous limit of 200 on the nesting
 | 
			
		||||
depth of parentheses was removed. However, there is a downside: pcre_compile()
 | 
			
		||||
runs more slowly than before (30% or more, depending on the pattern) because it
 | 
			
		||||
is doing a full analysis of the pattern. My hope was that this would not be a
 | 
			
		||||
big issue, and in the event, nobody has commented on it.
 | 
			
		||||
 | 
			
		||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
 | 
			
		||||
(default 250, settable at build time) so as to put a limit on the amount of 
 | 
			
		||||
system stack used by pcre_compile(). This is a safety feature for environments 
 | 
			
		||||
with small stacks where the patterns are provided by users.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Traditional matching function
 | 
			
		||||
-----------------------------
 | 
			
		||||
 | 
			
		||||
The "traditional", and original, matching function is called pcre_exec(), and 
 | 
			
		||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm 
 | 
			
		||||
and the way that Perl works. This is not surprising, since it is intended to be
 | 
			
		||||
as compatible with Perl as possible. This is the function most users of PCRE
 | 
			
		||||
will use most of the time. From release 8.20, if PCRE is compiled with 
 | 
			
		||||
just-in-time (JIT) support, and studying a compiled pattern with JIT is 
 | 
			
		||||
successful, the JIT code is run instead of the normal pcre_exec() code, but the 
 | 
			
		||||
result is the same.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Supplementary matching function
 | 
			
		||||
-------------------------------
 | 
			
		||||
 | 
			
		||||
From PCRE 6.0, there is also a supplementary matching function called 
 | 
			
		||||
pcre_dfa_exec(). This implements a DFA matching algorithm that searches 
 | 
			
		||||
simultaneously for all possible matches that start at one point in the subject 
 | 
			
		||||
string. (Going back to my roots: see Historical Note 1 above.) This function 
 | 
			
		||||
interprets the same compiled pattern data as pcre_exec(); however, not all the 
 | 
			
		||||
facilities are available, and those that are do not always work in quite the 
 | 
			
		||||
same way. See the user documentation for details.
 | 
			
		||||
 | 
			
		||||
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM, 
 | 
			
		||||
because it may have a number of states active at one time. More work would be
 | 
			
		||||
needed at compile time to produce a traditional FSM where only one state is
 | 
			
		||||
ever active at once. I believe some other regex matchers work this way. JIT
 | 
			
		||||
support is not available for this kind of matching.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Changeable options
 | 
			
		||||
------------------
 | 
			
		||||
 | 
			
		||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some 
 | 
			
		||||
others) may change in the middle of patterns. From PCRE 8.13, their processing
 | 
			
		||||
is handled entirely at compile time by generating different opcodes for the
 | 
			
		||||
different settings. The runtime functions do not need to keep track of an
 | 
			
		||||
options state any more.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Format of compiled patterns
 | 
			
		||||
---------------------------
 | 
			
		||||
 | 
			
		||||
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
 | 
			
		||||
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
 | 
			
		||||
variable length. The first unit in an item contains an opcode, and the length
 | 
			
		||||
of the item is either implicit in the opcode or contained in the data that
 | 
			
		||||
follows it.
 | 
			
		||||
 | 
			
		||||
In many cases listed below, LINK_SIZE data values are specified for offsets
 | 
			
		||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
 | 
			
		||||
default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
 | 
			
		||||
4-byte values for these offsets, although this impairs the performance. (3-byte
 | 
			
		||||
LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
 | 
			
		||||
larger than 2 is necessary only when patterns whose compiled length is greater
 | 
			
		||||
than 64K are going to be processed. In this description, we assume the "normal"
 | 
			
		||||
compilation options. Data values that are counts (e.g. quantifiers) are two
 | 
			
		||||
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
 | 
			
		||||
and 32-bit modes.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Opcodes with no following data
 | 
			
		||||
------------------------------
 | 
			
		||||
 | 
			
		||||
These items are all just one unit long
 | 
			
		||||
 | 
			
		||||
  OP_END                 end of pattern
 | 
			
		||||
  OP_ANY                 match any one character other than newline
 | 
			
		||||
  OP_ALLANY              match any one character, including newline
 | 
			
		||||
  OP_ANYBYTE             match any single unit, even in UTF-8/16 mode
 | 
			
		||||
  OP_SOD                 match start of data: \A
 | 
			
		||||
  OP_SOM                 start of match (subject + offset): \G
 | 
			
		||||
  OP_SET_SOM             set start of match (\K)
 | 
			
		||||
  OP_CIRC                ^ (start of data)
 | 
			
		||||
  OP_CIRCM               ^ multiline mode (start of data or after newline)
 | 
			
		||||
  OP_NOT_WORD_BOUNDARY   \B
 | 
			
		||||
  OP_WORD_BOUNDARY       \b
 | 
			
		||||
  OP_NOT_DIGIT           \D
 | 
			
		||||
  OP_DIGIT               \d
 | 
			
		||||
  OP_NOT_HSPACE          \H
 | 
			
		||||
  OP_HSPACE              \h  
 | 
			
		||||
  OP_NOT_WHITESPACE      \S
 | 
			
		||||
  OP_WHITESPACE          \s
 | 
			
		||||
  OP_NOT_VSPACE          \V
 | 
			
		||||
  OP_VSPACE              \v  
 | 
			
		||||
  OP_NOT_WORDCHAR        \W
 | 
			
		||||
  OP_WORDCHAR            \w
 | 
			
		||||
  OP_EODN                match end of data or newline at end: \Z
 | 
			
		||||
  OP_EOD                 match end of data: \z
 | 
			
		||||
  OP_DOLL                $ (end of data, or before final newline)
 | 
			
		||||
  OP_DOLLM               $ multiline mode (end of data or before newline)
 | 
			
		||||
  OP_EXTUNI              match an extended Unicode grapheme cluster 
 | 
			
		||||
  OP_ANYNL               match any Unicode newline sequence 
 | 
			
		||||
  
 | 
			
		||||
  OP_ASSERT_ACCEPT       )
 | 
			
		||||
  OP_ACCEPT              ) These are Perl 5.10's "backtracking control   
 | 
			
		||||
  OP_COMMIT              ) verbs". If OP_ACCEPT is inside capturing
 | 
			
		||||
  OP_FAIL                ) parentheses, it may be preceded by one or more
 | 
			
		||||
  OP_PRUNE               ) OP_CLOSE, each followed by a count that
 | 
			
		||||
  OP_SKIP                ) indicates which parentheses must be closed.
 | 
			
		||||
  OP_THEN                )
 | 
			
		||||
  
 | 
			
		||||
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion. 
 | 
			
		||||
This ends the assertion, not the entire pattern match.  
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
Backtracking control verbs with optional data
 | 
			
		||||
---------------------------------------------
 | 
			
		||||
 | 
			
		||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
 | 
			
		||||
OP_MARK is followed by the mark name, preceded by a one-unit length, and
 | 
			
		||||
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
 | 
			
		||||
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
 | 
			
		||||
following in the same format as OP_MARK.
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
Matching literal characters
 | 
			
		||||
---------------------------
 | 
			
		||||
 | 
			
		||||
The OP_CHAR opcode is followed by a single character that is to be matched 
 | 
			
		||||
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
 | 
			
		||||
the character may be more than one unit long. In UTF-32 mode, characters
 | 
			
		||||
are always exactly one unit long.
 | 
			
		||||
 | 
			
		||||
If there is only one character in a character class, OP_CHAR or OP_CHARI is
 | 
			
		||||
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
 | 
			
		||||
for something like [^a]).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Repeating single characters
 | 
			
		||||
---------------------------
 | 
			
		||||
 | 
			
		||||
The common repeats (*, +, ?), when applied to a single character, use the
 | 
			
		||||
following opcodes, which come in caseful and caseless versions:
 | 
			
		||||
 | 
			
		||||
  Caseful         Caseless
 | 
			
		||||
  OP_STAR         OP_STARI      
 | 
			
		||||
  OP_MINSTAR      OP_MINSTARI   
 | 
			
		||||
  OP_POSSTAR      OP_POSSTARI   
 | 
			
		||||
  OP_PLUS         OP_PLUSI      
 | 
			
		||||
  OP_MINPLUS      OP_MINPLUSI   
 | 
			
		||||
  OP_POSPLUS      OP_POSPLUSI   
 | 
			
		||||
  OP_QUERY        OP_QUERYI     
 | 
			
		||||
  OP_MINQUERY     OP_MINQUERYI  
 | 
			
		||||
  OP_POSQUERY     OP_POSQUERYI  
 | 
			
		||||
 | 
			
		||||
Each opcode is followed by the character that is to be repeated. In ASCII mode,
 | 
			
		||||
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
 | 
			
		||||
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
 | 
			
		||||
minimizing versions. Those with "POS" in their names are possessive versions.
 | 
			
		||||
Other repeats make use of these opcodes:
 | 
			
		||||
 | 
			
		||||
  Caseful         Caseless
 | 
			
		||||
  OP_UPTO         OP_UPTOI    
 | 
			
		||||
  OP_MINUPTO      OP_MINUPTOI 
 | 
			
		||||
  OP_POSUPTO      OP_POSUPTOI 
 | 
			
		||||
  OP_EXACT        OP_EXACTI   
 | 
			
		||||
 | 
			
		||||
Each of these is followed by a count and then the repeated character. OP_UPTO
 | 
			
		||||
matches from 0 to the given number. A repeat with a non-zero minimum and a
 | 
			
		||||
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
 | 
			
		||||
OP_POSUPTO).
 | 
			
		||||
 | 
			
		||||
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
 | 
			
		||||
etc.) are used for repeated, negated, single-character classes such as [^a]*.
 | 
			
		||||
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
 | 
			
		||||
positive single-character classes.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Repeating character types
 | 
			
		||||
-------------------------
 | 
			
		||||
 | 
			
		||||
Repeats of things like \d are done exactly as for single characters, except
 | 
			
		||||
that instead of a character, the opcode for the type is stored in the data
 | 
			
		||||
unit. The opcodes are:
 | 
			
		||||
 | 
			
		||||
  OP_TYPESTAR
 | 
			
		||||
  OP_TYPEMINSTAR
 | 
			
		||||
  OP_TYPEPOSSTAR 
 | 
			
		||||
  OP_TYPEPLUS
 | 
			
		||||
  OP_TYPEMINPLUS
 | 
			
		||||
  OP_TYPEPOSPLUS 
 | 
			
		||||
  OP_TYPEQUERY
 | 
			
		||||
  OP_TYPEMINQUERY
 | 
			
		||||
  OP_TYPEPOSQUERY 
 | 
			
		||||
  OP_TYPEUPTO
 | 
			
		||||
  OP_TYPEMINUPTO
 | 
			
		||||
  OP_TYPEPOSUPTO 
 | 
			
		||||
  OP_TYPEEXACT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Match by Unicode property
 | 
			
		||||
-------------------------
 | 
			
		||||
 | 
			
		||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a 
 | 
			
		||||
character by testing its Unicode property (the \p and \P escape sequences).
 | 
			
		||||
Each is followed by two units that encode the desired property as a type and a
 | 
			
		||||
value. The types are a set of #defines of the form PT_xxx, and the values are
 | 
			
		||||
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
 | 
			
		||||
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
 | 
			
		||||
PT_SC (Script).
 | 
			
		||||
 | 
			
		||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
 | 
			
		||||
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
 | 
			
		||||
value.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Character classes
 | 
			
		||||
-----------------
 | 
			
		||||
 | 
			
		||||
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
 | 
			
		||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
 | 
			
		||||
something like [^a]). 
 | 
			
		||||
 | 
			
		||||
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
 | 
			
		||||
negated, single-character classes. The normal single-character opcodes
 | 
			
		||||
(OP_STAR, etc.) are used for repeated positive single-character classes.
 | 
			
		||||
 | 
			
		||||
When there is more than one character in a class, and all the code points are
 | 
			
		||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
 | 
			
		||||
negative one. In either case, the opcode is followed by a 32-byte (16-short, 
 | 
			
		||||
8-word) bit map containing a 1 bit for every character that is acceptable. The
 | 
			
		||||
bits are counted from the least significant end of each unit. In caseless mode,
 | 
			
		||||
bits for both cases are set.
 | 
			
		||||
 | 
			
		||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
 | 
			
		||||
mode, subject characters with values greater than 255 can be handled correctly.
 | 
			
		||||
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
 | 
			
		||||
 | 
			
		||||
For classes containing characters with values greater than 255 or that contain 
 | 
			
		||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
 | 
			
		||||
are less than 256, followed by a list of pairs (for a range) and single
 | 
			
		||||
characters. In caseless mode, both cases are explicitly listed.
 | 
			
		||||
 | 
			
		||||
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that 
 | 
			
		||||
this is a negative class, and XCL_MAP indicates that a bit map is present.
 | 
			
		||||
There follows the bit map, if XCL_MAP is set, and then a sequence of items
 | 
			
		||||
coded as follows:
 | 
			
		||||
 | 
			
		||||
  XCL_END      marks the end of the list
 | 
			
		||||
  XCL_SINGLE   one character follows
 | 
			
		||||
  XCL_RANGE    two characters follow
 | 
			
		||||
  XCL_PROP     a Unicode property (type, value) follows   
 | 
			
		||||
  XCL_NOTPROP  a Unicode property (type, value) follows   
 | 
			
		||||
 | 
			
		||||
If a range starts with a code point less than 256 and ends with one greater 
 | 
			
		||||
than 256, an XCL_RANGE item is used, without setting any bits in the bit map. 
 | 
			
		||||
This means that if no other items in the class set bits in the map, a map is 
 | 
			
		||||
not needed.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Back references
 | 
			
		||||
---------------
 | 
			
		||||
 | 
			
		||||
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
 | 
			
		||||
reference number if the reference is to a unique capturing group (either by
 | 
			
		||||
number or by name). When named groups are used, there may be more than one
 | 
			
		||||
group with the same name. In this case, a reference by name generates OP_DNREF
 | 
			
		||||
or OP_DNREFI. These are followed by two counts: the index (not the byte offset) 
 | 
			
		||||
in the group name table of the first entry for the required name, followed by
 | 
			
		||||
the number of groups with the same name.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Repeating character classes and back references
 | 
			
		||||
-----------------------------------------------
 | 
			
		||||
 | 
			
		||||
Single-character classes are handled specially (see above). This section
 | 
			
		||||
applies to other classes and also to back references. In both cases, the repeat
 | 
			
		||||
information follows the base item. The matching code looks at the following
 | 
			
		||||
opcode to see if it is one of
 | 
			
		||||
 | 
			
		||||
  OP_CRSTAR
 | 
			
		||||
  OP_CRMINSTAR
 | 
			
		||||
  OP_CRPOSSTAR 
 | 
			
		||||
  OP_CRPLUS
 | 
			
		||||
  OP_CRMINPLUS
 | 
			
		||||
  OP_CRPOSPLUS 
 | 
			
		||||
  OP_CRQUERY
 | 
			
		||||
  OP_CRMINQUERY
 | 
			
		||||
  OP_CRPOSQUERY 
 | 
			
		||||
  OP_CRRANGE
 | 
			
		||||
  OP_CRMINRANGE
 | 
			
		||||
  OP_CRPOSRANGE 
 | 
			
		||||
 | 
			
		||||
All but the last three are single-unit items, with no data. The others are
 | 
			
		||||
followed by the minimum and maximum repeat counts.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Brackets and alternation
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
A pair of non-capturing round brackets is wrapped round each expression at
 | 
			
		||||
compile time, so alternation always happens in the context of brackets.
 | 
			
		||||
 | 
			
		||||
[Note for North Americans: "bracket" to some English speakers, including
 | 
			
		||||
myself, can be round, square, curly, or pointy. Hence this usage rather than 
 | 
			
		||||
"parentheses".]
 | 
			
		||||
 | 
			
		||||
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
 | 
			
		||||
capturing brackets and it used a different opcode for each one. From release
 | 
			
		||||
3.5, the limit was removed by putting the bracket number into the data for
 | 
			
		||||
higher-numbered brackets. From release 7.0 all capturing brackets are handled
 | 
			
		||||
this way, using the single opcode OP_CBRA.
 | 
			
		||||
 | 
			
		||||
A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
 | 
			
		||||
next alternative OP_ALT or, if there aren't any branches, to the matching
 | 
			
		||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
 | 
			
		||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket 
 | 
			
		||||
number is a count that immediately follows the offset.
 | 
			
		||||
 | 
			
		||||
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
 | 
			
		||||
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
 | 
			
		||||
respectively (see below for possessive repetitions). All three are followed by
 | 
			
		||||
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
 | 
			
		||||
bracket opcode.
 | 
			
		||||
 | 
			
		||||
If a subpattern is quantified such that it is permitted to match zero times, it
 | 
			
		||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
 | 
			
		||||
single-unit opcodes that tell the matcher that skipping the following
 | 
			
		||||
subpattern entirely is a valid branch. In the case of the first two, not 
 | 
			
		||||
skipping the pattern is also valid (greedy and non-greedy). The third is used 
 | 
			
		||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
 | 
			
		||||
because it may be called as a subroutine from elsewhere in the regex.
 | 
			
		||||
 | 
			
		||||
A subpattern with an indefinite maximum repetition is replicated in the
 | 
			
		||||
compiled data its minimum number of times (or once with OP_BRAZERO if the
 | 
			
		||||
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
 | 
			
		||||
as appropriate.
 | 
			
		||||
 | 
			
		||||
A subpattern with a bounded maximum repetition is replicated in a nested
 | 
			
		||||
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
 | 
			
		||||
before each replication after the minimum, so that, for example, (abc){2,5} is
 | 
			
		||||
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group 
 | 
			
		||||
has the same number.
 | 
			
		||||
 | 
			
		||||
When a repeated subpattern has an unbounded upper limit, it is checked to see 
 | 
			
		||||
whether it could match an empty string. If this is the case, the opcode in the 
 | 
			
		||||
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
 | 
			
		||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
 | 
			
		||||
OP_KETRMAX, and if so, to break the loop.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Possessive brackets
 | 
			
		||||
-------------------
 | 
			
		||||
 | 
			
		||||
When a repeated group (capturing or non-capturing) is marked as possessive by
 | 
			
		||||
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
 | 
			
		||||
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead 
 | 
			
		||||
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum 
 | 
			
		||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Once-only (atomic) groups
 | 
			
		||||
-------------------------
 | 
			
		||||
 | 
			
		||||
These are just like other subpatterns, but they start with the opcode
 | 
			
		||||
OP_ONCE_NC or OP_ONCE. The former is used when there are no capturing brackets 
 | 
			
		||||
within the atomic group; the latter when there are. The distinction is needed 
 | 
			
		||||
for when there is a backtrack to before the group - any captures within the 
 | 
			
		||||
group must be reset, so it is necessary to retain backtracking points inside
 | 
			
		||||
the group even after it is complete in order to do this. When there are no 
 | 
			
		||||
captures in an atomic group, all the backtracking can be discarded when it is 
 | 
			
		||||
complete. This is more efficient, and also uses less stack.
 | 
			
		||||
 | 
			
		||||
The check for matching an empty string in an unbounded repeat is handled
 | 
			
		||||
entirely at runtime, so there are just these two opcodes for atomic groups.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Assertions
 | 
			
		||||
----------
 | 
			
		||||
 | 
			
		||||
Forward assertions are also just like other subpatterns, but starting with one
 | 
			
		||||
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
 | 
			
		||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
 | 
			
		||||
is OP_REVERSE, followed by a count of the number of characters to move back the
 | 
			
		||||
pointer in the subject string. In ASCII mode, the count is a number of units,
 | 
			
		||||
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
 | 
			
		||||
mode each character occupies exactly one unit. A separate count is present in
 | 
			
		||||
each alternative of a lookbehind assertion, allowing them to have different
 | 
			
		||||
fixed lengths.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Conditional subpatterns
 | 
			
		||||
-----------------------
 | 
			
		||||
 | 
			
		||||
These are like other subpatterns, but they start with the opcode OP_COND, or
 | 
			
		||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
 | 
			
		||||
the condition is a back reference, this is stored at the start of the
 | 
			
		||||
subpattern using the opcode OP_CREF followed by a count containing the
 | 
			
		||||
reference number, provided that the reference is to a unique capturing group.
 | 
			
		||||
If the reference was by name and there is more than one group with that name, 
 | 
			
		||||
OP_DNCREF is used instead. It is followed by two counts: the index in the group 
 | 
			
		||||
names table, and the number of groups with the same name.
 | 
			
		||||
 | 
			
		||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
 | 
			
		||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
 | 
			
		||||
subpattern using the opcode OP_RREF (with a value of zero for "the whole
 | 
			
		||||
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
 | 
			
		||||
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
 | 
			
		||||
conditional subpattern always starts with one of the assertions.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Recursion
 | 
			
		||||
---------
 | 
			
		||||
 | 
			
		||||
Recursion either matches the current regex, or some subexpression. The opcode
 | 
			
		||||
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
 | 
			
		||||
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
 | 
			
		||||
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
 | 
			
		||||
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
 | 
			
		||||
not strictly a recursion.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Callout
 | 
			
		||||
-------
 | 
			
		||||
 | 
			
		||||
OP_CALLOUT is followed by one unit of data that holds a callout number in the
 | 
			
		||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both 
 | 
			
		||||
cases there follows a count giving the offset in the pattern string to the
 | 
			
		||||
start of the following item, and another count giving the length of this item.
 | 
			
		||||
These values make it possible for pcretest to output useful tracing information 
 | 
			
		||||
using automatic callouts.
 | 
			
		||||
 | 
			
		||||
Philip Hazel
 | 
			
		||||
November 2013
 | 
			
		||||
							
								
								
									
										370
									
								
								tools/pcre/INSTALL
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										370
									
								
								tools/pcre/INSTALL
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,370 @@
 | 
			
		||||
Installation Instructions
 | 
			
		||||
*************************
 | 
			
		||||
 | 
			
		||||
Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
 | 
			
		||||
Inc.
 | 
			
		||||
 | 
			
		||||
   Copying and distribution of this file, with or without modification,
 | 
			
		||||
are permitted in any medium without royalty provided the copyright
 | 
			
		||||
notice and this notice are preserved.  This file is offered as-is,
 | 
			
		||||
without warranty of any kind.
 | 
			
		||||
 | 
			
		||||
Basic Installation
 | 
			
		||||
==================
 | 
			
		||||
 | 
			
		||||
   Briefly, the shell command `./configure && make && make install'
 | 
			
		||||
should configure, build, and install this package.  The following
 | 
			
		||||
more-detailed instructions are generic; see the `README' file for
 | 
			
		||||
instructions specific to this package.  Some packages provide this
 | 
			
		||||
`INSTALL' file but do not implement all of the features documented
 | 
			
		||||
below.  The lack of an optional feature in a given package is not
 | 
			
		||||
necessarily a bug.  More recommendations for GNU packages can be found
 | 
			
		||||
in *note Makefile Conventions: (standards)Makefile Conventions.
 | 
			
		||||
 | 
			
		||||
   The `configure' shell script attempts to guess correct values for
 | 
			
		||||
various system-dependent variables used during compilation.  It uses
 | 
			
		||||
those values to create a `Makefile' in each directory of the package.
 | 
			
		||||
It may also create one or more `.h' files containing system-dependent
 | 
			
		||||
definitions.  Finally, it creates a shell script `config.status' that
 | 
			
		||||
you can run in the future to recreate the current configuration, and a
 | 
			
		||||
file `config.log' containing compiler output (useful mainly for
 | 
			
		||||
debugging `configure').
 | 
			
		||||
 | 
			
		||||
   It can also use an optional file (typically called `config.cache'
 | 
			
		||||
and enabled with `--cache-file=config.cache' or simply `-C') that saves
 | 
			
		||||
the results of its tests to speed up reconfiguring.  Caching is
 | 
			
		||||
disabled by default to prevent problems with accidental use of stale
 | 
			
		||||
cache files.
 | 
			
		||||
 | 
			
		||||
   If you need to do unusual things to compile the package, please try
 | 
			
		||||
to figure out how `configure' could check whether to do them, and mail
 | 
			
		||||
diffs or instructions to the address given in the `README' so they can
 | 
			
		||||
be considered for the next release.  If you are using the cache, and at
 | 
			
		||||
some point `config.cache' contains results you don't want to keep, you
 | 
			
		||||
may remove or edit it.
 | 
			
		||||
 | 
			
		||||
   The file `configure.ac' (or `configure.in') is used to create
 | 
			
		||||
`configure' by a program called `autoconf'.  You need `configure.ac' if
 | 
			
		||||
you want to change it or regenerate `configure' using a newer version
 | 
			
		||||
of `autoconf'.
 | 
			
		||||
 | 
			
		||||
   The simplest way to compile this package is:
 | 
			
		||||
 | 
			
		||||
  1. `cd' to the directory containing the package's source code and type
 | 
			
		||||
     `./configure' to configure the package for your system.
 | 
			
		||||
 | 
			
		||||
     Running `configure' might take a while.  While running, it prints
 | 
			
		||||
     some messages telling which features it is checking for.
 | 
			
		||||
 | 
			
		||||
  2. Type `make' to compile the package.
 | 
			
		||||
 | 
			
		||||
  3. Optionally, type `make check' to run any self-tests that come with
 | 
			
		||||
     the package, generally using the just-built uninstalled binaries.
 | 
			
		||||
 | 
			
		||||
  4. Type `make install' to install the programs and any data files and
 | 
			
		||||
     documentation.  When installing into a prefix owned by root, it is
 | 
			
		||||
     recommended that the package be configured and built as a regular
 | 
			
		||||
     user, and only the `make install' phase executed with root
 | 
			
		||||
     privileges.
 | 
			
		||||
 | 
			
		||||
  5. Optionally, type `make installcheck' to repeat any self-tests, but
 | 
			
		||||
     this time using the binaries in their final installed location.
 | 
			
		||||
     This target does not install anything.  Running this target as a
 | 
			
		||||
     regular user, particularly if the prior `make install' required
 | 
			
		||||
     root privileges, verifies that the installation completed
 | 
			
		||||
     correctly.
 | 
			
		||||
 | 
			
		||||
  6. You can remove the program binaries and object files from the
 | 
			
		||||
     source code directory by typing `make clean'.  To also remove the
 | 
			
		||||
     files that `configure' created (so you can compile the package for
 | 
			
		||||
     a different kind of computer), type `make distclean'.  There is
 | 
			
		||||
     also a `make maintainer-clean' target, but that is intended mainly
 | 
			
		||||
     for the package's developers.  If you use it, you may have to get
 | 
			
		||||
     all sorts of other programs in order to regenerate files that came
 | 
			
		||||
     with the distribution.
 | 
			
		||||
 | 
			
		||||
  7. Often, you can also type `make uninstall' to remove the installed
 | 
			
		||||
     files again.  In practice, not all packages have tested that
 | 
			
		||||
     uninstallation works correctly, even though it is required by the
 | 
			
		||||
     GNU Coding Standards.
 | 
			
		||||
 | 
			
		||||
  8. Some packages, particularly those that use Automake, provide `make
 | 
			
		||||
     distcheck', which can be used by developers to test that all other
 | 
			
		||||
     targets like `make install' and `make uninstall' work correctly.
 | 
			
		||||
     This target is generally not run by end users.
 | 
			
		||||
 | 
			
		||||
Compilers and Options
 | 
			
		||||
=====================
 | 
			
		||||
 | 
			
		||||
   Some systems require unusual options for compilation or linking that
 | 
			
		||||
the `configure' script does not know about.  Run `./configure --help'
 | 
			
		||||
for details on some of the pertinent environment variables.
 | 
			
		||||
 | 
			
		||||
   You can give `configure' initial values for configuration parameters
 | 
			
		||||
by setting variables in the command line or in the environment.  Here
 | 
			
		||||
is an example:
 | 
			
		||||
 | 
			
		||||
     ./configure CC=c99 CFLAGS=-g LIBS=-lposix
 | 
			
		||||
 | 
			
		||||
   *Note Defining Variables::, for more details.
 | 
			
		||||
 | 
			
		||||
Compiling For Multiple Architectures
 | 
			
		||||
====================================
 | 
			
		||||
 | 
			
		||||
   You can compile the package for more than one kind of computer at the
 | 
			
		||||
same time, by placing the object files for each architecture in their
 | 
			
		||||
own directory.  To do this, you can use GNU `make'.  `cd' to the
 | 
			
		||||
directory where you want the object files and executables to go and run
 | 
			
		||||
the `configure' script.  `configure' automatically checks for the
 | 
			
		||||
source code in the directory that `configure' is in and in `..'.  This
 | 
			
		||||
is known as a "VPATH" build.
 | 
			
		||||
 | 
			
		||||
   With a non-GNU `make', it is safer to compile the package for one
 | 
			
		||||
architecture at a time in the source code directory.  After you have
 | 
			
		||||
installed the package for one architecture, use `make distclean' before
 | 
			
		||||
reconfiguring for another architecture.
 | 
			
		||||
 | 
			
		||||
   On MacOS X 10.5 and later systems, you can create libraries and
 | 
			
		||||
executables that work on multiple system types--known as "fat" or
 | 
			
		||||
"universal" binaries--by specifying multiple `-arch' options to the
 | 
			
		||||
compiler but only a single `-arch' option to the preprocessor.  Like
 | 
			
		||||
this:
 | 
			
		||||
 | 
			
		||||
     ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
 | 
			
		||||
                 CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
 | 
			
		||||
                 CPP="gcc -E" CXXCPP="g++ -E"
 | 
			
		||||
 | 
			
		||||
   This is not guaranteed to produce working output in all cases; you
 | 
			
		||||
may have to build one architecture at a time and combine the results
 | 
			
		||||
using the `lipo' tool if you have problems.
 | 
			
		||||
 | 
			
		||||
Installation Names
 | 
			
		||||
==================
 | 
			
		||||
 | 
			
		||||
   By default, `make install' installs the package's commands under
 | 
			
		||||
`/usr/local/bin', include files under `/usr/local/include', etc.  You
 | 
			
		||||
can specify an installation prefix other than `/usr/local' by giving
 | 
			
		||||
`configure' the option `--prefix=PREFIX', where PREFIX must be an
 | 
			
		||||
absolute file name.
 | 
			
		||||
 | 
			
		||||
   You can specify separate installation prefixes for
 | 
			
		||||
architecture-specific files and architecture-independent files.  If you
 | 
			
		||||
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
 | 
			
		||||
PREFIX as the prefix for installing programs and libraries.
 | 
			
		||||
Documentation and other data files still use the regular prefix.
 | 
			
		||||
 | 
			
		||||
   In addition, if you use an unusual directory layout you can give
 | 
			
		||||
options like `--bindir=DIR' to specify different values for particular
 | 
			
		||||
kinds of files.  Run `configure --help' for a list of the directories
 | 
			
		||||
you can set and what kinds of files go in them.  In general, the
 | 
			
		||||
default for these options is expressed in terms of `${prefix}', so that
 | 
			
		||||
specifying just `--prefix' will affect all of the other directory
 | 
			
		||||
specifications that were not explicitly provided.
 | 
			
		||||
 | 
			
		||||
   The most portable way to affect installation locations is to pass the
 | 
			
		||||
correct locations to `configure'; however, many packages provide one or
 | 
			
		||||
both of the following shortcuts of passing variable assignments to the
 | 
			
		||||
`make install' command line to change installation locations without
 | 
			
		||||
having to reconfigure or recompile.
 | 
			
		||||
 | 
			
		||||
   The first method involves providing an override variable for each
 | 
			
		||||
affected directory.  For example, `make install
 | 
			
		||||
prefix=/alternate/directory' will choose an alternate location for all
 | 
			
		||||
directory configuration variables that were expressed in terms of
 | 
			
		||||
`${prefix}'.  Any directories that were specified during `configure',
 | 
			
		||||
but not in terms of `${prefix}', must each be overridden at install
 | 
			
		||||
time for the entire installation to be relocated.  The approach of
 | 
			
		||||
makefile variable overrides for each directory variable is required by
 | 
			
		||||
the GNU Coding Standards, and ideally causes no recompilation.
 | 
			
		||||
However, some platforms have known limitations with the semantics of
 | 
			
		||||
shared libraries that end up requiring recompilation when using this
 | 
			
		||||
method, particularly noticeable in packages that use GNU Libtool.
 | 
			
		||||
 | 
			
		||||
   The second method involves providing the `DESTDIR' variable.  For
 | 
			
		||||
example, `make install DESTDIR=/alternate/directory' will prepend
 | 
			
		||||
`/alternate/directory' before all installation names.  The approach of
 | 
			
		||||
`DESTDIR' overrides is not required by the GNU Coding Standards, and
 | 
			
		||||
does not work on platforms that have drive letters.  On the other hand,
 | 
			
		||||
it does better at avoiding recompilation issues, and works well even
 | 
			
		||||
when some directory options were not specified in terms of `${prefix}'
 | 
			
		||||
at `configure' time.
 | 
			
		||||
 | 
			
		||||
Optional Features
 | 
			
		||||
=================
 | 
			
		||||
 | 
			
		||||
   If the package supports it, you can cause programs to be installed
 | 
			
		||||
with an extra prefix or suffix on their names by giving `configure' the
 | 
			
		||||
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
 | 
			
		||||
 | 
			
		||||
   Some packages pay attention to `--enable-FEATURE' options to
 | 
			
		||||
`configure', where FEATURE indicates an optional part of the package.
 | 
			
		||||
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
 | 
			
		||||
is something like `gnu-as' or `x' (for the X Window System).  The
 | 
			
		||||
`README' should mention any `--enable-' and `--with-' options that the
 | 
			
		||||
package recognizes.
 | 
			
		||||
 | 
			
		||||
   For packages that use the X Window System, `configure' can usually
 | 
			
		||||
find the X include and library files automatically, but if it doesn't,
 | 
			
		||||
you can use the `configure' options `--x-includes=DIR' and
 | 
			
		||||
`--x-libraries=DIR' to specify their locations.
 | 
			
		||||
 | 
			
		||||
   Some packages offer the ability to configure how verbose the
 | 
			
		||||
execution of `make' will be.  For these packages, running `./configure
 | 
			
		||||
--enable-silent-rules' sets the default to minimal output, which can be
 | 
			
		||||
overridden with `make V=1'; while running `./configure
 | 
			
		||||
--disable-silent-rules' sets the default to verbose, which can be
 | 
			
		||||
overridden with `make V=0'.
 | 
			
		||||
 | 
			
		||||
Particular systems
 | 
			
		||||
==================
 | 
			
		||||
 | 
			
		||||
   On HP-UX, the default C compiler is not ANSI C compatible.  If GNU
 | 
			
		||||
CC is not installed, it is recommended to use the following options in
 | 
			
		||||
order to use an ANSI C compiler:
 | 
			
		||||
 | 
			
		||||
     ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
 | 
			
		||||
 | 
			
		||||
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
 | 
			
		||||
 | 
			
		||||
   HP-UX `make' updates targets which have the same time stamps as
 | 
			
		||||
their prerequisites, which makes it generally unusable when shipped
 | 
			
		||||
generated files such as `configure' are involved.  Use GNU `make'
 | 
			
		||||
instead.
 | 
			
		||||
 | 
			
		||||
   On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
 | 
			
		||||
parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
 | 
			
		||||
a workaround.  If GNU CC is not installed, it is therefore recommended
 | 
			
		||||
to try
 | 
			
		||||
 | 
			
		||||
     ./configure CC="cc"
 | 
			
		||||
 | 
			
		||||
and if that doesn't work, try
 | 
			
		||||
 | 
			
		||||
     ./configure CC="cc -nodtk"
 | 
			
		||||
 | 
			
		||||
   On Solaris, don't put `/usr/ucb' early in your `PATH'.  This
 | 
			
		||||
directory contains several dysfunctional programs; working variants of
 | 
			
		||||
these programs are available in `/usr/bin'.  So, if you need `/usr/ucb'
 | 
			
		||||
in your `PATH', put it _after_ `/usr/bin'.
 | 
			
		||||
 | 
			
		||||
   On Haiku, software installed for all users goes in `/boot/common',
 | 
			
		||||
not `/usr/local'.  It is recommended to use the following options:
 | 
			
		||||
 | 
			
		||||
     ./configure --prefix=/boot/common
 | 
			
		||||
 | 
			
		||||
Specifying the System Type
 | 
			
		||||
==========================
 | 
			
		||||
 | 
			
		||||
   There may be some features `configure' cannot figure out
 | 
			
		||||
automatically, but needs to determine by the type of machine the package
 | 
			
		||||
will run on.  Usually, assuming the package is built to be run on the
 | 
			
		||||
_same_ architectures, `configure' can figure that out, but if it prints
 | 
			
		||||
a message saying it cannot guess the machine type, give it the
 | 
			
		||||
`--build=TYPE' option.  TYPE can either be a short name for the system
 | 
			
		||||
type, such as `sun4', or a canonical name which has the form:
 | 
			
		||||
 | 
			
		||||
     CPU-COMPANY-SYSTEM
 | 
			
		||||
 | 
			
		||||
where SYSTEM can have one of these forms:
 | 
			
		||||
 | 
			
		||||
     OS
 | 
			
		||||
     KERNEL-OS
 | 
			
		||||
 | 
			
		||||
   See the file `config.sub' for the possible values of each field.  If
 | 
			
		||||
`config.sub' isn't included in this package, then this package doesn't
 | 
			
		||||
need to know the machine type.
 | 
			
		||||
 | 
			
		||||
   If you are _building_ compiler tools for cross-compiling, you should
 | 
			
		||||
use the option `--target=TYPE' to select the type of system they will
 | 
			
		||||
produce code for.
 | 
			
		||||
 | 
			
		||||
   If you want to _use_ a cross compiler, that generates code for a
 | 
			
		||||
platform different from the build platform, you should specify the
 | 
			
		||||
"host" platform (i.e., that on which the generated programs will
 | 
			
		||||
eventually be run) with `--host=TYPE'.
 | 
			
		||||
 | 
			
		||||
Sharing Defaults
 | 
			
		||||
================
 | 
			
		||||
 | 
			
		||||
   If you want to set default values for `configure' scripts to share,
 | 
			
		||||
you can create a site shell script called `config.site' that gives
 | 
			
		||||
default values for variables like `CC', `cache_file', and `prefix'.
 | 
			
		||||
`configure' looks for `PREFIX/share/config.site' if it exists, then
 | 
			
		||||
`PREFIX/etc/config.site' if it exists.  Or, you can set the
 | 
			
		||||
`CONFIG_SITE' environment variable to the location of the site script.
 | 
			
		||||
A warning: not all `configure' scripts look for a site script.
 | 
			
		||||
 | 
			
		||||
Defining Variables
 | 
			
		||||
==================
 | 
			
		||||
 | 
			
		||||
   Variables not defined in a site shell script can be set in the
 | 
			
		||||
environment passed to `configure'.  However, some packages may run
 | 
			
		||||
configure again during the build, and the customized values of these
 | 
			
		||||
variables may be lost.  In order to avoid this problem, you should set
 | 
			
		||||
them in the `configure' command line, using `VAR=value'.  For example:
 | 
			
		||||
 | 
			
		||||
     ./configure CC=/usr/local2/bin/gcc
 | 
			
		||||
 | 
			
		||||
causes the specified `gcc' to be used as the C compiler (unless it is
 | 
			
		||||
overridden in the site shell script).
 | 
			
		||||
 | 
			
		||||
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
 | 
			
		||||
an Autoconf limitation.  Until the limitation is lifted, you can use
 | 
			
		||||
this workaround:
 | 
			
		||||
 | 
			
		||||
     CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
 | 
			
		||||
 | 
			
		||||
`configure' Invocation
 | 
			
		||||
======================
 | 
			
		||||
 | 
			
		||||
   `configure' recognizes the following options to control how it
 | 
			
		||||
operates.
 | 
			
		||||
 | 
			
		||||
`--help'
 | 
			
		||||
`-h'
 | 
			
		||||
     Print a summary of all of the options to `configure', and exit.
 | 
			
		||||
 | 
			
		||||
`--help=short'
 | 
			
		||||
`--help=recursive'
 | 
			
		||||
     Print a summary of the options unique to this package's
 | 
			
		||||
     `configure', and exit.  The `short' variant lists options used
 | 
			
		||||
     only in the top level, while the `recursive' variant lists options
 | 
			
		||||
     also present in any nested packages.
 | 
			
		||||
 | 
			
		||||
`--version'
 | 
			
		||||
`-V'
 | 
			
		||||
     Print the version of Autoconf used to generate the `configure'
 | 
			
		||||
     script, and exit.
 | 
			
		||||
 | 
			
		||||
`--cache-file=FILE'
 | 
			
		||||
     Enable the cache: use and save the results of the tests in FILE,
 | 
			
		||||
     traditionally `config.cache'.  FILE defaults to `/dev/null' to
 | 
			
		||||
     disable caching.
 | 
			
		||||
 | 
			
		||||
`--config-cache'
 | 
			
		||||
`-C'
 | 
			
		||||
     Alias for `--cache-file=config.cache'.
 | 
			
		||||
 | 
			
		||||
`--quiet'
 | 
			
		||||
`--silent'
 | 
			
		||||
`-q'
 | 
			
		||||
     Do not print messages saying which checks are being made.  To
 | 
			
		||||
     suppress all normal output, redirect it to `/dev/null' (any error
 | 
			
		||||
     messages will still be shown).
 | 
			
		||||
 | 
			
		||||
`--srcdir=DIR'
 | 
			
		||||
     Look for the package's source code in directory DIR.  Usually
 | 
			
		||||
     `configure' can determine that directory automatically.
 | 
			
		||||
 | 
			
		||||
`--prefix=DIR'
 | 
			
		||||
     Use DIR as the installation prefix.  *note Installation Names::
 | 
			
		||||
     for more details, including other options available for fine-tuning
 | 
			
		||||
     the installation locations.
 | 
			
		||||
 | 
			
		||||
`--no-create'
 | 
			
		||||
`-n'
 | 
			
		||||
     Run the configure checks, but stop before creating any output
 | 
			
		||||
     files.
 | 
			
		||||
 | 
			
		||||
`configure' also accepts some other, not widely useful, options.  Run
 | 
			
		||||
`configure --help' for more details.
 | 
			
		||||
							
								
								
									
										92
									
								
								tools/pcre/LICENCE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								tools/pcre/LICENCE
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,92 @@
 | 
			
		||||
PCRE LICENCE
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
PCRE is a library of functions to support regular expressions whose syntax
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
 | 
			
		||||
specified below. The documentation for PCRE, supplied in the "doc"
 | 
			
		||||
directory, is distributed under the same terms as the software itself.
 | 
			
		||||
 | 
			
		||||
The basic library functions are written in C and are freestanding. Also
 | 
			
		||||
included in the distribution is a set of C++ wrapper functions, and a
 | 
			
		||||
just-in-time compiler that can be used to optimize pattern matching. These
 | 
			
		||||
are both optional features that can be omitted when the library is built.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE BASIC LIBRARY FUNCTIONS
 | 
			
		||||
---------------------------
 | 
			
		||||
 | 
			
		||||
Written by:       Philip Hazel
 | 
			
		||||
Email local part: ph10
 | 
			
		||||
Email domain:     cam.ac.uk
 | 
			
		||||
 | 
			
		||||
University of Cambridge Computing Service,
 | 
			
		||||
Cambridge, England.
 | 
			
		||||
 | 
			
		||||
Copyright (c) 1997-2014 University of Cambridge
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
PCRE JUST-IN-TIME COMPILATION SUPPORT
 | 
			
		||||
-------------------------------------
 | 
			
		||||
 | 
			
		||||
Written by:       Zoltan Herczeg
 | 
			
		||||
Email local part: hzmester
 | 
			
		||||
Email domain:     freemail.hu
 | 
			
		||||
 | 
			
		||||
Copyright(c) 2010-2014 Zoltan Herczeg
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
STACK-LESS JUST-IN-TIME COMPILER
 | 
			
		||||
--------------------------------
 | 
			
		||||
 | 
			
		||||
Written by:       Zoltan Herczeg
 | 
			
		||||
Email local part: hzmester
 | 
			
		||||
Email domain:     freemail.hu
 | 
			
		||||
 | 
			
		||||
Copyright(c) 2009-2014 Zoltan Herczeg
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE C++ WRAPPER FUNCTIONS
 | 
			
		||||
-------------------------
 | 
			
		||||
 | 
			
		||||
Contributed by:   Google Inc.
 | 
			
		||||
 | 
			
		||||
Copyright (c) 2007-2012, Google Inc.
 | 
			
		||||
All rights reserved.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE "BSD" LICENCE
 | 
			
		||||
-----------------
 | 
			
		||||
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are met:
 | 
			
		||||
 | 
			
		||||
    * Redistributions of source code must retain the above copyright notice,
 | 
			
		||||
      this list of conditions and the following disclaimer.
 | 
			
		||||
 | 
			
		||||
    * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
      notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
      documentation and/or other materials provided with the distribution.
 | 
			
		||||
 | 
			
		||||
    * Neither the name of the University of Cambridge nor the name of Google
 | 
			
		||||
      Inc. nor the names of their contributors may be used to endorse or
 | 
			
		||||
      promote products derived from this software without specific prior
 | 
			
		||||
      written permission.
 | 
			
		||||
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | 
			
		||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 | 
			
		||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | 
			
		||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 | 
			
		||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 | 
			
		||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 | 
			
		||||
POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 | 
			
		||||
End
 | 
			
		||||
							
								
								
									
										894
									
								
								tools/pcre/Makefile.am
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										894
									
								
								tools/pcre/Makefile.am
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,894 @@
 | 
			
		||||
## Process this file with automake to produce Makefile.in.
 | 
			
		||||
 | 
			
		||||
ACLOCAL_AMFLAGS = -I m4
 | 
			
		||||
 | 
			
		||||
dist_doc_DATA = \
 | 
			
		||||
  doc/pcre.txt \
 | 
			
		||||
  doc/pcre-config.txt \
 | 
			
		||||
  doc/pcregrep.txt \
 | 
			
		||||
  doc/pcretest.txt \
 | 
			
		||||
  AUTHORS \
 | 
			
		||||
  COPYING \
 | 
			
		||||
  ChangeLog \
 | 
			
		||||
  LICENCE \
 | 
			
		||||
  NEWS \
 | 
			
		||||
  README
 | 
			
		||||
 | 
			
		||||
# Note that pcrecpp.html is not in this list; it is listed separately below.
 | 
			
		||||
 | 
			
		||||
dist_html_DATA = \
 | 
			
		||||
  doc/html/NON-AUTOTOOLS-BUILD.txt \
 | 
			
		||||
  doc/html/README.txt \
 | 
			
		||||
  doc/html/index.html \
 | 
			
		||||
  doc/html/pcre-config.html \
 | 
			
		||||
  doc/html/pcre.html \
 | 
			
		||||
  doc/html/pcre16.html \
 | 
			
		||||
  doc/html/pcre32.html \
 | 
			
		||||
  doc/html/pcre_assign_jit_stack.html \
 | 
			
		||||
  doc/html/pcre_compile.html \
 | 
			
		||||
  doc/html/pcre_compile2.html \
 | 
			
		||||
  doc/html/pcre_config.html \
 | 
			
		||||
  doc/html/pcre_copy_named_substring.html \
 | 
			
		||||
  doc/html/pcre_copy_substring.html \
 | 
			
		||||
  doc/html/pcre_dfa_exec.html \
 | 
			
		||||
  doc/html/pcre_exec.html \
 | 
			
		||||
  doc/html/pcre_free_study.html \
 | 
			
		||||
  doc/html/pcre_free_substring.html \
 | 
			
		||||
  doc/html/pcre_free_substring_list.html \
 | 
			
		||||
  doc/html/pcre_fullinfo.html \
 | 
			
		||||
  doc/html/pcre_get_named_substring.html \
 | 
			
		||||
  doc/html/pcre_get_stringnumber.html \
 | 
			
		||||
  doc/html/pcre_get_stringtable_entries.html \
 | 
			
		||||
  doc/html/pcre_get_substring.html \
 | 
			
		||||
  doc/html/pcre_get_substring_list.html \
 | 
			
		||||
  doc/html/pcre_jit_exec.html \
 | 
			
		||||
  doc/html/pcre_jit_stack_alloc.html \
 | 
			
		||||
  doc/html/pcre_jit_stack_free.html \
 | 
			
		||||
  doc/html/pcre_maketables.html \
 | 
			
		||||
  doc/html/pcre_pattern_to_host_byte_order.html \
 | 
			
		||||
  doc/html/pcre_refcount.html \
 | 
			
		||||
  doc/html/pcre_study.html \
 | 
			
		||||
  doc/html/pcre_utf16_to_host_byte_order.html \
 | 
			
		||||
  doc/html/pcre_utf32_to_host_byte_order.html \
 | 
			
		||||
  doc/html/pcre_version.html \
 | 
			
		||||
  doc/html/pcreapi.html \
 | 
			
		||||
  doc/html/pcrebuild.html \
 | 
			
		||||
  doc/html/pcrecallout.html \
 | 
			
		||||
  doc/html/pcrecompat.html \
 | 
			
		||||
  doc/html/pcredemo.html \
 | 
			
		||||
  doc/html/pcregrep.html \
 | 
			
		||||
  doc/html/pcrejit.html \
 | 
			
		||||
  doc/html/pcrelimits.html \
 | 
			
		||||
  doc/html/pcrematching.html \
 | 
			
		||||
  doc/html/pcrepartial.html \
 | 
			
		||||
  doc/html/pcrepattern.html \
 | 
			
		||||
  doc/html/pcreperform.html \
 | 
			
		||||
  doc/html/pcreposix.html \
 | 
			
		||||
  doc/html/pcreprecompile.html \
 | 
			
		||||
  doc/html/pcresample.html \
 | 
			
		||||
  doc/html/pcrestack.html \
 | 
			
		||||
  doc/html/pcresyntax.html \
 | 
			
		||||
  doc/html/pcretest.html \
 | 
			
		||||
  doc/html/pcreunicode.html
 | 
			
		||||
 | 
			
		||||
pcrecpp_html = doc/html/pcrecpp.html
 | 
			
		||||
dist_noinst_DATA = $(pcrecpp_html)
 | 
			
		||||
 | 
			
		||||
if WITH_PCRE_CPP
 | 
			
		||||
html_DATA = $(pcrecpp_html)
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
# The Libtool libraries to install.  We'll add to this later.
 | 
			
		||||
lib_LTLIBRARIES =
 | 
			
		||||
 | 
			
		||||
# Unit tests you want to run when people type 'make check'.
 | 
			
		||||
# TESTS is for binary unit tests, check_SCRIPTS for script-based tests
 | 
			
		||||
TESTS =
 | 
			
		||||
check_SCRIPTS =
 | 
			
		||||
dist_noinst_SCRIPTS =
 | 
			
		||||
 | 
			
		||||
# Some of the binaries we make are to be installed, and others are
 | 
			
		||||
# (non-user-visible) helper programs needed to build libpcre, libpcre16
 | 
			
		||||
# or libpcre32.
 | 
			
		||||
bin_PROGRAMS =
 | 
			
		||||
noinst_PROGRAMS =
 | 
			
		||||
 | 
			
		||||
# Additional files to delete on 'make clean' and 'make maintainer-clean'.
 | 
			
		||||
CLEANFILES =
 | 
			
		||||
MAINTAINERCLEANFILES =
 | 
			
		||||
 | 
			
		||||
# Additional files to bundle with the distribution, over and above what
 | 
			
		||||
# the Autotools include by default.
 | 
			
		||||
EXTRA_DIST =
 | 
			
		||||
 | 
			
		||||
# These files contain additional m4 macros that are used by autoconf.
 | 
			
		||||
EXTRA_DIST += \
 | 
			
		||||
  m4/ax_pthread.m4 m4/pcre_visibility.m4
 | 
			
		||||
 | 
			
		||||
# These files contain maintenance information
 | 
			
		||||
EXTRA_DIST += \
 | 
			
		||||
  doc/perltest.txt \
 | 
			
		||||
  NON-UNIX-USE \
 | 
			
		||||
  NON-AUTOTOOLS-BUILD \
 | 
			
		||||
  HACKING
 | 
			
		||||
 | 
			
		||||
# These files are used in the preparation of a release
 | 
			
		||||
EXTRA_DIST += \
 | 
			
		||||
  PrepareRelease \
 | 
			
		||||
  CheckMan \
 | 
			
		||||
  CleanTxt \
 | 
			
		||||
  Detrail \
 | 
			
		||||
  132html \
 | 
			
		||||
  doc/index.html.src
 | 
			
		||||
 | 
			
		||||
# These files are to do with building for Virtual Pascal
 | 
			
		||||
EXTRA_DIST += \
 | 
			
		||||
  makevp.bat \
 | 
			
		||||
  makevp_c.txt \
 | 
			
		||||
  makevp_l.txt \
 | 
			
		||||
  pcregexp.pas
 | 
			
		||||
 | 
			
		||||
# These files are usable versions of pcre.h and config.h that are distributed
 | 
			
		||||
# for the benefit of people who are building PCRE manually, without the
 | 
			
		||||
# Autotools support.
 | 
			
		||||
EXTRA_DIST += \
 | 
			
		||||
  pcre.h.generic \
 | 
			
		||||
  config.h.generic
 | 
			
		||||
 | 
			
		||||
# The only difference between pcre.h.in and pcre.h is the setting of the PCRE
 | 
			
		||||
# version number. Therefore, we can create the generic version just by copying.
 | 
			
		||||
pcre.h.generic: pcre.h.in configure.ac
 | 
			
		||||
	rm -f $@
 | 
			
		||||
	cp -p pcre.h $@
 | 
			
		||||
 | 
			
		||||
# It is more complicated for config.h.generic. We need the version that results
 | 
			
		||||
# from a default configuration so as to get all the default values for PCRE
 | 
			
		||||
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
 | 
			
		||||
# doing a configure in a temporary directory. However, some trickery is needed,
 | 
			
		||||
# because the source directory may already be configured. If you just try
 | 
			
		||||
# running configure in a new directory, it complains. For this reason, we move
 | 
			
		||||
# config.status out of the way while doing the default configuration. The
 | 
			
		||||
# resulting config.h is munged by perl to put #ifdefs round any #defines for
 | 
			
		||||
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
 | 
			
		||||
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
 | 
			
		||||
# sure that PCRE_EXP_DEFN is unset (in case it has visibility settings).
 | 
			
		||||
config.h.generic: configure.ac
 | 
			
		||||
	rm -rf $@ _generic
 | 
			
		||||
	mkdir _generic
 | 
			
		||||
	cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
 | 
			
		||||
	cd _generic && $(abs_top_srcdir)/configure || :
 | 
			
		||||
	cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
 | 
			
		||||
	test -f _generic/config.h
 | 
			
		||||
	perl -n \
 | 
			
		||||
	  -e 'BEGIN{$$blank=0;}' \
 | 
			
		||||
	  -e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
 | 
			
		||||
	  -e 'if(/to make a symbol visible/){next;}' \
 | 
			
		||||
	  -e 'if(/__attribute__ \(\(visibility/){next;}' \
 | 
			
		||||
	  -e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
 | 
			
		||||
	  -e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
 | 
			
		||||
	  -e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
 | 
			
		||||
	  -e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
 | 
			
		||||
	  _generic/config.h >$@
 | 
			
		||||
	rm -rf _generic
 | 
			
		||||
 | 
			
		||||
MAINTAINERCLEANFILES += pcre.h.generic config.h.generic
 | 
			
		||||
 | 
			
		||||
# These are the header files we'll install. We do not distribute pcre.h because
 | 
			
		||||
# it is generated from pcre.h.in.
 | 
			
		||||
nodist_include_HEADERS = \
 | 
			
		||||
  pcre.h
 | 
			
		||||
include_HEADERS = \
 | 
			
		||||
  pcreposix.h
 | 
			
		||||
 | 
			
		||||
# These additional headers will be installed if C++ support is enabled. We
 | 
			
		||||
# do not distribute pcrecpparg.h or pcre_stringpiece.h, as these are generated
 | 
			
		||||
# from corresponding .h.in files (which we do distribute).
 | 
			
		||||
if WITH_PCRE_CPP
 | 
			
		||||
nodist_include_HEADERS += \
 | 
			
		||||
  pcrecpparg.h \
 | 
			
		||||
  pcre_stringpiece.h
 | 
			
		||||
include_HEADERS += \
 | 
			
		||||
  pcrecpp.h \
 | 
			
		||||
  pcre_scanner.h
 | 
			
		||||
endif # WITH_PCRE_CPP
 | 
			
		||||
 | 
			
		||||
bin_SCRIPTS = pcre-config
 | 
			
		||||
 | 
			
		||||
## ---------------------------------------------------------------
 | 
			
		||||
## The dftables program is used to rebuild character tables before compiling
 | 
			
		||||
## PCRE, if --enable-rebuild-chartables is specified. It is not a user-visible
 | 
			
		||||
## program. The default (when --enable-rebuild-chartables is not specified) is
 | 
			
		||||
## to copy a distributed set of tables that are defined for ASCII code. In this
 | 
			
		||||
## case, dftables is not needed.
 | 
			
		||||
 | 
			
		||||
if WITH_REBUILD_CHARTABLES
 | 
			
		||||
 | 
			
		||||
noinst_PROGRAMS += dftables
 | 
			
		||||
dftables_SOURCES = dftables.c
 | 
			
		||||
 | 
			
		||||
pcre_chartables.c: dftables$(EXEEXT)
 | 
			
		||||
	./dftables$(EXEEXT) $@
 | 
			
		||||
else
 | 
			
		||||
 | 
			
		||||
pcre_chartables.c: $(srcdir)/pcre_chartables.c.dist
 | 
			
		||||
	rm -f $@
 | 
			
		||||
	$(LN_S) $(srcdir)/pcre_chartables.c.dist $@
 | 
			
		||||
 | 
			
		||||
endif # WITH_REBUILD_CHARTABLES
 | 
			
		||||
 | 
			
		||||
# pcre_chartables.c is not a distributed source: it is produced by one of the
# two rules above (written by dftables, or linked via $(LN_S) from
# pcre_chartables.c.dist). Automake makes BUILT_SOURCES first on 'make all',
# 'make check' and 'make install'.
BUILT_SOURCES = pcre_chartables.c
 | 
			
		||||
 | 
			
		||||
## The main pcre library
 | 
			
		||||
 | 
			
		||||
# Build the 8 bit library if it is enabled.
 | 
			
		||||
if WITH_PCRE8
 | 
			
		||||
lib_LTLIBRARIES += libpcre.la
 | 
			
		||||
 | 
			
		||||
libpcre_la_SOURCES = \
 | 
			
		||||
  pcre_byte_order.c \
 | 
			
		||||
  pcre_compile.c \
 | 
			
		||||
  pcre_config.c \
 | 
			
		||||
  pcre_dfa_exec.c \
 | 
			
		||||
  pcre_exec.c \
 | 
			
		||||
  pcre_fullinfo.c \
 | 
			
		||||
  pcre_get.c \
 | 
			
		||||
  pcre_globals.c \
 | 
			
		||||
  pcre_internal.h \
 | 
			
		||||
  pcre_jit_compile.c \
 | 
			
		||||
  pcre_maketables.c \
 | 
			
		||||
  pcre_newline.c \
 | 
			
		||||
  pcre_ord2utf8.c \
 | 
			
		||||
  pcre_refcount.c \
 | 
			
		||||
  pcre_string_utils.c \
 | 
			
		||||
  pcre_study.c \
 | 
			
		||||
  pcre_tables.c \
 | 
			
		||||
  pcre_ucd.c \
 | 
			
		||||
  pcre_valid_utf8.c \
 | 
			
		||||
  pcre_version.c \
 | 
			
		||||
  pcre_xclass.c \
 | 
			
		||||
  ucp.h
 | 
			
		||||
 | 
			
		||||
libpcre_la_CFLAGS = \
 | 
			
		||||
  $(VISIBILITY_CFLAGS) \
 | 
			
		||||
  $(AM_CFLAGS)
 | 
			
		||||
 | 
			
		||||
libpcre_la_LIBADD =
 | 
			
		||||
 | 
			
		||||
## This file is generated as part of the building process, so don't distribute.
 | 
			
		||||
nodist_libpcre_la_SOURCES = \
 | 
			
		||||
  pcre_chartables.c
 | 
			
		||||
 | 
			
		||||
endif # WITH_PCRE8
 | 
			
		||||
 | 
			
		||||
# Build the 16 bit library if it is enabled.
 | 
			
		||||
if WITH_PCRE16
 | 
			
		||||
lib_LTLIBRARIES += libpcre16.la
 | 
			
		||||
libpcre16_la_SOURCES = \
 | 
			
		||||
  pcre16_byte_order.c \
 | 
			
		||||
  pcre16_chartables.c \
 | 
			
		||||
  pcre16_compile.c \
 | 
			
		||||
  pcre16_config.c \
 | 
			
		||||
  pcre16_dfa_exec.c \
 | 
			
		||||
  pcre16_exec.c \
 | 
			
		||||
  pcre16_fullinfo.c \
 | 
			
		||||
  pcre16_get.c \
 | 
			
		||||
  pcre16_globals.c \
 | 
			
		||||
  pcre16_jit_compile.c \
 | 
			
		||||
  pcre16_maketables.c \
 | 
			
		||||
  pcre16_newline.c \
 | 
			
		||||
  pcre16_ord2utf16.c \
 | 
			
		||||
  pcre16_refcount.c \
 | 
			
		||||
  pcre16_string_utils.c \
 | 
			
		||||
  pcre16_study.c \
 | 
			
		||||
  pcre16_tables.c \
 | 
			
		||||
  pcre16_ucd.c \
 | 
			
		||||
  pcre16_utf16_utils.c \
 | 
			
		||||
  pcre16_valid_utf16.c \
 | 
			
		||||
  pcre16_version.c \
 | 
			
		||||
  pcre16_xclass.c
 | 
			
		||||
 | 
			
		||||
libpcre16_la_CFLAGS = \
 | 
			
		||||
  $(VISIBILITY_CFLAGS) \
 | 
			
		||||
  $(AM_CFLAGS)
 | 
			
		||||
 | 
			
		||||
libpcre16_la_LIBADD =
 | 
			
		||||
 | 
			
		||||
## This file is generated as part of the building process, so don't distribute.
 | 
			
		||||
nodist_libpcre16_la_SOURCES = \
 | 
			
		||||
  pcre_chartables.c
 | 
			
		||||
 | 
			
		||||
endif # WITH_PCRE16
 | 
			
		||||
 | 
			
		||||
# Build the 32 bit library if it is enabled.
 | 
			
		||||
if WITH_PCRE32
 | 
			
		||||
lib_LTLIBRARIES += libpcre32.la
 | 
			
		||||
libpcre32_la_SOURCES = \
 | 
			
		||||
  pcre32_byte_order.c \
 | 
			
		||||
  pcre32_chartables.c \
 | 
			
		||||
  pcre32_compile.c \
 | 
			
		||||
  pcre32_config.c \
 | 
			
		||||
  pcre32_dfa_exec.c \
 | 
			
		||||
  pcre32_exec.c \
 | 
			
		||||
  pcre32_fullinfo.c \
 | 
			
		||||
  pcre32_get.c \
 | 
			
		||||
  pcre32_globals.c \
 | 
			
		||||
  pcre32_jit_compile.c \
 | 
			
		||||
  pcre32_maketables.c \
 | 
			
		||||
  pcre32_newline.c \
 | 
			
		||||
  pcre32_ord2utf32.c \
 | 
			
		||||
  pcre32_refcount.c \
 | 
			
		||||
  pcre32_string_utils.c \
 | 
			
		||||
  pcre32_study.c \
 | 
			
		||||
  pcre32_tables.c \
 | 
			
		||||
  pcre32_ucd.c \
 | 
			
		||||
  pcre32_utf32_utils.c \
 | 
			
		||||
  pcre32_valid_utf32.c \
 | 
			
		||||
  pcre32_version.c \
 | 
			
		||||
  pcre32_xclass.c
 | 
			
		||||
 | 
			
		||||
libpcre32_la_CFLAGS = \
 | 
			
		||||
  $(VISIBILITY_CFLAGS) \
 | 
			
		||||
  $(AM_CFLAGS)
 | 
			
		||||
 | 
			
		||||
libpcre32_la_LIBADD =
 | 
			
		||||
 | 
			
		||||
## This file is generated as part of the building process, so don't distribute.
 | 
			
		||||
nodist_libpcre32_la_SOURCES = \
 | 
			
		||||
  pcre_chartables.c
 | 
			
		||||
 | 
			
		||||
endif # WITH_PCRE32
 | 
			
		||||
 | 
			
		||||
# The pcre_chartables.c.dist file is the default version of pcre_chartables.c,
 | 
			
		||||
# used unless --enable-rebuild-chartables is specified.
 | 
			
		||||
EXTRA_DIST += pcre_chartables.c.dist
 | 
			
		||||
 | 
			
		||||
# The JIT compiler lives in a separate directory, but its files are #included
 | 
			
		||||
# when pcre_jit_compile.c is processed, so they must be distributed.
 | 
			
		||||
EXTRA_DIST += \
 | 
			
		||||
  sljit/sljitConfig.h \
 | 
			
		||||
  sljit/sljitConfigInternal.h \
 | 
			
		||||
  sljit/sljitExecAllocator.c \
 | 
			
		||||
  sljit/sljitLir.c \
 | 
			
		||||
  sljit/sljitLir.h \
 | 
			
		||||
  sljit/sljitNativeARM_32.c \
 | 
			
		||||
  sljit/sljitNativeARM_64.c \
 | 
			
		||||
  sljit/sljitNativeARM_T2_32.c \
 | 
			
		||||
  sljit/sljitNativeMIPS_32.c \
 | 
			
		||||
  sljit/sljitNativeMIPS_64.c \
 | 
			
		||||
  sljit/sljitNativeMIPS_common.c \
 | 
			
		||||
  sljit/sljitNativePPC_32.c \
 | 
			
		||||
  sljit/sljitNativePPC_64.c \
 | 
			
		||||
  sljit/sljitNativePPC_common.c \
 | 
			
		||||
  sljit/sljitNativeSPARC_32.c \
 | 
			
		||||
  sljit/sljitNativeSPARC_common.c \
 | 
			
		||||
  sljit/sljitNativeTILEGX_64.c \
 | 
			
		||||
  sljit/sljitNativeTILEGX-encoder.c \
 | 
			
		||||
  sljit/sljitNativeX86_32.c \
 | 
			
		||||
  sljit/sljitNativeX86_64.c \
 | 
			
		||||
  sljit/sljitNativeX86_common.c \
 | 
			
		||||
  sljit/sljitUtils.c
 | 
			
		||||
 | 
			
		||||
if WITH_PCRE8
 | 
			
		||||
libpcre_la_LDFLAGS = $(EXTRA_LIBPCRE_LDFLAGS)
 | 
			
		||||
endif # WITH_PCRE8
 | 
			
		||||
if WITH_PCRE16
 | 
			
		||||
libpcre16_la_LDFLAGS = $(EXTRA_LIBPCRE16_LDFLAGS)
 | 
			
		||||
endif # WITH_PCRE16
 | 
			
		||||
if WITH_PCRE32
 | 
			
		||||
libpcre32_la_LDFLAGS = $(EXTRA_LIBPCRE32_LDFLAGS)
 | 
			
		||||
endif # WITH_PCRE32
 | 
			
		||||
 | 
			
		||||
if WITH_VALGRIND
 | 
			
		||||
if WITH_PCRE8
 | 
			
		||||
libpcre_la_CFLAGS += $(VALGRIND_CFLAGS)
 | 
			
		||||
endif # WITH_PCRE8
 | 
			
		||||
if WITH_PCRE16
 | 
			
		||||
libpcre16_la_CFLAGS += $(VALGRIND_CFLAGS)
 | 
			
		||||
endif # WITH_PCRE16
 | 
			
		||||
if WITH_PCRE32
 | 
			
		||||
libpcre32_la_CFLAGS += $(VALGRIND_CFLAGS)
 | 
			
		||||
endif # WITH_PCRE32
 | 
			
		||||
endif # WITH_VALGRIND
 | 
			
		||||
 | 
			
		||||
if WITH_GCOV
 | 
			
		||||
if WITH_PCRE8
 | 
			
		||||
libpcre_la_CFLAGS += $(GCOV_CFLAGS)
 | 
			
		||||
endif # WITH_PCRE8
 | 
			
		||||
if WITH_PCRE16
 | 
			
		||||
libpcre16_la_CFLAGS += $(GCOV_CFLAGS)
 | 
			
		||||
endif # WITH_PCRE16
 | 
			
		||||
if WITH_PCRE32
 | 
			
		||||
libpcre32_la_CFLAGS += $(GCOV_CFLAGS)
 | 
			
		||||
endif # WITH_PCRE32
 | 
			
		||||
endif # WITH_GCOV
 | 
			
		||||
 | 
			
		||||
CLEANFILES += pcre_chartables.c
 | 
			
		||||
 | 
			
		||||
## If JIT support is enabled, arrange for the JIT test program to run.
 | 
			
		||||
if WITH_JIT
 | 
			
		||||
TESTS += pcre_jit_test
 | 
			
		||||
noinst_PROGRAMS += pcre_jit_test
 | 
			
		||||
pcre_jit_test_SOURCES = pcre_jit_test.c
 | 
			
		||||
pcre_jit_test_CFLAGS = $(AM_CFLAGS)
 | 
			
		||||
pcre_jit_test_LDADD =
 | 
			
		||||
if WITH_PCRE8
 | 
			
		||||
pcre_jit_test_LDADD += libpcre.la
 | 
			
		||||
endif # WITH_PCRE8
 | 
			
		||||
if WITH_PCRE16
 | 
			
		||||
pcre_jit_test_LDADD += libpcre16.la
 | 
			
		||||
endif # WITH_PCRE16
 | 
			
		||||
if WITH_PCRE32
 | 
			
		||||
pcre_jit_test_LDADD += libpcre32.la
 | 
			
		||||
endif # WITH_PCRE32
 | 
			
		||||
if WITH_GCOV
 | 
			
		||||
pcre_jit_test_CFLAGS += $(GCOV_CFLAGS)
 | 
			
		||||
pcre_jit_test_LDADD += $(GCOV_LIBS)
 | 
			
		||||
endif # WITH_GCOV
 | 
			
		||||
endif # WITH_JIT
 | 
			
		||||
 | 
			
		||||
## A version of the main pcre library that has a POSIX regex API.
 | 
			
		||||
if WITH_PCRE8
 | 
			
		||||
 | 
			
		||||
lib_LTLIBRARIES += libpcreposix.la
 | 
			
		||||
libpcreposix_la_SOURCES = \
 | 
			
		||||
  pcreposix.c
 | 
			
		||||
libpcreposix_la_CFLAGS = $(VISIBILITY_CFLAGS) $(AM_CFLAGS)
 | 
			
		||||
libpcreposix_la_LDFLAGS = $(EXTRA_LIBPCREPOSIX_LDFLAGS)
 | 
			
		||||
libpcreposix_la_LIBADD = libpcre.la
 | 
			
		||||
 | 
			
		||||
if WITH_GCOV
 | 
			
		||||
libpcreposix_la_CFLAGS += $(GCOV_CFLAGS)
 | 
			
		||||
endif # WITH_GCOV
 | 
			
		||||
 | 
			
		||||
endif # WITH_PCRE8
 | 
			
		||||
 | 
			
		||||
## There's a C++ library as well.
 | 
			
		||||
if WITH_PCRE_CPP
 | 
			
		||||
 | 
			
		||||
lib_LTLIBRARIES += libpcrecpp.la
 | 
			
		||||
libpcrecpp_la_SOURCES = \
 | 
			
		||||
  pcrecpp_internal.h \
 | 
			
		||||
  pcrecpp.cc \
 | 
			
		||||
  pcre_scanner.cc \
 | 
			
		||||
  pcre_stringpiece.cc
 | 
			
		||||
libpcrecpp_la_CXXFLAGS = $(VISIBILITY_CXXFLAGS) $(AM_CXXFLAGS)
 | 
			
		||||
libpcrecpp_la_LDFLAGS = $(EXTRA_LIBPCRECPP_LDFLAGS)
 | 
			
		||||
libpcrecpp_la_LIBADD = libpcre.la
 | 
			
		||||
 | 
			
		||||
TESTS += pcrecpp_unittest
 | 
			
		||||
noinst_PROGRAMS += pcrecpp_unittest
 | 
			
		||||
pcrecpp_unittest_SOURCES = pcrecpp_unittest.cc
 | 
			
		||||
pcrecpp_unittest_CXXFLAGS = $(AM_CXXFLAGS)
 | 
			
		||||
pcrecpp_unittest_LDADD = libpcrecpp.la
 | 
			
		||||
 | 
			
		||||
TESTS += pcre_scanner_unittest
 | 
			
		||||
noinst_PROGRAMS += pcre_scanner_unittest
 | 
			
		||||
pcre_scanner_unittest_SOURCES = pcre_scanner_unittest.cc
 | 
			
		||||
pcre_scanner_unittest_CXXFLAGS = $(AM_CXXFLAGS)
 | 
			
		||||
pcre_scanner_unittest_LDADD = libpcrecpp.la
 | 
			
		||||
 | 
			
		||||
TESTS += pcre_stringpiece_unittest
 | 
			
		||||
noinst_PROGRAMS += pcre_stringpiece_unittest
 | 
			
		||||
pcre_stringpiece_unittest_SOURCES = pcre_stringpiece_unittest.cc
 | 
			
		||||
pcre_stringpiece_unittest_CXXFLAGS = $(AM_CXXFLAGS)
 | 
			
		||||
pcre_stringpiece_unittest_LDADD = libpcrecpp.la
 | 
			
		||||
 | 
			
		||||
if WITH_GCOV
 | 
			
		||||
libpcrecpp_la_CXXFLAGS += $(GCOV_CXXFLAGS)
 | 
			
		||||
pcrecpp_unittest_LDADD += $(GCOV_LIBS)
 | 
			
		||||
pcre_scanner_unittest_LDADD += $(GCOV_LIBS)
 | 
			
		||||
pcre_stringpiece_unittest_LDADD += $(GCOV_LIBS)
 | 
			
		||||
endif # WITH_GCOV
 | 
			
		||||
 | 
			
		||||
endif # WITH_PCRE_CPP
 | 
			
		||||
 | 
			
		||||
## The main unit tests
 | 
			
		||||
 | 
			
		||||
# Each unit test is a binary plus a script that runs that binary in various
 | 
			
		||||
# ways. We install these test binaries in case folks find it helpful.
 | 
			
		||||
 | 
			
		||||
TESTS += RunTest
 | 
			
		||||
dist_noinst_SCRIPTS += RunTest
 | 
			
		||||
EXTRA_DIST += RunTest.bat
 | 
			
		||||
bin_PROGRAMS += pcretest
 | 
			
		||||
pcretest_SOURCES = pcretest.c
 | 
			
		||||
pcretest_CFLAGS = $(AM_CFLAGS)
 | 
			
		||||
pcretest_LDADD = $(LIBREADLINE)
 | 
			
		||||
if WITH_PCRE8
 | 
			
		||||
pcretest_SOURCES += pcre_printint.c
 | 
			
		||||
pcretest_LDADD += libpcre.la libpcreposix.la
 | 
			
		||||
endif # WITH_PCRE8
 | 
			
		||||
if WITH_PCRE16
 | 
			
		||||
pcretest_SOURCES += pcre16_printint.c
 | 
			
		||||
pcretest_LDADD += libpcre16.la
 | 
			
		||||
endif # WITH_PCRE16
 | 
			
		||||
if WITH_PCRE32
 | 
			
		||||
pcretest_SOURCES += pcre32_printint.c
 | 
			
		||||
pcretest_LDADD += libpcre32.la
 | 
			
		||||
endif # WITH_PCRE32
 | 
			
		||||
if WITH_VALGRIND
 | 
			
		||||
pcretest_CFLAGS += $(VALGRIND_CFLAGS)
 | 
			
		||||
endif # WITH_VALGRIND
 | 
			
		||||
if WITH_GCOV
 | 
			
		||||
pcretest_CFLAGS += $(GCOV_CFLAGS)
 | 
			
		||||
pcretest_LDADD += $(GCOV_LIBS)
 | 
			
		||||
endif # WITH_GCOV
 | 
			
		||||
 | 
			
		||||
if WITH_PCRE8
 | 
			
		||||
TESTS += RunGrepTest
 | 
			
		||||
dist_noinst_SCRIPTS += RunGrepTest
 | 
			
		||||
bin_PROGRAMS += pcregrep
 | 
			
		||||
pcregrep_SOURCES = pcregrep.c
 | 
			
		||||
pcregrep_CFLAGS = $(AM_CFLAGS)
 | 
			
		||||
pcregrep_LDADD = $(LIBZ) $(LIBBZ2)
 | 
			
		||||
pcregrep_LDADD += libpcre.la libpcreposix.la
 | 
			
		||||
if WITH_GCOV
 | 
			
		||||
pcregrep_CFLAGS += $(GCOV_CFLAGS)
 | 
			
		||||
pcregrep_LDADD += $(GCOV_LIBS)
 | 
			
		||||
endif # WITH_GCOV
 | 
			
		||||
endif # WITH_PCRE8
 | 
			
		||||
 | 
			
		||||
EXTRA_DIST += \
 | 
			
		||||
  testdata/grepbinary \
 | 
			
		||||
  testdata/grepfilelist \
 | 
			
		||||
  testdata/grepinput \
 | 
			
		||||
  testdata/grepinput3 \
 | 
			
		||||
  testdata/grepinput8 \
 | 
			
		||||
  testdata/grepinputv \
 | 
			
		||||
  testdata/grepinputx \
 | 
			
		||||
  testdata/greplist \
 | 
			
		||||
  testdata/grepoutput \
 | 
			
		||||
  testdata/grepoutput8 \
 | 
			
		||||
  testdata/grepoutputN \
 | 
			
		||||
  testdata/greppatN4 \
 | 
			
		||||
  testdata/saved16 \
 | 
			
		||||
  testdata/saved16BE-1 \
 | 
			
		||||
  testdata/saved16BE-2 \
 | 
			
		||||
  testdata/saved16LE-1 \
 | 
			
		||||
  testdata/saved16LE-2 \
 | 
			
		||||
  testdata/saved32 \
 | 
			
		||||
  testdata/saved32BE-1 \
 | 
			
		||||
  testdata/saved32BE-2 \
 | 
			
		||||
  testdata/saved32LE-1 \
 | 
			
		||||
  testdata/saved32LE-2 \
 | 
			
		||||
  testdata/saved8 \
 | 
			
		||||
  testdata/testinput1 \
 | 
			
		||||
  testdata/testinput2 \
 | 
			
		||||
  testdata/testinput3 \
 | 
			
		||||
  testdata/testinput4 \
 | 
			
		||||
  testdata/testinput5 \
 | 
			
		||||
  testdata/testinput6 \
 | 
			
		||||
  testdata/testinput7 \
 | 
			
		||||
  testdata/testinput8 \
 | 
			
		||||
  testdata/testinput9 \
 | 
			
		||||
  testdata/testinput10 \
 | 
			
		||||
  testdata/testinput11 \
 | 
			
		||||
  testdata/testinput12 \
 | 
			
		||||
  testdata/testinput13 \
 | 
			
		||||
  testdata/testinput14 \
 | 
			
		||||
  testdata/testinput15 \
 | 
			
		||||
  testdata/testinput16 \
 | 
			
		||||
  testdata/testinput17 \
 | 
			
		||||
  testdata/testinput18 \
 | 
			
		||||
  testdata/testinput19 \
 | 
			
		||||
  testdata/testinput20 \
 | 
			
		||||
  testdata/testinput21 \
 | 
			
		||||
  testdata/testinput22 \
 | 
			
		||||
  testdata/testinput23 \
 | 
			
		||||
  testdata/testinput24 \
 | 
			
		||||
  testdata/testinput25 \
 | 
			
		||||
  testdata/testinput26 \
 | 
			
		||||
  testdata/testinputEBC \
 | 
			
		||||
  testdata/testoutput1 \
 | 
			
		||||
  testdata/testoutput2 \
 | 
			
		||||
  testdata/testoutput3 \
 | 
			
		||||
  testdata/testoutput3A \
 | 
			
		||||
  testdata/testoutput3B \
 | 
			
		||||
  testdata/testoutput4 \
 | 
			
		||||
  testdata/testoutput5 \
 | 
			
		||||
  testdata/testoutput6 \
 | 
			
		||||
  testdata/testoutput7 \
 | 
			
		||||
  testdata/testoutput8 \
 | 
			
		||||
  testdata/testoutput9 \
 | 
			
		||||
  testdata/testoutput10 \
 | 
			
		||||
  testdata/testoutput11-8 \
 | 
			
		||||
  testdata/testoutput11-16 \
 | 
			
		||||
  testdata/testoutput11-32 \
 | 
			
		||||
  testdata/testoutput12 \
 | 
			
		||||
  testdata/testoutput13 \
 | 
			
		||||
  testdata/testoutput14 \
 | 
			
		||||
  testdata/testoutput15 \
 | 
			
		||||
  testdata/testoutput16 \
 | 
			
		||||
  testdata/testoutput17 \
 | 
			
		||||
  testdata/testoutput18-16 \
 | 
			
		||||
  testdata/testoutput18-32 \
 | 
			
		||||
  testdata/testoutput19 \
 | 
			
		||||
  testdata/testoutput20 \
 | 
			
		||||
  testdata/testoutput21-16 \
 | 
			
		||||
  testdata/testoutput21-32 \
 | 
			
		||||
  testdata/testoutput22-16 \
 | 
			
		||||
  testdata/testoutput22-32 \
 | 
			
		||||
  testdata/testoutput23 \
 | 
			
		||||
  testdata/testoutput24 \
 | 
			
		||||
  testdata/testoutput25 \
 | 
			
		||||
  testdata/testoutput26 \
 | 
			
		||||
  testdata/testoutputEBC \
 | 
			
		||||
  testdata/wintestinput3 \
 | 
			
		||||
  testdata/wintestoutput3 \
 | 
			
		||||
  perltest.pl
 | 
			
		||||
 | 
			
		||||
CLEANFILES += \
 | 
			
		||||
	testsavedregex \
 | 
			
		||||
	teststderr \
 | 
			
		||||
        testtemp* \
 | 
			
		||||
	testtry \
 | 
			
		||||
        testNinput \
 | 
			
		||||
        testtrygrep \
 | 
			
		||||
        teststderrgrep \
 | 
			
		||||
        testNinputgrep
 | 
			
		||||
 | 
			
		||||
# PCRE demonstration program. No longer built automatically. The point is that
 | 
			
		||||
# the users should build it themselves. So just distribute the source.
 | 
			
		||||
# noinst_PROGRAMS += pcredemo
 | 
			
		||||
# pcredemo_SOURCES = pcredemo.c
 | 
			
		||||
# pcredemo_LDADD = libpcre.la
 | 
			
		||||
 | 
			
		||||
EXTRA_DIST += pcredemo.c
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## Utility rules, documentation, etc.
 | 
			
		||||
 | 
			
		||||
# A compatibility line: the old build system worked with 'make test'.
 | 
			
		||||
test: check ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# A PCRE user submitted the following addition, saying that it "will allow
 | 
			
		||||
# anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a
 | 
			
		||||
# nice DLL for Windows use". (It is used by the pcre.dll target.)
 | 
			
		||||
DLL_OBJS= pcre_byte_order.o pcre_compile.o pcre_config.o \
 | 
			
		||||
	pcre_dfa_exec.o pcre_exec.o pcre_fullinfo.o pcre_get.o \
 | 
			
		||||
	pcre_globals.o pcre_jit_compile.o pcre_maketables.o \
 | 
			
		||||
	pcre_newline.o pcre_ord2utf8.o pcre_refcount.o \
 | 
			
		||||
	pcre_study.o pcre_tables.o pcre_ucd.o \
 | 
			
		||||
	pcre_valid_utf8.o pcre_version.o pcre_chartables.o \
 | 
			
		||||
	pcre_xclass.o
 | 
			
		||||
 | 
			
		||||
# A PCRE user submitted the following addition, saying that it "will allow
 | 
			
		||||
# anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a
 | 
			
		||||
# nice DLL for Windows use".
 | 
			
		||||
pcre.dll: $(DLL_OBJS)
 | 
			
		||||
	$(CC) -shared -o pcre.dll -Wl,"--strip-all" -Wl,"--export-all-symbols" $(DLL_OBJS)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# We have .pc files for pkg-config users.
 | 
			
		||||
pkgconfigdir = $(libdir)/pkgconfig
 | 
			
		||||
pkgconfig_DATA = libpcre.pc libpcreposix.pc
 | 
			
		||||
if WITH_PCRE16
 | 
			
		||||
pkgconfig_DATA += libpcre16.pc
 | 
			
		||||
endif
 | 
			
		||||
if WITH_PCRE32
 | 
			
		||||
pkgconfig_DATA += libpcre32.pc
 | 
			
		||||
endif
 | 
			
		||||
if WITH_PCRE_CPP
 | 
			
		||||
pkgconfig_DATA += libpcrecpp.pc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
# Note that pcrecpp.3 is not in this list, but is included separately below.
 | 
			
		||||
 | 
			
		||||
dist_man_MANS = \
 | 
			
		||||
  doc/pcre-config.1 \
 | 
			
		||||
  doc/pcre.3 \
 | 
			
		||||
  doc/pcre16.3 \
 | 
			
		||||
  doc/pcre32.3 \
 | 
			
		||||
  doc/pcre_assign_jit_stack.3 \
 | 
			
		||||
  doc/pcre_compile.3 \
 | 
			
		||||
  doc/pcre_compile2.3 \
 | 
			
		||||
  doc/pcre_config.3 \
 | 
			
		||||
  doc/pcre_copy_named_substring.3 \
 | 
			
		||||
  doc/pcre_copy_substring.3 \
 | 
			
		||||
  doc/pcre_dfa_exec.3 \
 | 
			
		||||
  doc/pcre_exec.3 \
 | 
			
		||||
  doc/pcre_free_study.3 \
 | 
			
		||||
  doc/pcre_free_substring.3 \
 | 
			
		||||
  doc/pcre_free_substring_list.3 \
 | 
			
		||||
  doc/pcre_fullinfo.3 \
 | 
			
		||||
  doc/pcre_get_named_substring.3 \
 | 
			
		||||
  doc/pcre_get_stringnumber.3 \
 | 
			
		||||
  doc/pcre_get_stringtable_entries.3 \
 | 
			
		||||
  doc/pcre_get_substring.3 \
 | 
			
		||||
  doc/pcre_get_substring_list.3 \
 | 
			
		||||
  doc/pcre_jit_exec.3 \
 | 
			
		||||
  doc/pcre_jit_stack_alloc.3 \
 | 
			
		||||
  doc/pcre_jit_stack_free.3 \
 | 
			
		||||
  doc/pcre_maketables.3 \
 | 
			
		||||
  doc/pcre_pattern_to_host_byte_order.3 \
 | 
			
		||||
  doc/pcre_refcount.3 \
 | 
			
		||||
  doc/pcre_study.3 \
 | 
			
		||||
  doc/pcre_utf16_to_host_byte_order.3 \
 | 
			
		||||
  doc/pcre_utf32_to_host_byte_order.3 \
 | 
			
		||||
  doc/pcre_version.3 \
 | 
			
		||||
  doc/pcreapi.3 \
 | 
			
		||||
  doc/pcrebuild.3 \
 | 
			
		||||
  doc/pcrecallout.3 \
 | 
			
		||||
  doc/pcrecompat.3 \
 | 
			
		||||
  doc/pcredemo.3 \
 | 
			
		||||
  doc/pcregrep.1 \
 | 
			
		||||
  doc/pcrejit.3 \
 | 
			
		||||
  doc/pcrelimits.3 \
 | 
			
		||||
  doc/pcrematching.3 \
 | 
			
		||||
  doc/pcrepartial.3 \
 | 
			
		||||
  doc/pcrepattern.3 \
 | 
			
		||||
  doc/pcreperform.3 \
 | 
			
		||||
  doc/pcreposix.3 \
 | 
			
		||||
  doc/pcreprecompile.3 \
 | 
			
		||||
  doc/pcresample.3 \
 | 
			
		||||
  doc/pcrestack.3 \
 | 
			
		||||
  doc/pcresyntax.3 \
 | 
			
		||||
  doc/pcretest.1 \
 | 
			
		||||
  doc/pcreunicode.3
 | 
			
		||||
 | 
			
		||||
# Arrange for the per-function man pages to have 16- and 32-bit names as well.
 | 
			
		||||
install-data-hook:
 | 
			
		||||
	ln -sf pcre_assign_jit_stack.3		 $(DESTDIR)$(man3dir)/pcre16_assign_jit_stack.3
 | 
			
		||||
	ln -sf pcre_compile.3			 $(DESTDIR)$(man3dir)/pcre16_compile.3
 | 
			
		||||
	ln -sf pcre_compile2.3			 $(DESTDIR)$(man3dir)/pcre16_compile2.3
 | 
			
		||||
	ln -sf pcre_config.3			 $(DESTDIR)$(man3dir)/pcre16_config.3
 | 
			
		||||
	ln -sf pcre_copy_named_substring.3	 $(DESTDIR)$(man3dir)/pcre16_copy_named_substring.3
 | 
			
		||||
	ln -sf pcre_copy_substring.3		 $(DESTDIR)$(man3dir)/pcre16_copy_substring.3
 | 
			
		||||
	ln -sf pcre_dfa_exec.3			 $(DESTDIR)$(man3dir)/pcre16_dfa_exec.3
 | 
			
		||||
	ln -sf pcre_exec.3			 $(DESTDIR)$(man3dir)/pcre16_exec.3
 | 
			
		||||
	ln -sf pcre_free_study.3		 $(DESTDIR)$(man3dir)/pcre16_free_study.3
 | 
			
		||||
	ln -sf pcre_free_substring.3		 $(DESTDIR)$(man3dir)/pcre16_free_substring.3
 | 
			
		||||
	ln -sf pcre_free_substring_list.3	 $(DESTDIR)$(man3dir)/pcre16_free_substring_list.3
 | 
			
		||||
	ln -sf pcre_fullinfo.3			 $(DESTDIR)$(man3dir)/pcre16_fullinfo.3
 | 
			
		||||
	ln -sf pcre_get_named_substring.3	 $(DESTDIR)$(man3dir)/pcre16_get_named_substring.3
 | 
			
		||||
	ln -sf pcre_get_stringnumber.3		 $(DESTDIR)$(man3dir)/pcre16_get_stringnumber.3
 | 
			
		||||
	ln -sf pcre_get_stringtable_entries.3	 $(DESTDIR)$(man3dir)/pcre16_get_stringtable_entries.3
 | 
			
		||||
	ln -sf pcre_get_substring.3		 $(DESTDIR)$(man3dir)/pcre16_get_substring.3
 | 
			
		||||
	ln -sf pcre_get_substring_list.3	 $(DESTDIR)$(man3dir)/pcre16_get_substring_list.3
 | 
			
		||||
	ln -sf pcre_jit_exec.3			 $(DESTDIR)$(man3dir)/pcre16_jit_exec.3
 | 
			
		||||
	ln -sf pcre_jit_stack_alloc.3		 $(DESTDIR)$(man3dir)/pcre16_jit_stack_alloc.3
 | 
			
		||||
	ln -sf pcre_jit_stack_free.3		 $(DESTDIR)$(man3dir)/pcre16_jit_stack_free.3
 | 
			
		||||
	ln -sf pcre_maketables.3		 $(DESTDIR)$(man3dir)/pcre16_maketables.3
 | 
			
		||||
	ln -sf pcre_pattern_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre16_pattern_to_host_byte_order.3
 | 
			
		||||
	ln -sf pcre_refcount.3			 $(DESTDIR)$(man3dir)/pcre16_refcount.3
 | 
			
		||||
	ln -sf pcre_study.3			 $(DESTDIR)$(man3dir)/pcre16_study.3
 | 
			
		||||
	ln -sf pcre_utf16_to_host_byte_order.3	 $(DESTDIR)$(man3dir)/pcre16_utf16_to_host_byte_order.3
 | 
			
		||||
	ln -sf pcre_version.3			 $(DESTDIR)$(man3dir)/pcre16_version.3
 | 
			
		||||
	ln -sf pcre_assign_jit_stack.3		 $(DESTDIR)$(man3dir)/pcre32_assign_jit_stack.3
 | 
			
		||||
	ln -sf pcre_compile.3			 $(DESTDIR)$(man3dir)/pcre32_compile.3
 | 
			
		||||
	ln -sf pcre_compile2.3			 $(DESTDIR)$(man3dir)/pcre32_compile2.3
 | 
			
		||||
	ln -sf pcre_config.3			 $(DESTDIR)$(man3dir)/pcre32_config.3
 | 
			
		||||
	ln -sf pcre_copy_named_substring.3	 $(DESTDIR)$(man3dir)/pcre32_copy_named_substring.3
 | 
			
		||||
	ln -sf pcre_copy_substring.3		 $(DESTDIR)$(man3dir)/pcre32_copy_substring.3
 | 
			
		||||
	ln -sf pcre_dfa_exec.3			 $(DESTDIR)$(man3dir)/pcre32_dfa_exec.3
 | 
			
		||||
	ln -sf pcre_exec.3			 $(DESTDIR)$(man3dir)/pcre32_exec.3
 | 
			
		||||
	ln -sf pcre_free_study.3		 $(DESTDIR)$(man3dir)/pcre32_free_study.3
 | 
			
		||||
	ln -sf pcre_free_substring.3		 $(DESTDIR)$(man3dir)/pcre32_free_substring.3
 | 
			
		||||
	ln -sf pcre_free_substring_list.3	 $(DESTDIR)$(man3dir)/pcre32_free_substring_list.3
 | 
			
		||||
	ln -sf pcre_fullinfo.3			 $(DESTDIR)$(man3dir)/pcre32_fullinfo.3
 | 
			
		||||
	ln -sf pcre_get_named_substring.3	 $(DESTDIR)$(man3dir)/pcre32_get_named_substring.3
 | 
			
		||||
	ln -sf pcre_get_stringnumber.3		 $(DESTDIR)$(man3dir)/pcre32_get_stringnumber.3
 | 
			
		||||
	ln -sf pcre_get_stringtable_entries.3	 $(DESTDIR)$(man3dir)/pcre32_get_stringtable_entries.3
 | 
			
		||||
	ln -sf pcre_get_substring.3		 $(DESTDIR)$(man3dir)/pcre32_get_substring.3
 | 
			
		||||
	ln -sf pcre_get_substring_list.3	 $(DESTDIR)$(man3dir)/pcre32_get_substring_list.3
 | 
			
		||||
	ln -sf pcre_jit_exec.3			 $(DESTDIR)$(man3dir)/pcre32_jit_exec.3
 | 
			
		||||
	ln -sf pcre_jit_stack_alloc.3		 $(DESTDIR)$(man3dir)/pcre32_jit_stack_alloc.3
 | 
			
		||||
	ln -sf pcre_jit_stack_free.3		 $(DESTDIR)$(man3dir)/pcre32_jit_stack_free.3
 | 
			
		||||
	ln -sf pcre_maketables.3		 $(DESTDIR)$(man3dir)/pcre32_maketables.3
 | 
			
		||||
	ln -sf pcre_pattern_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre32_pattern_to_host_byte_order.3
 | 
			
		||||
	ln -sf pcre_refcount.3			 $(DESTDIR)$(man3dir)/pcre32_refcount.3
 | 
			
		||||
	ln -sf pcre_study.3			 $(DESTDIR)$(man3dir)/pcre32_study.3
 | 
			
		||||
	ln -sf pcre_utf32_to_host_byte_order.3	 $(DESTDIR)$(man3dir)/pcre32_utf32_to_host_byte_order.3
 | 
			
		||||
	ln -sf pcre_version.3			 $(DESTDIR)$(man3dir)/pcre32_version.3
 | 
			
		||||
 | 
			
		||||
pcrecpp_man = doc/pcrecpp.3
 | 
			
		||||
EXTRA_DIST += $(pcrecpp_man)
 | 
			
		||||
 | 
			
		||||
if WITH_PCRE_CPP
 | 
			
		||||
man_MANS = $(pcrecpp_man)
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
# gcov/lcov code coverage reporting
 | 
			
		||||
 | 
			
		||||
if WITH_GCOV
 | 
			
		||||
 | 
			
		||||
# Coverage reporting targets:
 | 
			
		||||
#
 | 
			
		||||
# coverage: Create a coverage report from 'make check'
 | 
			
		||||
# coverage-baseline: Capture baseline coverage information
 | 
			
		||||
# coverage-reset: This zeros the coverage counters only
 | 
			
		||||
# coverage-report: This creates the coverage report only
 | 
			
		||||
# coverage-clean-report: This removes the generated coverage report
 | 
			
		||||
#   without cleaning the coverage data itself
 | 
			
		||||
# coverage-clean-data: This removes the captured coverage data without
 | 
			
		||||
#   removing the coverage files created at compile time (*.gcno)
 | 
			
		||||
# coverage-clean: This cleans all coverage data including the generated
 | 
			
		||||
#   coverage report.
 | 
			
		||||
 | 
			
		||||
COVERAGE_TEST_NAME = $(PACKAGE)
 | 
			
		||||
COVERAGE_NAME = $(PACKAGE)-$(VERSION)
 | 
			
		||||
COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info
 | 
			
		||||
COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage
 | 
			
		||||
COVERAGE_LCOV_EXTRA_FLAGS =
 | 
			
		||||
COVERAGE_GENHTML_EXTRA_FLAGS =
 | 
			
		||||
 | 
			
		||||
coverage_quiet = $(coverage_quiet_$(V))
 | 
			
		||||
coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY))
 | 
			
		||||
coverage_quiet_0 = --quiet
 | 
			
		||||
 | 
			
		||||
coverage-check: all
 | 
			
		||||
	-$(MAKE) $(AM_MAKEFLAGS) -k check
 | 
			
		||||
 | 
			
		||||
coverage-baseline:
 | 
			
		||||
	$(LCOV) $(coverage_quiet) \
 | 
			
		||||
		--directory $(top_builddir) \
 | 
			
		||||
		--output-file "$(COVERAGE_OUTPUT_FILE)" \
 | 
			
		||||
		--capture \
 | 
			
		||||
		--initial
 | 
			
		||||
 | 
			
		||||
coverage-report:
 | 
			
		||||
	$(LCOV) $(coverage_quiet) \
 | 
			
		||||
		--directory $(top_builddir) \
 | 
			
		||||
		--capture \
 | 
			
		||||
		--output-file "$(COVERAGE_OUTPUT_FILE).tmp" \
 | 
			
		||||
		--test-name "$(COVERAGE_TEST_NAME)" \
 | 
			
		||||
		--no-checksum \
 | 
			
		||||
		--compat-libtool \
 | 
			
		||||
		$(COVERAGE_LCOV_EXTRA_FLAGS)
 | 
			
		||||
	$(LCOV) $(coverage_quiet) \
 | 
			
		||||
		--directory $(top_builddir) \
 | 
			
		||||
		--output-file "$(COVERAGE_OUTPUT_FILE)" \
 | 
			
		||||
		--remove "$(COVERAGE_OUTPUT_FILE).tmp" \
 | 
			
		||||
		"/tmp/*" \
 | 
			
		||||
		"/usr/include/*" \
 | 
			
		||||
		"$(includedir)/*"
 | 
			
		||||
	-@rm -f "$(COVERAGE_OUTPUT_FILE).tmp"
 | 
			
		||||
	LANG=C $(GENHTML) $(coverage_quiet) \
 | 
			
		||||
		--prefix $(top_builddir) \
 | 
			
		||||
		--output-directory "$(COVERAGE_OUTPUT_DIR)" \
 | 
			
		||||
		--title "$(PACKAGE) $(VERSION) Code Coverage Report" \
 | 
			
		||||
		--show-details "$(COVERAGE_OUTPUT_FILE)" \
 | 
			
		||||
		--legend \
 | 
			
		||||
		$(COVERAGE_GENHTML_EXTRA_FLAGS)
 | 
			
		||||
	@echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html"
 | 
			
		||||
 | 
			
		||||
coverage-reset:
 | 
			
		||||
	-$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir)
 | 
			
		||||
 | 
			
		||||
coverage-clean-report:
 | 
			
		||||
	-rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp"
 | 
			
		||||
	-rm -rf "$(COVERAGE_OUTPUT_DIR)"
 | 
			
		||||
 | 
			
		||||
coverage-clean-data:
 | 
			
		||||
	-find $(top_builddir) -name "*.gcda" -delete
 | 
			
		||||
 | 
			
		||||
coverage-clean: coverage-reset coverage-clean-report coverage-clean-data
 | 
			
		||||
	-find $(top_builddir) -name "*.gcno" -delete
 | 
			
		||||
 | 
			
		||||
coverage-distclean: coverage-clean
 | 
			
		||||
 | 
			
		||||
coverage: coverage-reset coverage-baseline coverage-check coverage-report
 | 
			
		||||
clean-local: coverage-clean
 | 
			
		||||
distclean-local: coverage-distclean
 | 
			
		||||
 | 
			
		||||
.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean
 | 
			
		||||
 | 
			
		||||
else
 | 
			
		||||
 | 
			
		||||
coverage:
 | 
			
		||||
	@echo "Configuring with --enable-coverage required to generate code coverage report."
 | 
			
		||||
 | 
			
		||||
endif # WITH_GCOV
 | 
			
		||||
 | 
			
		||||
## CMake support
 | 
			
		||||
 | 
			
		||||
EXTRA_DIST += \
 | 
			
		||||
  cmake/COPYING-CMAKE-SCRIPTS \
 | 
			
		||||
  cmake/FindPackageHandleStandardArgs.cmake \
 | 
			
		||||
  cmake/FindReadline.cmake \
 | 
			
		||||
  cmake/FindEditline.cmake \
 | 
			
		||||
  CMakeLists.txt \
 | 
			
		||||
  config-cmake.h.in
 | 
			
		||||
 | 
			
		||||
## end Makefile.am
 | 
			
		||||
							
								
								
									
										3283
									
								
								tools/pcre/Makefile.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3283
									
								
								tools/pcre/Makefile.in
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										693
									
								
								tools/pcre/NEWS
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										693
									
								
								tools/pcre/NEWS
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,693 @@
 | 
			
		||||
News about PCRE releases
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
Release 8.35 04-April-2014
 | 
			
		||||
--------------------------
 | 
			
		||||
 | 
			
		||||
There have been performance improvements for classes containing non-ASCII
 | 
			
		||||
characters and the "auto-possessification" feature has been extended. Other
 | 
			
		||||
minor improvements have been implemented and bugs fixed. There is a new callout
 | 
			
		||||
feature to enable applications to do detailed stack checks at compile time, to
 | 
			
		||||
avoid running out of stack for deeply nested parentheses. The JIT compiler has
 | 
			
		||||
been extended with experimental support for ARM-64, MIPS-64, and PPC-LE.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.34 15-December-2013
 | 
			
		||||
-----------------------------
 | 
			
		||||
 | 
			
		||||
As well as fixing the inevitable bugs, performance has been improved by
 | 
			
		||||
refactoring and extending the amount of "auto-possessification" that PCRE does.
 | 
			
		||||
Other notable changes:
 | 
			
		||||
 | 
			
		||||
.  Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
 | 
			
		||||
   an empty string. If it can, pcretest shows this in its information output.
 | 
			
		||||
 | 
			
		||||
.  A back reference to a named subpattern when there is more than one of the
 | 
			
		||||
   same name now checks them in the order in which they appear in the pattern.
 | 
			
		||||
   The first one that is set is used for the reference. Previously only the
 | 
			
		||||
   first one was inspected. This change makes PCRE more compatible with Perl.
 | 
			
		||||
 | 
			
		||||
.  Unicode character properties were updated from Unicode 6.3.0.
 | 
			
		||||
 | 
			
		||||
.  The character VT has been added to the set of characters that match \s and
 | 
			
		||||
   are generally treated as white space, following this same change in Perl
 | 
			
		||||
   5.18. There is now no difference between "Perl space" and "POSIX space".
 | 
			
		||||
 | 
			
		||||
.  Perl has changed its handling of \8 and \9. If there is no previously
 | 
			
		||||
   encountered capturing group of those numbers, they are treated as the
 | 
			
		||||
   literal characters 8 and 9 instead of a binary zero followed by the
 | 
			
		||||
   literals. PCRE now does the same.
 | 
			
		||||
 | 
			
		||||
.  Following Perl, added \o{} to specify codepoints in octal, making it
 | 
			
		||||
   possible to specify values greater than 0777 and also making them
 | 
			
		||||
   unambiguous.
 | 
			
		||||
 | 
			
		||||
.  In UCP mode, \s was not matching two of the characters that Perl matches,
 | 
			
		||||
   namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
 | 
			
		||||
   were matched by \h.
 | 
			
		||||
 | 
			
		||||
.  Add JIT support for the 64 bit TileGX architecture.
 | 
			
		||||
 | 
			
		||||
.  Upgraded the handling of the POSIX classes [:graph:], [:print:], and
 | 
			
		||||
   [:punct:] when PCRE_UCP is set so as to include the same characters as Perl
 | 
			
		||||
   does in Unicode mode.
 | 
			
		||||
 | 
			
		||||
.  Perl no longer allows group names to start with digits, so I have made this
 | 
			
		||||
   change also in PCRE.
 | 
			
		||||
 | 
			
		||||
.  Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
 | 
			
		||||
   mean "start of word" and "end of word", respectively, as a transition aid.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.33 28-May-2013
 | 
			
		||||
--------------------------
 | 
			
		||||
 | 
			
		||||
A number of bugs are fixed, and some performance improvements have been made.
 | 
			
		||||
There are also some new features, of which these are the most important:
 | 
			
		||||
 | 
			
		||||
.  The behaviour of the backtracking verbs has been rationalized and
 | 
			
		||||
   documented in more detail.
 | 
			
		||||
 | 
			
		||||
.  JIT now supports callouts and all of the backtracking verbs.
 | 
			
		||||
 | 
			
		||||
.  Unicode validation has been updated in the light of Unicode Corrigendum #9,
 | 
			
		||||
   which points out that "non characters" are not "characters that may not
 | 
			
		||||
   appear in Unicode strings" but rather "characters that are reserved for
 | 
			
		||||
   internal use and have only local meaning".
 | 
			
		||||
 | 
			
		||||
.  (*LIMIT_MATCH=d) and (*LIMIT_RECURSION=d) have been added so that the
 | 
			
		||||
   creator of a pattern can specify lower (but not higher) limits for the
 | 
			
		||||
   matching process.
 | 
			
		||||
 | 
			
		||||
.  The PCRE_NEVER_UTF option is available to prevent pattern-writers from using
 | 
			
		||||
   the (*UTF) feature, as this could be a security issue.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.32 30-November-2012
 | 
			
		||||
-----------------------------
 | 
			
		||||
 | 
			
		||||
This release fixes a number of bugs, but also has some new features. These are
 | 
			
		||||
the highlights:
 | 
			
		||||
 | 
			
		||||
.  There is now support for 32-bit character strings and UTF-32. Like the
 | 
			
		||||
   16-bit support, this is done by compiling a separate 32-bit library.
 | 
			
		||||
 | 
			
		||||
.  \X now matches a Unicode extended grapheme cluster.
 | 
			
		||||
 | 
			
		||||
.  Case-independent matching of Unicode characters that have more than one
 | 
			
		||||
   "other case" now makes all three (or more) characters equivalent. This
 | 
			
		||||
   applies, for example, to Greek Sigma, which has two lowercase versions.
 | 
			
		||||
 | 
			
		||||
.  Unicode character properties are updated to Unicode 6.2.0.
 | 
			
		||||
 | 
			
		||||
.  The EBCDIC support, which had decayed, has had a spring clean.
 | 
			
		||||
 | 
			
		||||
.  A number of JIT optimizations have been added, which give faster JIT
 | 
			
		||||
   execution speed. In addition, a new direct interface to JIT execution is
 | 
			
		||||
   available. This bypasses some of the sanity checks of pcre_exec() to give a
 | 
			
		||||
   noticeable speed-up.
 | 
			
		||||
 | 
			
		||||
.  A number of issues in pcregrep have been fixed, making it more compatible
 | 
			
		||||
   with GNU grep. In particular, --exclude and --include (and variants) apply
 | 
			
		||||
   to all files now, not just those obtained from scanning a directory
 | 
			
		||||
   recursively. In Windows environments, the default action for directories is
 | 
			
		||||
   now "skip" instead of "read" (which provokes an error).
 | 
			
		||||
 | 
			
		||||
.  If the --only-matching (-o) option in pcregrep is specified multiple
 | 
			
		||||
   times, each one causes appropriate output. For example, -o1 -o2 outputs the
 | 
			
		||||
   substrings matched by the 1st and 2nd capturing parentheses. A separating
 | 
			
		||||
   string can be specified by --om-separator (default empty).
 | 
			
		||||
 | 
			
		||||
.  When PCRE is built via Autotools using a version of gcc that has the
 | 
			
		||||
   "visibility" feature, it is used to hide internal library functions that are
 | 
			
		||||
   not part of the public API.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.31 06-July-2012
 | 
			
		||||
-------------------------
 | 
			
		||||
 | 
			
		||||
This is mainly a bug-fixing release, with a small number of developments:
 | 
			
		||||
 | 
			
		||||
. The JIT compiler now supports partial matching and the (*MARK) and
 | 
			
		||||
  (*COMMIT) verbs.
 | 
			
		||||
 | 
			
		||||
. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehind in a
 | 
			
		||||
  pattern.
 | 
			
		||||
 | 
			
		||||
. There should be a performance improvement when using the heap instead of the
 | 
			
		||||
  stack for recursion.
 | 
			
		||||
 | 
			
		||||
. pcregrep can now be linked with libedit as an alternative to libreadline.
 | 
			
		||||
 | 
			
		||||
. pcregrep now has a --file-list option where the list of files to scan is
 | 
			
		||||
  given as a file.
 | 
			
		||||
 | 
			
		||||
. pcregrep now recognizes binary files and there are related options.
 | 
			
		||||
 | 
			
		||||
. The Unicode tables have been updated to 6.1.0.
 | 
			
		||||
 | 
			
		||||
As always, the full list of changes is in the ChangeLog file.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.30 04-February-2012
 | 
			
		||||
-----------------------------
 | 
			
		||||
 | 
			
		||||
Release 8.30 introduces a major new feature: support for 16-bit character
 | 
			
		||||
strings, compiled as a separate library. There are a few changes to the
 | 
			
		||||
8-bit library, in addition to some bug fixes.
 | 
			
		||||
 | 
			
		||||
. The pcre_info() function, which has been obsolete for over 10 years, has
 | 
			
		||||
  been removed.
 | 
			
		||||
 | 
			
		||||
. When a compiled pattern was saved to a file and later reloaded on a host
 | 
			
		||||
  with different endianness, PCRE used automatically to swap the bytes in some
 | 
			
		||||
  of the data fields. With the advent of the 16-bit library, where more of this
 | 
			
		||||
  swapping is needed, it is no longer done automatically. Instead, the bad
 | 
			
		||||
  endianness is detected and a specific error is given. The user can then call
 | 
			
		||||
  a new function called pcre_pattern_to_host_byte_order() (or an equivalent
 | 
			
		||||
  16-bit function) to do the swap.
 | 
			
		||||
 | 
			
		||||
. In UTF-8 mode, the values 0xd800 to 0xdfff are not legal Unicode
 | 
			
		||||
  code points and are now faulted. (They are the so-called "surrogates"
 | 
			
		||||
  that are reserved for coding high values in UTF-16.)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.21 12-Dec-2011
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
This is almost entirely a bug-fix release. The only new feature is the ability
 | 
			
		||||
to obtain the size of the memory used by the JIT compiler.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.20 21-Oct-2011
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
The main change in this release is the inclusion of Zoltan Herczeg's
 | 
			
		||||
just-in-time compiler support, which can be accessed by building PCRE with
 | 
			
		||||
--enable-jit. Large performance benefits can be had in many situations. 8.20
 | 
			
		||||
also fixes an unfortunate bug that was introduced in 8.13 as well as tidying up
 | 
			
		||||
a number of infelicities and differences from Perl.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.13 16-Aug-2011
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
This is mainly a bug-fix release. There has been a lot of internal refactoring.
 | 
			
		||||
The Unicode tables have been updated. The only new feature in the library is
 | 
			
		||||
the passing of *MARK information to callouts. Some additions have been made to
 | 
			
		||||
pcretest to make testing easier and more comprehensive. There is a new option
 | 
			
		||||
for pcregrep to adjust its internal buffer size.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.12 15-Jan-2011
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
This release fixes some bugs in pcregrep, one of which caused the tests to fail
 | 
			
		||||
on 64-bit big-endian systems. There are no changes to the code of the library.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.11 10-Dec-2010
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
A number of bugs in the library and in pcregrep have been fixed. As always, see
 | 
			
		||||
ChangeLog for details. The following are the non-bug-fix changes:
 | 
			
		||||
 | 
			
		||||
. Added --match-limit and --recursion-limit to pcregrep.
 | 
			
		||||
 | 
			
		||||
. Added an optional parentheses number to the -o and --only-matching options
 | 
			
		||||
  of pcregrep.
 | 
			
		||||
 | 
			
		||||
. Changed the way PCRE_PARTIAL_HARD affects the matching of $, \z, \Z, \b, and
 | 
			
		||||
  \B.
 | 
			
		||||
 | 
			
		||||
. Added PCRE_ERROR_SHORTUTF8 to make it possible to distinguish between a
 | 
			
		||||
  bad UTF-8 sequence and one that is incomplete when using PCRE_PARTIAL_HARD.
 | 
			
		||||
 | 
			
		||||
. Recognize (*NO_START_OPT) at the start of a pattern to set the PCRE_NO_
 | 
			
		||||
  START_OPTIMIZE option, which is now allowed at compile time.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.10 25-Jun-2010
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
There are two major additions: support for (*MARK) and friends, and the option
 | 
			
		||||
PCRE_UCP, which changes the behaviour of \b, \d, \s, and \w (and their
 | 
			
		||||
opposites) so that they make use of Unicode properties. There are also a number
 | 
			
		||||
of lesser new features, and several bugs have been fixed. A new option,
 | 
			
		||||
--line-buffered, has been added to pcregrep, for use when it is connected to
 | 
			
		||||
pipes.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.02 19-Mar-2010
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
Another bug-fix release.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.01 19-Jan-2010
 | 
			
		||||
------------------------
 | 
			
		||||
 | 
			
		||||
This is a bug-fix release. Several bugs in the code itself and some bugs and
 | 
			
		||||
infelicities in the build system have been fixed.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 8.00 19-Oct-09
 | 
			
		||||
----------------------
 | 
			
		||||
 | 
			
		||||
Bugs have been fixed in the library and in pcregrep. There are also some
 | 
			
		||||
enhancements. Restrictions on patterns used for partial matching have been
 | 
			
		||||
removed, extra information is given for partial matches, the partial matching
 | 
			
		||||
process has been improved, and an option to make a partial match override a
 | 
			
		||||
full match is available. The "study" process has been enhanced by finding a
 | 
			
		||||
lower bound matching length. Groups with duplicate numbers may now have
 | 
			
		||||
duplicated names without the use of PCRE_DUPNAMES. However, they may not have
 | 
			
		||||
different names. The documentation has been revised to reflect these changes.
 | 
			
		||||
The version number has been expanded to 3 digits as it is clear that the rate
 | 
			
		||||
of change is not slowing down.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.9 11-Apr-09
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
Mostly bugfixes and tidies with just a couple of minor functional additions.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.8 05-Sep-08
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
More bug fixes, plus a performance improvement in Unicode character property
 | 
			
		||||
lookup.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.7 07-May-08
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
This is once again mainly a bug-fix release, but there are a couple of new
 | 
			
		||||
features.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.6 28-Jan-08
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
The main reason for having this release so soon after 7.5 is because it fixes a
 | 
			
		||||
potential buffer overflow problem in pcre_compile() when run in UTF-8 mode. In
 | 
			
		||||
addition, the CMake configuration files have been brought up to date.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.5 10-Jan-08
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
This is mainly a bug-fix release. However the ability to link pcregrep with
 | 
			
		||||
libz or libbz2 and the ability to link pcretest with libreadline have been
 | 
			
		||||
added. Also the --line-offsets and --file-offsets options were added to
 | 
			
		||||
pcregrep.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.4 21-Sep-07
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
The only change of specification is the addition of options to control whether
 | 
			
		||||
\R matches any Unicode line ending (the default) or just CR, LF, and CRLF.
 | 
			
		||||
Otherwise, the changes are bug fixes and a refactoring to reduce the number of
 | 
			
		||||
relocations needed in a shared library. There have also been some documentation
 | 
			
		||||
updates, in particular, some more information about using CMake to build PCRE
 | 
			
		||||
has been added to the NON-UNIX-USE file.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.3 28-Aug-07
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
Most changes are bug fixes. Some that are not:
 | 
			
		||||
 | 
			
		||||
1. There is some support for Perl 5.10's experimental "backtracking control
 | 
			
		||||
   verbs" such as (*PRUNE).
 | 
			
		||||
 | 
			
		||||
2. UTF-8 checking is now as per RFC 3629 instead of RFC 2279; this is more
 | 
			
		||||
   restrictive in the strings it accepts.
 | 
			
		||||
 | 
			
		||||
3. Checking for potential integer overflow has been made more dynamic, and as a
 | 
			
		||||
   consequence there is no longer a hard limit on the size of a subpattern that
 | 
			
		||||
   has a limited repeat count.
 | 
			
		||||
 | 
			
		||||
4. When CRLF is a valid line-ending sequence, pcre_exec() and pcre_dfa_exec()
 | 
			
		||||
   no longer advance by two characters instead of one when an unanchored match
 | 
			
		||||
   fails at CRLF if there are explicit CR or LF matches within the pattern.
 | 
			
		||||
   This gets rid of some anomalous effects that previously occurred.
 | 
			
		||||
 | 
			
		||||
5. Some PCRE-specific settings for varying the newline options at the start of
 | 
			
		||||
   a pattern have been added.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.2 19-Jun-07
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
WARNING: saved patterns that were compiled by earlier versions of PCRE must be
 | 
			
		||||
recompiled for use with 7.2 (necessitated by the addition of \K, \h, \H, \v,
 | 
			
		||||
and \V).
 | 
			
		||||
 | 
			
		||||
Correction to the notes for 7.1: the note about shared libraries for Windows is
 | 
			
		||||
wrong. Previously, three libraries were built, but each could function
 | 
			
		||||
independently. For example, the pcreposix library also included all the
 | 
			
		||||
functions from the basic pcre library. The change is that the three libraries
 | 
			
		||||
are no longer independent. They are like the Unix libraries. To use the
 | 
			
		||||
pcreposix functions, for example, you need to link with both the pcreposix and
 | 
			
		||||
the basic pcre library.
 | 
			
		||||
 | 
			
		||||
Some more features from Perl 5.10 have been added:
 | 
			
		||||
 | 
			
		||||
  (?-n) and (?+n) relative references for recursion and subroutines.
 | 
			
		||||
 | 
			
		||||
  (?(-n) and (?(+n) relative references as conditions.
 | 
			
		||||
 | 
			
		||||
  \k{name} and \g{name} are synonyms for \k<name>.
 | 
			
		||||
 | 
			
		||||
  \K to reset the start of the matched string; for example, (foo)\Kbar
 | 
			
		||||
  matches bar preceded by foo, but only sets bar as the matched string.
 | 
			
		||||
 | 
			
		||||
  (?| introduces a group where the capturing parentheses in each alternative
 | 
			
		||||
  start from the same number; for example, (?|(abc)|(xyz)) sets capturing
 | 
			
		||||
  parentheses number 1 in both cases.
 | 
			
		||||
 | 
			
		||||
  \h, \H, \v, \V match horizontal and vertical whitespace, respectively.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.1 24-Apr-07
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
There is only one new feature in this release: a linebreak setting of
 | 
			
		||||
PCRE_NEWLINE_ANYCRLF. It is a cut-down version of PCRE_NEWLINE_ANY, which
 | 
			
		||||
recognizes only CRLF, CR, and LF as linebreaks.
 | 
			
		||||
 | 
			
		||||
A few bugs are fixed (see ChangeLog for details), but the major change is a
 | 
			
		||||
complete re-implementation of the build system. This now has full Autotools
 | 
			
		||||
support and so is now "standard" in some sense. It should help with compiling
 | 
			
		||||
PCRE in a wide variety of environments.
 | 
			
		||||
 | 
			
		||||
NOTE: when building shared libraries for Windows, three dlls are now built,
 | 
			
		||||
called libpcre, libpcreposix, and libpcrecpp. Previously, everything was
 | 
			
		||||
included in a single dll.
 | 
			
		||||
 | 
			
		||||
Another important change is that the dftables auxiliary program is no longer
 | 
			
		||||
compiled and run at "make" time by default. Instead, a default set of character
 | 
			
		||||
tables (assuming ASCII coding) is used. If you want to use dftables to generate
 | 
			
		||||
the character tables as previously, add --enable-rebuild-chartables to the
 | 
			
		||||
"configure" command. You must do this if you are compiling PCRE to run on a
 | 
			
		||||
system that uses EBCDIC code.
 | 
			
		||||
 | 
			
		||||
There is a discussion about character tables in the README file. The default is
 | 
			
		||||
not to use dftables so that there is no problem when cross-compiling.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 7.0 19-Dec-06
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
This release has a new major number because there have been some internal
 | 
			
		||||
upheavals to facilitate the addition of new optimizations and other facilities,
 | 
			
		||||
and to make subsequent maintenance and extension easier. Compilation is likely
 | 
			
		||||
to be a bit slower, but there should be no major effect on runtime performance.
 | 
			
		||||
Previously compiled patterns are NOT upwards compatible with this release. If
 | 
			
		||||
you have saved compiled patterns from a previous release, you will have to
 | 
			
		||||
re-compile them. Important changes that are visible to users are:
 | 
			
		||||
 | 
			
		||||
1. The Unicode property tables have been updated to Unicode 5.0.0, which adds
 | 
			
		||||
   some more scripts.
 | 
			
		||||
 | 
			
		||||
2. The option PCRE_NEWLINE_ANY causes PCRE to recognize any Unicode newline
 | 
			
		||||
   sequence as a newline.
 | 
			
		||||
 | 
			
		||||
3. The \R escape matches a single Unicode newline sequence as a single unit.
 | 
			
		||||
 | 
			
		||||
4. New features that will appear in Perl 5.10 are now in PCRE. These include
 | 
			
		||||
   alternative Perl syntax for named parentheses, and Perl syntax for
 | 
			
		||||
   recursion.
 | 
			
		||||
 | 
			
		||||
5. The C++ wrapper interface has been extended by the addition of a
 | 
			
		||||
   QuoteMeta function and the ability to allow copy construction and
 | 
			
		||||
   assignment.
 | 
			
		||||
 | 
			
		||||
For a complete list of changes, see the ChangeLog file.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 6.7 04-Jul-06
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
The main additions to this release are the ability to use the same name for
 | 
			
		||||
multiple sets of parentheses, and support for CRLF line endings in both the
 | 
			
		||||
library and pcregrep (and in pcretest for testing).
 | 
			
		||||
 | 
			
		||||
Thanks to Ian Taylor, the stack usage for many kinds of pattern has been
 | 
			
		||||
significantly reduced for certain subject strings.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 6.5 01-Feb-06
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
Important changes in this release:
 | 
			
		||||
 | 
			
		||||
1. A number of new features have been added to pcregrep.
 | 
			
		||||
 | 
			
		||||
2. The Unicode property tables have been updated to Unicode 4.1.0, and the
 | 
			
		||||
   supported properties have been extended with script names such as "Arabic",
 | 
			
		||||
   and the derived properties "Any" and "L&". This has necessitated a change to
 | 
			
		||||
   the internal format of compiled patterns. Any saved compiled patterns that
 | 
			
		||||
   use \p or \P must be recompiled.
 | 
			
		||||
 | 
			
		||||
3. The specification of recursion in patterns has been changed so that all
 | 
			
		||||
   recursive subpatterns are automatically treated as atomic groups. Thus, for
 | 
			
		||||
   example, (?R) is treated as if it were (?>(?R)). This is necessary because
 | 
			
		||||
   otherwise there are situations where recursion does not work.
 | 
			
		||||
 | 
			
		||||
See the ChangeLog for a complete list of changes, which include a number of bug
 | 
			
		||||
fixes and tidies.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 6.0 07-Jun-05
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
The release number has been increased to 6.0 because of the addition of several
 | 
			
		||||
major new pieces of functionality.
 | 
			
		||||
 | 
			
		||||
A new function, pcre_dfa_exec(), which implements pattern matching using a DFA
 | 
			
		||||
algorithm, has been added. This has a number of advantages for certain cases,
 | 
			
		||||
though it does run more slowly, and lacks the ability to capture substrings. On
 | 
			
		||||
the other hand, it does find all matches, not just the first, and it works
 | 
			
		||||
better for partial matching. The pcrematching man page discusses the
 | 
			
		||||
differences.
 | 
			
		||||
 | 
			
		||||
The pcretest program has been enhanced so that it can make use of the new
 | 
			
		||||
pcre_dfa_exec() matching function and the extra features it provides.
 | 
			
		||||
 | 
			
		||||
The distribution now includes a C++ wrapper library. This is built
 | 
			
		||||
automatically if a C++ compiler is found. The pcrecpp man page discusses this
 | 
			
		||||
interface.
 | 
			
		||||
 | 
			
		||||
The code itself has been re-organized into many more files, one for each
 | 
			
		||||
function, so it no longer requires everything to be linked in when static
 | 
			
		||||
linkage is used. As a consequence, some internal functions have had to have
 | 
			
		||||
their names exposed. These functions all have names starting with _pcre_. They
 | 
			
		||||
are undocumented, and are not intended for use by outside callers.
 | 
			
		||||
 | 
			
		||||
The pcregrep program has been enhanced with new functionality such as
 | 
			
		||||
multiline-matching and options for outputting more matching context. See the
 | 
			
		||||
ChangeLog for a complete list of changes to the library and the utility
 | 
			
		||||
programs.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 5.0 13-Sep-04
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
The licence under which PCRE is released has been changed to the more
 | 
			
		||||
conventional "BSD" licence.
 | 
			
		||||
 | 
			
		||||
In the code, some bugs have been fixed, and there are also some major changes
 | 
			
		||||
in this release (which is why I've increased the number to 5.0). Some changes
 | 
			
		||||
are internal rearrangements, and some provide a number of new facilities. The
 | 
			
		||||
new features are:
 | 
			
		||||
 | 
			
		||||
1. There's an "automatic callout" feature that inserts callouts before every
 | 
			
		||||
   item in the regex, and there's a new callout field that gives the position
 | 
			
		||||
   in the pattern - useful for debugging and tracing.
 | 
			
		||||
 | 
			
		||||
2. The extra_data structure can now be used to pass in a set of character
 | 
			
		||||
   tables at exec time. This is useful if compiled regex are saved and re-used
 | 
			
		||||
   at a later time when the tables may not be at the same address. If the
 | 
			
		||||
   default internal tables are used, the pointer saved with the compiled
 | 
			
		||||
   pattern is now set to NULL, which means that you don't need to do anything
 | 
			
		||||
   special unless you are using custom tables.
 | 
			
		||||
 | 
			
		||||
3. It is possible, with some restrictions on the content of the regex, to
 | 
			
		||||
   request "partial" matching. A special return code is given if all of the
 | 
			
		||||
   subject string matched part of the regex. This could be useful for testing
 | 
			
		||||
   an input field as it is being typed.
 | 
			
		||||
 | 
			
		||||
4. There is now some optional support for Unicode character properties, which
 | 
			
		||||
   means that pattern items such as \p{Lu} and \X can now be used. Only
 | 
			
		||||
   the general category properties are supported. If PCRE is compiled with this
 | 
			
		||||
   support, an additional 90K data structure is included, which increases the
 | 
			
		||||
   size of the library dramatically.
 | 
			
		||||
 | 
			
		||||
5. There is support for saving compiled patterns and re-using them later.
 | 
			
		||||
 | 
			
		||||
6. There is support for running regular expressions that were compiled on a
 | 
			
		||||
   different host with the opposite endianness.
 | 
			
		||||
 | 
			
		||||
7. The pcretest program has been extended to accommodate the new features.
 | 
			
		||||
 | 
			
		||||
The main internal rearrangement is that sequences of literal characters are no
 | 
			
		||||
longer handled as strings. Instead, each character is handled on its own. This
 | 
			
		||||
makes some UTF-8 handling easier, and makes the support of partial matching
 | 
			
		||||
possible. Compiled patterns containing long literal strings will be larger as a
 | 
			
		||||
result of this change; I hope that performance will not be much affected.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 4.5 01-Dec-03
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
Again mainly a bug-fix and tidying release, with only a couple of new features:
 | 
			
		||||
 | 
			
		||||
1. It's possible now to compile PCRE so that it does not use recursive
 | 
			
		||||
function calls when matching. Instead it gets memory from the heap. This slows
 | 
			
		||||
things down, but may be necessary on systems with limited stacks.
 | 
			
		||||
 | 
			
		||||
2. UTF-8 string checking has been tightened to reject overlong sequences and to
 | 
			
		||||
check that a starting offset points to the start of a character. Failure of the
 | 
			
		||||
latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.
 | 
			
		||||
 | 
			
		||||
3. PCRE can now be compiled for systems that use EBCDIC code.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 4.4 21-Aug-03
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
This is mainly a bug-fix and tidying release. The only new feature is that PCRE
 | 
			
		||||
checks UTF-8 strings for validity by default. There is an option to suppress
 | 
			
		||||
this, just in case anybody wants that teeny extra bit of performance.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Releases 4.1 - 4.3
 | 
			
		||||
------------------
 | 
			
		||||
 | 
			
		||||
Sorry, I forgot about updating the NEWS file for these releases. Please take a
 | 
			
		||||
look at ChangeLog.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 4.0 17-Feb-03
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
There have been a lot of changes for the 4.0 release, adding additional
 | 
			
		||||
functionality and mending bugs. Below is a list of the highlights of the new
 | 
			
		||||
functionality. For full details of these features, please consult the
 | 
			
		||||
documentation. For a complete list of changes, see the ChangeLog file.
 | 
			
		||||
 | 
			
		||||
1. Support for Perl's \Q...\E escapes.
 | 
			
		||||
 | 
			
		||||
2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java
 | 
			
		||||
package. They provide some syntactic sugar for simple cases of "atomic
 | 
			
		||||
grouping".
 | 
			
		||||
 | 
			
		||||
3. Support for the \G assertion. It is true when the current matching position
 | 
			
		||||
is at the start point of the match.
 | 
			
		||||
 | 
			
		||||
4. A new feature that provides some of the functionality that Perl provides
 | 
			
		||||
with (?{...}). The facility is termed a "callout". The way it is done in PCRE
 | 
			
		||||
is for the caller to provide an optional function, by setting pcre_callout to
 | 
			
		||||
its entry point. To get the function called, the regex must include (?C) at
 | 
			
		||||
appropriate points.
 | 
			
		||||
 | 
			
		||||
5. Support for recursive calls to individual subpatterns. This makes it really
 | 
			
		||||
easy to get totally confused.
 | 
			
		||||
 | 
			
		||||
6. Support for named subpatterns. The Python syntax (?P<name>...) is used to
 | 
			
		||||
name a group.
 | 
			
		||||
 | 
			
		||||
7. Several extensions to UTF-8 support; it is now fairly complete. There is an
 | 
			
		||||
option for pcregrep to make it operate in UTF-8 mode.
 | 
			
		||||
 | 
			
		||||
8. The single man page has been split into a number of separate man pages.
 | 
			
		||||
These also give rise to individual HTML pages which are put in a separate
 | 
			
		||||
directory. There is an index.html page that lists them all. Some hyperlinking
 | 
			
		||||
between the pages has been installed.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 3.5 15-Aug-01
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
1. The configuring system has been upgraded to use later versions of autoconf
 | 
			
		||||
and libtool. By default it builds both a shared and a static library if the OS
 | 
			
		||||
supports it. You can use --disable-shared or --disable-static on the configure
 | 
			
		||||
command if you want only one of them.
 | 
			
		||||
 | 
			
		||||
2. The pcretest utility is now installed along with pcregrep because it is
 | 
			
		||||
useful for users (to test regexs) and by doing this, it automatically gets
 | 
			
		||||
relinked by libtool. The documentation has been turned into a man page, so
 | 
			
		||||
there are now .1, .txt, and .html versions in /doc.
 | 
			
		||||
 | 
			
		||||
3. Upgrades to pcregrep:
 | 
			
		||||
   (i)   Added long-form option names like gnu grep.
 | 
			
		||||
   (ii)  Added --help to list all options with an explanatory phrase.
 | 
			
		||||
   (iii) Added -r, --recursive to recurse into sub-directories.
 | 
			
		||||
   (iv)  Added -f, --file to read patterns from a file.
 | 
			
		||||
 | 
			
		||||
4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
 | 
			
		||||
script, to force use of CR or LF instead of \n in the source. On non-Unix
 | 
			
		||||
systems, the value can be set in config.h.
 | 
			
		||||
 | 
			
		||||
5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
 | 
			
		||||
absolute limit. Changed the text of the error message to make this clear, and
 | 
			
		||||
likewise updated the man page.
 | 
			
		||||
 | 
			
		||||
6. The limit of 99 on the number of capturing subpatterns has been removed.
 | 
			
		||||
The new limit is 65535, which I hope will not be a "real" limit.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 3.3 01-Aug-00
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
There is some support for UTF-8 character strings. This is incomplete and
 | 
			
		||||
experimental. The documentation describes what is and what is not implemented.
 | 
			
		||||
Otherwise, this is just a bug-fixing release.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Release 3.0 01-Feb-00
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
1. A "configure" script is now used to configure PCRE for Unix systems. It
 | 
			
		||||
builds a Makefile, a config.h file, and the pcre-config script.
 | 
			
		||||
 | 
			
		||||
2. PCRE is built as a shared library by default.
 | 
			
		||||
 | 
			
		||||
3. There is support for POSIX classes such as [:alpha:].
 | 
			
		||||
 | 
			
		||||
5. There is an experimental recursion feature.
 | 
			
		||||
 | 
			
		||||
----------------------------------------------------------------------------
 | 
			
		||||
          IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00
 | 
			
		||||
 | 
			
		||||
Please note that there has been a change in the API such that a larger
 | 
			
		||||
ovector is required at matching time, to provide some additional workspace.
 | 
			
		||||
The new man page has details. This change was necessary in order to support
 | 
			
		||||
some of the new functionality in Perl 5.005.
 | 
			
		||||
 | 
			
		||||
          IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00
 | 
			
		||||
 | 
			
		||||
Another (I hope this is the last!) change has been made to the API for the
 | 
			
		||||
pcre_compile() function. An additional argument has been added to make it
 | 
			
		||||
possible to pass over a pointer to character tables built in the current
 | 
			
		||||
locale by pcre_maketables(). To use the default tables, this new argument
 | 
			
		||||
should be passed as NULL.
 | 
			
		||||
 | 
			
		||||
          IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
 | 
			
		||||
 | 
			
		||||
Yet another (and again I hope this really is the last) change has been made
 | 
			
		||||
to the API for the pcre_exec() function. An additional argument has been
 | 
			
		||||
added to make it possible to start the match other than at the start of the
 | 
			
		||||
subject string. This is important if there are lookbehinds. The new man
 | 
			
		||||
page has the details, but if you just want to convert existing programs, all
 | 
			
		||||
you need to do is to stick in a new fifth argument to pcre_exec(), with a
 | 
			
		||||
value of zero. For example, change
 | 
			
		||||
 | 
			
		||||
  pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize)
 | 
			
		||||
to
 | 
			
		||||
  pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize)
 | 
			
		||||
 | 
			
		||||
****
 | 
			
		||||
							
								
								
									
										764
									
								
								tools/pcre/NON-AUTOTOOLS-BUILD
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										764
									
								
								tools/pcre/NON-AUTOTOOLS-BUILD
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,764 @@
 | 
			
		||||
Building PCRE without using autotools
 | 
			
		||||
-------------------------------------
 | 
			
		||||
 | 
			
		||||
This document contains the following sections:
 | 
			
		||||
 | 
			
		||||
  General
 | 
			
		||||
  Generic instructions for the PCRE C library
 | 
			
		||||
  The C++ wrapper functions
 | 
			
		||||
  Building for virtual Pascal
 | 
			
		||||
  Stack size in Windows environments
 | 
			
		||||
  Linking programs in Windows environments
 | 
			
		||||
  Calling conventions in Windows environments
 | 
			
		||||
  Comments about Win32 builds
 | 
			
		||||
  Building PCRE on Windows with CMake
 | 
			
		||||
  Use of relative paths with CMake on Windows
 | 
			
		||||
  Testing with RunTest.bat
 | 
			
		||||
  Building under Windows CE with Visual Studio 200x
 | 
			
		||||
  Building under Windows with BCC5.5
 | 
			
		||||
  Building using Borland C++ Builder 2007 (CB2007) and higher
 | 
			
		||||
  Building PCRE on OpenVMS
 | 
			
		||||
  Building PCRE on Stratus OpenVOS
 | 
			
		||||
  Building PCRE on native z/OS and z/VM
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
GENERAL
 | 
			
		||||
 | 
			
		||||
I (Philip Hazel) have no experience of Windows or VMS systems and how their
 | 
			
		||||
libraries work. The items in the PCRE distribution and Makefile that relate to
 | 
			
		||||
anything other than Linux systems are untested by me.
 | 
			
		||||
 | 
			
		||||
There are some other comments and files (including some documentation in CHM
 | 
			
		||||
format) in the Contrib directory on the FTP site:
 | 
			
		||||
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
 | 
			
		||||
 | 
			
		||||
The basic PCRE library consists entirely of code written in Standard C, and so
 | 
			
		||||
should compile successfully on any system that has a Standard C compiler and
 | 
			
		||||
library. The C++ wrapper functions are a separate issue (see below).
 | 
			
		||||
 | 
			
		||||
The PCRE distribution includes a "configure" file for use by the configure/make
 | 
			
		||||
(autotools) build system, as found in many Unix-like environments. The README
 | 
			
		||||
file contains information about the options for "configure".
 | 
			
		||||
 | 
			
		||||
There is also support for CMake, which some users prefer, especially in Windows
 | 
			
		||||
environments, though it can also be run in Unix-like environments. See the
 | 
			
		||||
section entitled "Building PCRE on Windows with CMake" below.
 | 
			
		||||
 | 
			
		||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
 | 
			
		||||
names config.h.generic and pcre.h.generic. These are provided for those who
 | 
			
		||||
build PCRE without using "configure" or CMake. If you use "configure" or CMake,
 | 
			
		||||
the .generic versions are not used.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
 | 
			
		||||
 | 
			
		||||
The following are generic instructions for building the PCRE C library "by
 | 
			
		||||
hand". If you are going to use CMake, this section does not apply to you; you
 | 
			
		||||
can skip ahead to the CMake section.
 | 
			
		||||
 | 
			
		||||
 (1) Copy or rename the file config.h.generic as config.h, and edit the macro
 | 
			
		||||
     settings that it contains to whatever is appropriate for your environment.
 | 
			
		||||
 | 
			
		||||
     In particular, you can alter the definition of the NEWLINE macro to
 | 
			
		||||
     specify what character(s) you want to be interpreted as line terminators.
 | 
			
		||||
     In an EBCDIC environment, you MUST change NEWLINE, because its default
 | 
			
		||||
     value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15,
 | 
			
		||||
     NL), though in some cases it may be 37 (0x25).
 | 
			
		||||
 | 
			
		||||
     When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H
 | 
			
		||||
     to your compiler so that config.h is included in the sources.
 | 
			
		||||
 | 
			
		||||
     An alternative approach is not to edit config.h, but to use -D on the
 | 
			
		||||
     compiler command line to make any changes that you need to the
 | 
			
		||||
     configuration options. In this case -DHAVE_CONFIG_H must not be set.
 | 
			
		||||
 | 
			
		||||
     NOTE: There have been occasions when the way in which certain parameters
 | 
			
		||||
     in config.h are used has changed between releases. (In the configure/make
 | 
			
		||||
     world, this is handled automatically.) When upgrading to a new release,
 | 
			
		||||
     you are strongly advised to review config.h.generic before re-using what
 | 
			
		||||
     you had previously.
 | 
			
		||||
 | 
			
		||||
 (2) Copy or rename the file pcre.h.generic as pcre.h.
 | 
			
		||||
 | 
			
		||||
 (3) EITHER:
 | 
			
		||||
       Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
 | 
			
		||||
 | 
			
		||||
     OR:
 | 
			
		||||
       Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
 | 
			
		||||
       you have set up config.h), and then run it with the single argument
 | 
			
		||||
       "pcre_chartables.c". This generates a set of standard character tables
 | 
			
		||||
       and writes them to that file. The tables are generated using the default
 | 
			
		||||
       C locale for your system. If you want to use a locale that is specified
 | 
			
		||||
       by LC_xxx environment variables, add the -L option to the dftables
 | 
			
		||||
       command. You must use this method if you are building on a system that
 | 
			
		||||
       uses EBCDIC code.
 | 
			
		||||
 | 
			
		||||
     The tables in pcre_chartables.c are defaults. The caller of PCRE can
 | 
			
		||||
     specify alternative tables at run time.
 | 
			
		||||
 | 
			
		||||
 (4) Ensure that you have the following header files:
 | 
			
		||||
 | 
			
		||||
       pcre_internal.h
 | 
			
		||||
       ucp.h
 | 
			
		||||
 | 
			
		||||
 (5) For an 8-bit library, compile the following source files, setting
 | 
			
		||||
     -DHAVE_CONFIG_H as a compiler option if you have set up config.h with your
 | 
			
		||||
     configuration, or else use other -D settings to change the configuration
 | 
			
		||||
     as required.
 | 
			
		||||
 | 
			
		||||
       pcre_byte_order.c
 | 
			
		||||
       pcre_chartables.c
 | 
			
		||||
       pcre_compile.c
 | 
			
		||||
       pcre_config.c
 | 
			
		||||
       pcre_dfa_exec.c
 | 
			
		||||
       pcre_exec.c
 | 
			
		||||
       pcre_fullinfo.c
 | 
			
		||||
       pcre_get.c
 | 
			
		||||
       pcre_globals.c
 | 
			
		||||
       pcre_jit_compile.c
 | 
			
		||||
       pcre_maketables.c
 | 
			
		||||
       pcre_newline.c
 | 
			
		||||
       pcre_ord2utf8.c
 | 
			
		||||
       pcre_refcount.c
 | 
			
		||||
       pcre_string_utils.c
 | 
			
		||||
       pcre_study.c
 | 
			
		||||
       pcre_tables.c
 | 
			
		||||
       pcre_ucd.c
 | 
			
		||||
       pcre_valid_utf8.c
 | 
			
		||||
       pcre_version.c
 | 
			
		||||
       pcre_xclass.c
 | 
			
		||||
 | 
			
		||||
     Make sure that you include -I. in the compiler command (or equivalent for
 | 
			
		||||
     an unusual compiler) so that all included PCRE header files are first
 | 
			
		||||
     sought in the current directory. Otherwise you run the risk of picking up
 | 
			
		||||
     a previously-installed file from somewhere else.
 | 
			
		||||
 | 
			
		||||
     Note that you must still compile pcre_jit_compile.c, even if you have not
 | 
			
		||||
     defined SUPPORT_JIT in config.h, because when JIT support is not
 | 
			
		||||
     configured, dummy functions are compiled. When JIT support IS configured,
 | 
			
		||||
     pcre_jit_compile.c #includes sources from the sljit subdirectory, where
 | 
			
		||||
     there should be 16 files, all of whose names begin with "sljit".
 | 
			
		||||
 | 
			
		||||
 (6) Now link all the compiled code into an object library in whichever form
 | 
			
		||||
     your system keeps such libraries. This is the basic PCRE C 8-bit library.
 | 
			
		||||
     If your system has static and shared libraries, you may have to do this
 | 
			
		||||
     once for each type.
 | 
			
		||||
 | 
			
		||||
 (7) If you want to build a 16-bit library (as well as, or instead of the 8-bit
 | 
			
		||||
     or 32-bit libraries) repeat steps 5-6 with the following files:
 | 
			
		||||
 | 
			
		||||
       pcre16_byte_order.c
 | 
			
		||||
       pcre16_chartables.c
 | 
			
		||||
       pcre16_compile.c
 | 
			
		||||
       pcre16_config.c
 | 
			
		||||
       pcre16_dfa_exec.c
 | 
			
		||||
       pcre16_exec.c
 | 
			
		||||
       pcre16_fullinfo.c
 | 
			
		||||
       pcre16_get.c
 | 
			
		||||
       pcre16_globals.c
 | 
			
		||||
       pcre16_jit_compile.c
 | 
			
		||||
       pcre16_maketables.c
 | 
			
		||||
       pcre16_newline.c
 | 
			
		||||
       pcre16_ord2utf16.c
 | 
			
		||||
       pcre16_refcount.c
 | 
			
		||||
       pcre16_string_utils.c
 | 
			
		||||
       pcre16_study.c
 | 
			
		||||
       pcre16_tables.c
 | 
			
		||||
       pcre16_ucd.c
 | 
			
		||||
       pcre16_utf16_utils.c
 | 
			
		||||
       pcre16_valid_utf16.c
 | 
			
		||||
       pcre16_version.c
 | 
			
		||||
       pcre16_xclass.c
 | 
			
		||||
 | 
			
		||||
 (8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
 | 
			
		||||
     or 16-bit libraries) repeat steps 5-6 with the following files:
 | 
			
		||||
 | 
			
		||||
       pcre32_byte_order.c
 | 
			
		||||
       pcre32_chartables.c
 | 
			
		||||
       pcre32_compile.c
 | 
			
		||||
       pcre32_config.c
 | 
			
		||||
       pcre32_dfa_exec.c
 | 
			
		||||
       pcre32_exec.c
 | 
			
		||||
       pcre32_fullinfo.c
 | 
			
		||||
       pcre32_get.c
 | 
			
		||||
       pcre32_globals.c
 | 
			
		||||
       pcre32_jit_compile.c
 | 
			
		||||
       pcre32_maketables.c
 | 
			
		||||
       pcre32_newline.c
 | 
			
		||||
       pcre32_ord2utf32.c
 | 
			
		||||
       pcre32_refcount.c
 | 
			
		||||
       pcre32_string_utils.c
 | 
			
		||||
       pcre32_study.c
 | 
			
		||||
       pcre32_tables.c
 | 
			
		||||
       pcre32_ucd.c
 | 
			
		||||
       pcre32_utf32_utils.c
 | 
			
		||||
       pcre32_valid_utf32.c
 | 
			
		||||
       pcre32_version.c
 | 
			
		||||
       pcre32_xclass.c
 | 
			
		||||
 | 
			
		||||
 (9) If you want to build the POSIX wrapper functions (which apply only to the
 | 
			
		||||
     8-bit library), ensure that you have the pcreposix.h file and then compile
 | 
			
		||||
     pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
 | 
			
		||||
     (on its own) as the pcreposix library.
 | 
			
		||||
 | 
			
		||||
(10) The pcretest program can be linked with any combination of the 8-bit,
 | 
			
		||||
     16-bit and 32-bit libraries (depending on what you selected in config.h).
 | 
			
		||||
     Compile pcretest.c and pcre_printint.c (again, don't forget
 | 
			
		||||
     -DHAVE_CONFIG_H) and link them together with the appropriate library/ies.
 | 
			
		||||
     If you compiled an 8-bit library, pcretest also needs the pcreposix
 | 
			
		||||
     wrapper library unless you compiled it with -DNOPOSIX.
 | 
			
		||||
 | 
			
		||||
(11) Run pcretest on the testinput files in the testdata directory, and check
 | 
			
		||||
     that the output matches the corresponding testoutput files. There are
 | 
			
		||||
     comments about what each test does in the section entitled "Testing PCRE"
 | 
			
		||||
     in the README file. If you compiled more than one of the 8-bit, 16-bit and
 | 
			
		||||
     32-bit libraries, you need to run pcretest with the -16 option to do
 | 
			
		||||
     16-bit tests and with the -32 option to do 32-bit tests.
 | 
			
		||||
 | 
			
		||||
     Some tests are relevant only when certain build-time options are selected.
 | 
			
		||||
     For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run
 | 
			
		||||
     if you have built PCRE without it. See the comments at the start of each
 | 
			
		||||
     testinput file. If you have a suitable Unix-like shell, the RunTest script
 | 
			
		||||
     will run the appropriate tests for you. The command "RunTest list" will
 | 
			
		||||
     output a list of all the tests.
 | 
			
		||||
 | 
			
		||||
     Note that the supplied files are in Unix format, with just LF characters
 | 
			
		||||
     as line terminators. You may need to edit them to change this if your
 | 
			
		||||
     system uses a different convention. If you are using Windows, you probably
 | 
			
		||||
     should use the wintestinput3 file instead of testinput3 (and the
 | 
			
		||||
     corresponding output file). This is a locale test; wintestinput3 sets the
 | 
			
		||||
     locale to "french" rather than "fr_FR", and there are some minor output
 | 
			
		||||
     differences.
 | 
			
		||||
 | 
			
		||||
(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
 | 
			
		||||
     by the testdata files. However, you might also like to build and run
 | 
			
		||||
     the freestanding JIT test program, pcre_jit_test.c.
 | 
			
		||||
 | 
			
		||||
(13) If you want to use the pcregrep command, compile and link pcregrep.c; it
 | 
			
		||||
     uses only the basic 8-bit PCRE library (it does not need the pcreposix
 | 
			
		||||
     library).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE C++ WRAPPER FUNCTIONS
 | 
			
		||||
 | 
			
		||||
The PCRE distribution also contains some C++ wrapper functions and tests,
 | 
			
		||||
applicable to the 8-bit library, which were contributed by Google Inc. On a
 | 
			
		||||
system that can use "configure" and "make", the functions are automatically
 | 
			
		||||
built into a library called pcrecpp. It should be straightforward to compile
 | 
			
		||||
the .cc files manually on other systems. The files called xxx_unittest.cc are
 | 
			
		||||
test programs for each of the corresponding xxx.cc files.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING FOR VIRTUAL PASCAL
 | 
			
		||||
 | 
			
		||||
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
 | 
			
		||||
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
 | 
			
		||||
additional files. The following files in the distribution are for building PCRE
 | 
			
		||||
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
STACK SIZE IN WINDOWS ENVIRONMENTS
 | 
			
		||||
 | 
			
		||||
The default processor stack size of 1Mb in some Windows environments is too
 | 
			
		||||
small for matching patterns that need much recursion. In particular, test 2 may
 | 
			
		||||
fail because of this. Normally, running out of stack causes a crash, but there
 | 
			
		||||
have been cases where the test program has just died silently. See your linker
 | 
			
		||||
documentation for how to increase stack size if you experience problems. The
 | 
			
		||||
Linux default of 8Mb is a reasonable choice for the stack, though even that can
 | 
			
		||||
be too small for some pattern/subject combinations.
 | 
			
		||||
 | 
			
		||||
PCRE has a compile configuration option to disable the use of stack for
 | 
			
		||||
recursion so that heap is used instead. However, pattern matching is
 | 
			
		||||
significantly slower when this is done. There is more about stack usage in the
 | 
			
		||||
"pcrestack" documentation.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
 | 
			
		||||
 | 
			
		||||
If you want to statically link a program against a PCRE library in the form of
 | 
			
		||||
a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
 | 
			
		||||
pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
 | 
			
		||||
be declared __declspec(dllimport), with unwanted results.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
 | 
			
		||||
 | 
			
		||||
It is possible to compile programs to use different calling conventions using
 | 
			
		||||
MSVC. Search the web for "calling conventions" for more information. To make it
 | 
			
		||||
easier to change the calling convention for the exported functions in the
 | 
			
		||||
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
 | 
			
		||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
 | 
			
		||||
not set, it defaults to empty; the default calling convention is then used
 | 
			
		||||
(which is what is wanted most of the time).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE")
 | 
			
		||||
 | 
			
		||||
There are two ways of building PCRE using the "configure, make, make install"
 | 
			
		||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
 | 
			
		||||
the same thing; they are completely different from each other. There is also
 | 
			
		||||
support for building using CMake, which some users find a more straightforward
 | 
			
		||||
way of building PCRE under Windows.
 | 
			
		||||
 | 
			
		||||
The MinGW home page (http://www.mingw.org/) says this:
 | 
			
		||||
 | 
			
		||||
  MinGW: A collection of freely available and freely distributable Windows
 | 
			
		||||
  specific header files and import libraries combined with GNU toolsets that
 | 
			
		||||
  allow one to produce native Windows programs that do not rely on any
 | 
			
		||||
  3rd-party C runtime DLLs.
 | 
			
		||||
 | 
			
		||||
The Cygwin home page (http://www.cygwin.com/) says this:
 | 
			
		||||
 | 
			
		||||
  Cygwin is a Linux-like environment for Windows. It consists of two parts:
 | 
			
		||||
 | 
			
		||||
  . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
 | 
			
		||||
    substantial Linux API functionality
 | 
			
		||||
 | 
			
		||||
  . A collection of tools which provide Linux look and feel.
 | 
			
		||||
 | 
			
		||||
  The Cygwin DLL currently works with all recent, commercially released x86 32
 | 
			
		||||
  bit and 64 bit versions of Windows, with the exception of Windows CE.
 | 
			
		||||
 | 
			
		||||
On both MinGW and Cygwin, PCRE should build correctly using:
 | 
			
		||||
 | 
			
		||||
  ./configure && make && make install
 | 
			
		||||
 | 
			
		||||
This should create two libraries called libpcre and libpcreposix, and, if you
 | 
			
		||||
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
 | 
			
		||||
independent libraries: when you link with libpcreposix or libpcrecpp you must
 | 
			
		||||
also link with libpcre, which contains the basic functions. (Some earlier
 | 
			
		||||
releases of PCRE included the basic libpcre functions in libpcreposix. This no
 | 
			
		||||
longer happens.)
 | 
			
		||||
 | 
			
		||||
A user submitted a special-purpose patch that makes it easy to create
 | 
			
		||||
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
 | 
			
		||||
as a special target. If you use this target, no other files are built, and in
 | 
			
		||||
particular, the pcretest and pcregrep programs are not built. An example of how
 | 
			
		||||
this might be used is:
 | 
			
		||||
 | 
			
		||||
  ./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
 | 
			
		||||
 | 
			
		||||
Using Cygwin's compiler generates libraries and executables that depend on
 | 
			
		||||
cygwin1.dll. If a library that is generated this way is distributed,
 | 
			
		||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
 | 
			
		||||
licence, this forces not only PCRE to be under the GPL, but also the entire
 | 
			
		||||
application. A distributor who wants to keep their own code proprietary must
 | 
			
		||||
purchase an appropriate Cygwin licence.
 | 
			
		||||
 | 
			
		||||
MinGW has no such restrictions. The MinGW compiler generates a library or
 | 
			
		||||
executable that can run standalone on Windows without any third party dll or
 | 
			
		||||
licensing issues.
 | 
			
		||||
 | 
			
		||||
But there is more complication:
 | 
			
		||||
 | 
			
		||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
 | 
			
		||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
 | 
			
		||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
 | 
			
		||||
gcc and MinGW's gcc). So, a user can:
 | 
			
		||||
 | 
			
		||||
. Build native binaries by using MinGW or by getting Cygwin and using
 | 
			
		||||
  -mno-cygwin.
 | 
			
		||||
 | 
			
		||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
 | 
			
		||||
  compiler flags.
 | 
			
		||||
 | 
			
		||||
The test files that are supplied with PCRE are in UNIX format, with LF
 | 
			
		||||
characters as line terminators. Unless your PCRE library uses a default newline
 | 
			
		||||
option that includes LF as a valid newline, it may be necessary to change the
 | 
			
		||||
line terminators in the test files to get some of the tests to work.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING PCRE ON WINDOWS WITH CMAKE
 | 
			
		||||
 | 
			
		||||
CMake is an alternative configuration facility that can be used instead of
 | 
			
		||||
"configure". CMake creates project files (make files, solution files, etc.)
 | 
			
		||||
tailored to numerous development environments, including Visual Studio,
 | 
			
		||||
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
 | 
			
		||||
spaces in the names for your CMake installation and your PCRE source and build
 | 
			
		||||
directories.
 | 
			
		||||
 | 
			
		||||
The following instructions were contributed by a PCRE user. If they are not
 | 
			
		||||
followed exactly, errors may occur. In the event that errors do occur, it is
 | 
			
		||||
recommended that you delete the CMake cache before attempting to repeat the
 | 
			
		||||
CMake build process. In the CMake GUI, the cache can be deleted by selecting
 | 
			
		||||
"File > Delete Cache".
 | 
			
		||||
 | 
			
		||||
1.  Install the latest CMake version available from http://www.cmake.org/, and
 | 
			
		||||
    ensure that cmake\bin is on your path.
 | 
			
		||||
 | 
			
		||||
2.  Unzip (retaining folder structure) the PCRE source tree into a source
 | 
			
		||||
    directory such as C:\pcre. You should ensure your local date and time
 | 
			
		||||
    is not earlier than the file dates in your source dir if the release is
 | 
			
		||||
    very new.
 | 
			
		||||
 | 
			
		||||
3.  Create a new, empty build directory, preferably a subdirectory of the
 | 
			
		||||
    source dir. For example, C:\pcre\pcre-xx\build.
 | 
			
		||||
 | 
			
		||||
4.  Run cmake-gui from the Shell environment of your build tool, for example,
 | 
			
		||||
    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
 | 
			
		||||
    to start CMake from the Windows Start menu, as this can lead to errors.
 | 
			
		||||
 | 
			
		||||
5.  Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build
 | 
			
		||||
    directories, respectively.
 | 
			
		||||
 | 
			
		||||
6.  Hit the "Configure" button.
 | 
			
		||||
 | 
			
		||||
7.  Select the particular IDE / build tool that you are using (Visual
 | 
			
		||||
    Studio, MSYS makefiles, MinGW makefiles, etc.)
 | 
			
		||||
 | 
			
		||||
8.  The GUI will then list several configuration options. This is where
 | 
			
		||||
    you can enable UTF-8 support or other PCRE optional features.
 | 
			
		||||
 | 
			
		||||
9.  Hit "Configure" again. The adjacent "Generate" button should now be
 | 
			
		||||
    active.
 | 
			
		||||
 | 
			
		||||
10. Hit "Generate".
 | 
			
		||||
 | 
			
		||||
11. The build directory should now contain a usable build system, be it a
 | 
			
		||||
    solution file for Visual Studio, makefiles for MinGW, etc. Exit from
 | 
			
		||||
    cmake-gui and use the generated build system with your compiler or IDE.
 | 
			
		||||
    E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE
 | 
			
		||||
    solution, select the desired configuration (Debug, or Release, etc.) and
 | 
			
		||||
    build the ALL_BUILD project.
 | 
			
		||||
 | 
			
		||||
12. If during configuration with cmake-gui you've elected to build the test
 | 
			
		||||
    programs, you can execute them by building the test project. E.g., for
 | 
			
		||||
    MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
 | 
			
		||||
    most recent build configuration is targeted by the tests. A summary of
 | 
			
		||||
    test results is presented. Complete test output is subsequently
 | 
			
		||||
    available for review in Testing\Temporary under your build dir.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
 | 
			
		||||
 | 
			
		||||
A PCRE user comments as follows: I thought that others may want to know the
 | 
			
		||||
current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is:
 | 
			
		||||
 | 
			
		||||
-- AdditionalIncludeDirectories is only partially modified (only the
 | 
			
		||||
   first path - see below)
 | 
			
		||||
-- Only some of the contained file paths are modified - shown below for
 | 
			
		||||
   pcre.vcproj
 | 
			
		||||
-- It properly modifies
 | 
			
		||||
 | 
			
		||||
I am sure CMake people can fix that if they want to. Until then one will
 | 
			
		||||
need to replace existing absolute paths in project files with relative
 | 
			
		||||
paths manually (e.g. from VS) - relative to project file location. I did
 | 
			
		||||
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
 | 
			
		||||
deal.
 | 
			
		||||
 | 
			
		||||
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
 | 
			
		||||
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
 | 
			
		||||
 | 
			
		||||
RelativePath="pcre.h"
 | 
			
		||||
RelativePath="pcre_chartables.c"
 | 
			
		||||
RelativePath="pcre_chartables.c.rule"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
TESTING WITH RUNTEST.BAT
 | 
			
		||||
 | 
			
		||||
If configured with CMake, building the test project ("make test" or building
 | 
			
		||||
RUN_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending
 | 
			
		||||
on your configuration options, possibly other test programs) in the build
 | 
			
		||||
directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths.
 | 
			
		||||
 | 
			
		||||
For manual testing with RunTest.bat, provided the build dir is a subdirectory
 | 
			
		||||
of the source directory: Open command shell window. Chdir to the location
 | 
			
		||||
of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with
 | 
			
		||||
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
 | 
			
		||||
 | 
			
		||||
To run only a particular test with RunTest.Bat provide a test number argument.
 | 
			
		||||
 | 
			
		||||
Otherwise:
 | 
			
		||||
 | 
			
		||||
1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe
 | 
			
		||||
   have been created.
 | 
			
		||||
 | 
			
		||||
2. Edit RunTest.bat to identify the full or relative location of
 | 
			
		||||
   the pcre source (in which the testdata folder resides), e.g.:
 | 
			
		||||
 | 
			
		||||
   set srcdir=C:\pcre\pcre-8.20
 | 
			
		||||
 | 
			
		||||
3. In a Windows command environment, chdir to the location of your bat and
 | 
			
		||||
   exe programs.
 | 
			
		||||
 | 
			
		||||
4. Run RunTest.bat. Test outputs will automatically be compared to expected
 | 
			
		||||
   results, and discrepancies will be identified in the console output.
 | 
			
		||||
 | 
			
		||||
To independently test the just-in-time compiler, run pcre_jit_test.exe.
 | 
			
		||||
To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
 | 
			
		||||
pcre_scanner_unittest.exe.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
 | 
			
		||||
 | 
			
		||||
Vincent Richomme sent a zip archive of files to help with this process. They
 | 
			
		||||
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
 | 
			
		||||
site.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING UNDER WINDOWS WITH BCC5.5
 | 
			
		||||
 | 
			
		||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
 | 
			
		||||
 | 
			
		||||
Some of the core BCC libraries have a version of PCRE from 1998 built in, which
 | 
			
		||||
can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version
 | 
			
		||||
mismatch. I'm including an easy workaround below, if you'd like to include it
 | 
			
		||||
in the non-unix instructions:
 | 
			
		||||
 | 
			
		||||
When linking a project with BCC5.5, pcre.lib must be included before any of the
 | 
			
		||||
libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER
 | 
			
		||||
 | 
			
		||||
A PCRE user sent these comments about this environment (see also the comment
 | 
			
		||||
from another user that follows them):
 | 
			
		||||
 | 
			
		||||
The XE versions of C++ Builder come with a RegularExpressionsCore class which
 | 
			
		||||
contains a version of TPerlRegEx. However, direct use of the C PCRE library may
 | 
			
		||||
be desirable.
 | 
			
		||||
 | 
			
		||||
The default makevp.bat, however, supplied with PCRE builds a version of PCRE
 | 
			
		||||
that is not usable with any version of C++ Builder because the compiler ships
 | 
			
		||||
with an embedded version of PCRE, version 2.01 from 1998! [See also the note
 | 
			
		||||
about BCC5.5 above.] If you want to use PCRE you'll need to rename the
 | 
			
		||||
functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just
 | 
			
		||||
use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the
 | 
			
		||||
embedded version of PCRE does not have the 16 bit function names, there is no
 | 
			
		||||
conflict.
 | 
			
		||||
 | 
			
		||||
Building PCRE using a C++ Builder static library project file (recommended):
 | 
			
		||||
 | 
			
		||||
1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder
 | 
			
		||||
original include path.
 | 
			
		||||
 | 
			
		||||
2. Download PCRE from pcre.org and extract to a directory.
 | 
			
		||||
 | 
			
		||||
3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to
 | 
			
		||||
pcre.h, and config.h.generic to config.h.
 | 
			
		||||
 | 
			
		||||
4. Edit pcre.h and pcre_config.c so that they include config.h.
 | 
			
		||||
 | 
			
		||||
5. Edit config.h like so:
 | 
			
		||||
 | 
			
		||||
Comment out the following lines:
 | 
			
		||||
#define PACKAGE "pcre"
 | 
			
		||||
#define PACKAGE_BUGREPORT ""
 | 
			
		||||
#define PACKAGE_NAME "PCRE"
 | 
			
		||||
#define PACKAGE_STRING "PCRE 8.32"
 | 
			
		||||
#define PACKAGE_TARNAME "pcre"
 | 
			
		||||
#define PACKAGE_URL ""
 | 
			
		||||
#define PACKAGE_VERSION "8.32"
 | 
			
		||||
 | 
			
		||||
Add the following lines:
 | 
			
		||||
#ifndef SUPPORT_UTF
 | 
			
		||||
#define SUPPORT_UTF 100 // any value is fine
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SUPPORT_UCP
 | 
			
		||||
#define SUPPORT_UCP 101 // any value is fine
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SUPPORT_PCRE16
 | 
			
		||||
#define SUPPORT_PCRE16 102 // any value is fine
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SUPPORT_UTF8
 | 
			
		||||
#define SUPPORT_UTF8 103 // any value is fine
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
6. Build a C++ Builder project using the IDE. Go to File / New / Other and
 | 
			
		||||
choose Static Library. You can name it pcre.cbproj or whatever. Now set your
 | 
			
		||||
paths by going to Project / Options. Set the Include path. Do this from the
 | 
			
		||||
"Base" option to apply to both Release and Debug builds. Now add the following
 | 
			
		||||
files to the project:
 | 
			
		||||
 | 
			
		||||
pcre.h
 | 
			
		||||
pcre16_byte_order.c
 | 
			
		||||
pcre16_chartables.c
 | 
			
		||||
pcre16_compile.c
 | 
			
		||||
pcre16_config.c
 | 
			
		||||
pcre16_dfa_exec.c
 | 
			
		||||
pcre16_exec.c
 | 
			
		||||
pcre16_fullinfo.c
 | 
			
		||||
pcre16_get.c
 | 
			
		||||
pcre16_globals.c
 | 
			
		||||
pcre16_maketables.c
 | 
			
		||||
pcre16_newline.c
 | 
			
		||||
pcre16_ord2utf16.c
 | 
			
		||||
pcre16_printint.c
 | 
			
		||||
pcre16_refcount.c
 | 
			
		||||
pcre16_string_utils.c
 | 
			
		||||
pcre16_study.c
 | 
			
		||||
pcre16_tables.c
 | 
			
		||||
pcre16_ucd.c
 | 
			
		||||
pcre16_utf16_utils.c
 | 
			
		||||
pcre16_valid_utf16.c
 | 
			
		||||
pcre16_version.c
 | 
			
		||||
pcre16_xclass.c
 | 
			
		||||
 | 
			
		||||
//Optional
 | 
			
		||||
pcre_version.c
 | 
			
		||||
 | 
			
		||||
7. After compiling the .lib file, copy the .lib and header files to a project
 | 
			
		||||
you want to use PCRE with. Enjoy.
 | 
			
		||||
 | 
			
		||||
Optional ... Building PCRE using the makevp.bat file:
 | 
			
		||||
 | 
			
		||||
1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit
 | 
			
		||||
versions.
 | 
			
		||||
 | 
			
		||||
2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat.
 | 
			
		||||
 | 
			
		||||
Another PCRE user added this comment:
 | 
			
		||||
 | 
			
		||||
Another approach I successfully used for some years with BCB 5 and 6 was to
 | 
			
		||||
make sure that include and library paths of PCRE are configured before the
 | 
			
		||||
default paths of the IDE in the dialogs where one can manage those paths.
 | 
			
		||||
Afterwards one can open the project files using a text editor and manually add
 | 
			
		||||
the self-created library for pcre itself (pcrecpp doesn't ship with the IDE) in
 | 
			
		||||
the library nodes where the IDE manages its own libraries to link against in
 | 
			
		||||
front of the IDE-own libraries. This way one can use the default PCRE function
 | 
			
		||||
names without getting access violations at runtime.
 | 
			
		||||
 | 
			
		||||
  <ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING PCRE ON OPENVMS
 | 
			
		||||
 | 
			
		||||
Stephen Hoffman sent the following, in December 2012:
 | 
			
		||||
 | 
			
		||||
"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the
 | 
			
		||||
OpenVMS port and here
 | 
			
		||||
 | 
			
		||||
<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip>
 | 
			
		||||
 | 
			
		||||
is a zip with the OpenVMS files, and with one modified testing-related PCRE
 | 
			
		||||
file." This is a port of PCRE 8.32.
 | 
			
		||||
 | 
			
		||||
Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS.
 | 
			
		||||
They relate to an older version of PCRE that used fewer source files, so the
 | 
			
		||||
exact commands will need changing. See the current list of source files above.
 | 
			
		||||
 | 
			
		||||
"It was quite easy to compile and link the library. I don't have a formal
 | 
			
		||||
make file but the attached file [reproduced below] contains the OpenVMS DCL
 | 
			
		||||
commands I used to build the library. I had to add #define
 | 
			
		||||
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
 | 
			
		||||
 | 
			
		||||
The library was built on:
 | 
			
		||||
O/S: HP OpenVMS v7.3-1
 | 
			
		||||
Compiler: Compaq C v6.5-001-48BCD
 | 
			
		||||
Linker: vA13-01
 | 
			
		||||
 | 
			
		||||
The test results did not match 100% due to the issues you mention in your
 | 
			
		||||
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
 | 
			
		||||
modified some of the character tables temporarily and was able to get the
 | 
			
		||||
results to match. Tests using the fr locale did not match since I don't have
 | 
			
		||||
that locale loaded. The study size was always reported to be 3 less than the
 | 
			
		||||
value in the standard test output files."
 | 
			
		||||
 | 
			
		||||
=========================
 | 
			
		||||
$! This DCL procedure builds PCRE on OpenVMS
 | 
			
		||||
$!
 | 
			
		||||
$! I followed the instructions in the non-unix-use file in the distribution.
 | 
			
		||||
$!
 | 
			
		||||
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES"
 | 
			
		||||
$ COMPILE DFTABLES.C
 | 
			
		||||
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
 | 
			
		||||
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
 | 
			
		||||
$ COMPILE MAKETABLES.C
 | 
			
		||||
$ COMPILE GET.C
 | 
			
		||||
$ COMPILE STUDY.C
 | 
			
		||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
 | 
			
		||||
$! did not seem to be defined anywhere.
 | 
			
		||||
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
 | 
			
		||||
$ COMPILE PCRE.C
 | 
			
		||||
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
 | 
			
		||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
 | 
			
		||||
$! did not seem to be defined anywhere.
 | 
			
		||||
$ COMPILE PCREPOSIX.C
 | 
			
		||||
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
 | 
			
		||||
$ COMPILE PCRETEST.C
 | 
			
		||||
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
 | 
			
		||||
$! C programs that want access to command line arguments must be
 | 
			
		||||
$! defined as a symbol
 | 
			
		||||
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
 | 
			
		||||
$! Arguments must be enclosed in quotes.
 | 
			
		||||
$ PCRETEST "-C"
 | 
			
		||||
$! Test results:
 | 
			
		||||
$!
 | 
			
		||||
$!   The test results did not match 100%. The functions isprint(), iscntrl(),
 | 
			
		||||
$!   isgraph() and ispunct() on OpenVMS must not produce the same results
 | 
			
		||||
$!   as the system that built the test output files provided with the
 | 
			
		||||
$!   distribution.
 | 
			
		||||
$!
 | 
			
		||||
$!   The study size did not match and was always 3 less on OpenVMS.
 | 
			
		||||
$!
 | 
			
		||||
$!   Locale could not be set to fr
 | 
			
		||||
$!
 | 
			
		||||
=========================
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING PCRE ON STRATUS OPENVOS
 | 
			
		||||
 | 
			
		||||
These notes on the port of PCRE to VOS (lightly edited) were supplied by
 | 
			
		||||
Ashutosh Warikoo, whose email address has the local part awarikoo and the
 | 
			
		||||
domain nse.co.in. The port was for version 7.9 in August 2009.
 | 
			
		||||
 | 
			
		||||
1.   Building PCRE
 | 
			
		||||
 | 
			
		||||
I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any
 | 
			
		||||
problems. I used the following packages to build PCRE:
 | 
			
		||||
 | 
			
		||||
  ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz
 | 
			
		||||
 | 
			
		||||
Please read and follow the instructions that come with these packages. To start
 | 
			
		||||
the build of pcre, from the root of the package type:
 | 
			
		||||
 | 
			
		||||
  ./build.sh
 | 
			
		||||
 | 
			
		||||
2. Installing PCRE
 | 
			
		||||
 | 
			
		||||
Once you have successfully built PCRE, login to the SysAdmin group, switch to
 | 
			
		||||
the root user, and type
 | 
			
		||||
 | 
			
		||||
  [ !create_dir (master_disk)>usr   --if needed ]
 | 
			
		||||
  [ !create_dir (master_disk)>usr>local   --if needed ]
 | 
			
		||||
    !gmake install
 | 
			
		||||
 | 
			
		||||
This installs PCRE and its man pages into /usr/local. You can add
 | 
			
		||||
(master_disk)>usr>local>bin to your command search paths, or if you are in
 | 
			
		||||
BASH, add /usr/local/bin to the PATH environment variable.
 | 
			
		||||
 | 
			
		||||
4. Restrictions
 | 
			
		||||
 | 
			
		||||
This port can optionally use the readline library. However, during the build I
 | 
			
		||||
faced some yet unexplored errors while linking with readline. As it was an
 | 
			
		||||
optional component I chose to disable it.
 | 
			
		||||
 | 
			
		||||
5. Known Problems
 | 
			
		||||
 | 
			
		||||
I ran the test suite, but you will have to be your own judge of whether this
 | 
			
		||||
command, and this port, suits your purposes. If you find any problems that
 | 
			
		||||
appear to be related to the port itself, please let me know. Please see the
 | 
			
		||||
build.log file in the root of the package also.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING PCRE ON NATIVE Z/OS AND Z/VM
 | 
			
		||||
 | 
			
		||||
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
 | 
			
		||||
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
 | 
			
		||||
applications can be supported through UNIX System Services, and in such an
 | 
			
		||||
environment PCRE can be built in the same way as in other systems. However, in
 | 
			
		||||
native z/OS (without UNIX System Services) and in z/VM, special ports are
 | 
			
		||||
required. For details, please see this web site:
 | 
			
		||||
 | 
			
		||||
  http://www.zaconsultants.net
 | 
			
		||||
 | 
			
		||||
There is also a mirror here:
 | 
			
		||||
 | 
			
		||||
  http://www.vsoft-software.com/downloads.html
 | 
			
		||||
 | 
			
		||||
==========================
 | 
			
		||||
Last Updated: 14 May 2013
 | 
			
		||||
							
								
								
									
										7
									
								
								tools/pcre/NON-UNIX-USE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								tools/pcre/NON-UNIX-USE
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,7 @@
 | 
			
		||||
Compiling PCRE on non-Unix systems
 | 
			
		||||
----------------------------------
 | 
			
		||||
 | 
			
		||||
This has been renamed to better reflect its contents. Please see the file
 | 
			
		||||
NON-AUTOTOOLS-BUILD for details of how to build PCRE without using autotools.
 | 
			
		||||
 | 
			
		||||
####
 | 
			
		||||
							
								
								
									
										258
									
								
								tools/pcre/PrepareRelease
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										258
									
								
								tools/pcre/PrepareRelease
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,258 @@
 | 
			
		||||
#!/bin/sh
 | 
			
		||||
 | 
			
		||||
# Script to prepare the files for building a PCRE release. It does some
 | 
			
		||||
# processing of the documentation, detrails files, and creates pcre.h.generic
 | 
			
		||||
# and config.h.generic (for use by builders who can't run ./configure).
 | 
			
		||||
 | 
			
		||||
# You must run this script before running "make dist". If its first argument
 | 
			
		||||
# is "doc", it stops after preparing the documentation. There are no other
 | 
			
		||||
# arguments. The script makes use of the following files:
 | 
			
		||||
 | 
			
		||||
# 132html     A Perl script that converts a .1 or .3 man page into HTML. It
 | 
			
		||||
#             "knows" the relevant troff constructs that are used in the PCRE
 | 
			
		||||
#             man pages.
 | 
			
		||||
 | 
			
		||||
# CheckMan    A Perl script that checks man pages for typos in the mark up.
 | 
			
		||||
 | 
			
		||||
# CleanTxt    A Perl script that cleans up the output of "nroff -man" by
 | 
			
		||||
#             removing backspaces and other redundant text so as to produce
 | 
			
		||||
#             a readable .txt file.
 | 
			
		||||
 | 
			
		||||
# Detrail     A Perl script that removes trailing spaces from files.
 | 
			
		||||
 | 
			
		||||
# doc/index.html.src
 | 
			
		||||
#             A file that is copied as index.html into the doc/html directory
 | 
			
		||||
#             when the HTML documentation is built. It works like this so that
 | 
			
		||||
#             doc/html can be deleted and re-created from scratch.
 | 
			
		||||
 | 
			
		||||
# README & NON-AUTOTOOLS-BUILD
 | 
			
		||||
#             These files are copied into the doc/html directory, with .txt
 | 
			
		||||
#             extensions so that they can be hyperlinked from the HTML
 | 
			
		||||
#             documentation, because some people just go to the HTML without
 | 
			
		||||
#             looking for text files.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# First, sort out the documentation. Remove pcredemo.3 first because it won't
 | 
			
		||||
# pass the markup check (it is created below, using markup that none of the
 | 
			
		||||
# other pages use).
 | 
			
		||||
 | 
			
		||||
cd doc
 | 
			
		||||
echo Processing documentation
 | 
			
		||||
 | 
			
		||||
/bin/rm -f pcredemo.3
 | 
			
		||||
 | 
			
		||||
# Check the remaining man pages
 | 
			
		||||
 | 
			
		||||
perl ../CheckMan *.1 *.3
 | 
			
		||||
if [ $? != 0 ] ; then exit 1; fi
 | 
			
		||||
 | 
			
		||||
# Make Text form of the documentation. It needs some mangling to make it
 | 
			
		||||
# tidy for online reading. Concatenate all the .3 stuff, but omit the
 | 
			
		||||
# individual function pages.
 | 
			
		||||
 | 
			
		||||
cat <<End >pcre.txt
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
This file contains a concatenation of the PCRE man pages, converted to plain
 | 
			
		||||
text format for ease of searching with a text editor, or for use on systems
 | 
			
		||||
that do not have a man page processor. The small individual files that give
 | 
			
		||||
synopses of each function in the library have not been included. Neither has
 | 
			
		||||
the pcredemo program. There are separate text files for the pcregrep and
 | 
			
		||||
pcretest commands.
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
End
 | 
			
		||||
 | 
			
		||||
echo "Making pcre.txt"
 | 
			
		||||
for file in pcre pcre16 pcre32 pcrebuild pcrematching pcreapi pcrecallout \
 | 
			
		||||
            pcrecompat pcrepattern pcresyntax pcreunicode pcrejit pcrepartial \
 | 
			
		||||
            pcreprecompile pcreperform pcreposix pcrecpp pcresample \
 | 
			
		||||
            pcrelimits pcrestack ; do
 | 
			
		||||
  echo "  Processing $file.3"
 | 
			
		||||
  nroff -c -man $file.3 >$file.rawtxt
 | 
			
		||||
  perl ../CleanTxt <$file.rawtxt >>pcre.txt
 | 
			
		||||
  /bin/rm $file.rawtxt
 | 
			
		||||
  echo "------------------------------------------------------------------------------" >>pcre.txt
 | 
			
		||||
  if [ "$file" != "pcresample" ] ; then
 | 
			
		||||
    echo " " >>pcre.txt
 | 
			
		||||
    echo " " >>pcre.txt
 | 
			
		||||
  fi
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
# The three commands
 | 
			
		||||
for file in pcretest pcregrep pcre-config ; do
 | 
			
		||||
  echo Making $file.txt
 | 
			
		||||
  nroff -c -man $file.1 >$file.rawtxt
 | 
			
		||||
  perl ../CleanTxt <$file.rawtxt >$file.txt
 | 
			
		||||
  /bin/rm $file.rawtxt
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Make pcredemo.3 from the pcredemo.c source file
 | 
			
		||||
 | 
			
		||||
echo "Making pcredemo.3"
 | 
			
		||||
perl <<"END" >pcredemo.3
 | 
			
		||||
  open(IN, "../pcredemo.c") || die "Failed to open pcredemo.c\n";
 | 
			
		||||
  open(OUT, ">pcredemo.3") || die "Failed to open pcredemo.3\n";
 | 
			
		||||
  print OUT ".\\\" Start example.\n" .
 | 
			
		||||
            ".de EX\n" .
 | 
			
		||||
            ".  nr mE \\\\n(.f\n" .
 | 
			
		||||
            ".  nf\n" .
 | 
			
		||||
            ".  nh\n" .
 | 
			
		||||
            ".  ft CW\n" .
 | 
			
		||||
            "..\n" .
 | 
			
		||||
            ".\n" .
 | 
			
		||||
            ".\n" .
 | 
			
		||||
            ".\\\" End example.\n" .
 | 
			
		||||
            ".de EE\n" .
 | 
			
		||||
            ".  ft \\\\n(mE\n" .
 | 
			
		||||
            ".  fi\n" .
 | 
			
		||||
            ".  hy \\\\n(HY\n" .
 | 
			
		||||
            "..\n" .
 | 
			
		||||
            ".\n" .
 | 
			
		||||
            ".EX\n" ;
 | 
			
		||||
  while (<IN>)
 | 
			
		||||
    {
 | 
			
		||||
    s/\\/\\e/g;
 | 
			
		||||
    print OUT;
 | 
			
		||||
    }
 | 
			
		||||
  print OUT ".EE\n";
 | 
			
		||||
  close(IN);
 | 
			
		||||
  close(OUT);
 | 
			
		||||
END
 | 
			
		||||
if [ $? != 0 ] ; then exit 1; fi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Make HTML form of the documentation.
 | 
			
		||||
 | 
			
		||||
echo "Making HTML documentation"
 | 
			
		||||
/bin/rm html/*
 | 
			
		||||
cp index.html.src html/index.html
 | 
			
		||||
cp ../README html/README.txt
 | 
			
		||||
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
 | 
			
		||||
 | 
			
		||||
for file in *.1 ; do
 | 
			
		||||
  base=`basename $file .1`
 | 
			
		||||
  echo "  Making $base.html"
 | 
			
		||||
  perl ../132html -toc $base <$file >html/$base.html
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
# Exclude table of contents for function summaries. It seems that expr
 | 
			
		||||
# forces an anchored regex. Also exclude them for small pages that have
 | 
			
		||||
# only one section.
 | 
			
		||||
 | 
			
		||||
for file in *.3 ; do
 | 
			
		||||
  base=`basename $file .3`
 | 
			
		||||
  toc=-toc
 | 
			
		||||
  if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
 | 
			
		||||
  if [ "$base" = "pcresample" ]  || \
 | 
			
		||||
     [ "$base" = "pcrestack" ]   || \
 | 
			
		||||
     [ "$base" = "pcrecompat" ]  || \
 | 
			
		||||
     [ "$base" = "pcrelimits" ]  || \
 | 
			
		||||
     [ "$base" = "pcreperform" ] || \
 | 
			
		||||
     [ "$base" = "pcreunicode" ] ; then
 | 
			
		||||
    toc=""
 | 
			
		||||
  fi
 | 
			
		||||
  echo "  Making $base.html"
 | 
			
		||||
  perl ../132html $toc $base <$file >html/$base.html
 | 
			
		||||
  if [ $? != 0 ] ; then exit 1; fi
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
# End of documentation processing; stop if only documentation required.
 | 
			
		||||
 | 
			
		||||
cd ..
 | 
			
		||||
echo Documentation done
 | 
			
		||||
if [ "$1" = "doc" ] ; then exit; fi
 | 
			
		||||
 | 
			
		||||
# These files are detrailed; do not detrail the test data because there may be
 | 
			
		||||
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
 | 
			
		||||
# line endings and the detrail script removes all trailing white space. The
 | 
			
		||||
# configure files are also omitted from the detrailing. We don't bother with
 | 
			
		||||
# those pcre[16|32]_xx files that just define COMPILE_PCRE[16|32] and then #include the
 | 
			
		||||
# common file, because they aren't going to change.
 | 
			
		||||
 | 
			
		||||
files="\
 | 
			
		||||
  Makefile.am \
 | 
			
		||||
  Makefile.in \
 | 
			
		||||
  configure.ac \
 | 
			
		||||
  README \
 | 
			
		||||
  LICENCE \
 | 
			
		||||
  COPYING \
 | 
			
		||||
  AUTHORS \
 | 
			
		||||
  NEWS \
 | 
			
		||||
  NON-UNIX-USE \
 | 
			
		||||
  NON-AUTOTOOLS-BUILD \
 | 
			
		||||
  INSTALL \
 | 
			
		||||
  132html \
 | 
			
		||||
  CleanTxt \
 | 
			
		||||
  Detrail \
 | 
			
		||||
  ChangeLog \
 | 
			
		||||
  CMakeLists.txt \
 | 
			
		||||
  RunGrepTest \
 | 
			
		||||
  RunTest \
 | 
			
		||||
  pcre-config.in \
 | 
			
		||||
  libpcre.pc.in \
 | 
			
		||||
  libpcre16.pc.in \
 | 
			
		||||
  libpcre32.pc.in \
 | 
			
		||||
  libpcreposix.pc.in \
 | 
			
		||||
  libpcrecpp.pc.in \
 | 
			
		||||
  config.h.in \
 | 
			
		||||
  pcre_chartables.c.dist \
 | 
			
		||||
  pcredemo.c \
 | 
			
		||||
  pcregrep.c \
 | 
			
		||||
  pcretest.c \
 | 
			
		||||
  dftables.c \
 | 
			
		||||
  pcreposix.c \
 | 
			
		||||
  pcreposix.h \
 | 
			
		||||
  pcre.h.in \
 | 
			
		||||
  pcre_internal.h \
 | 
			
		||||
  pcre_byte_order.c \
 | 
			
		||||
  pcre_compile.c \
 | 
			
		||||
  pcre_config.c \
 | 
			
		||||
  pcre_dfa_exec.c \
 | 
			
		||||
  pcre_exec.c \
 | 
			
		||||
  pcre_fullinfo.c \
 | 
			
		||||
  pcre_get.c \
 | 
			
		||||
  pcre_globals.c \
 | 
			
		||||
  pcre_jit_compile.c \
 | 
			
		||||
  pcre_jit_test.c \
 | 
			
		||||
  pcre_maketables.c \
 | 
			
		||||
  pcre_newline.c \
 | 
			
		||||
  pcre_ord2utf8.c \
 | 
			
		||||
  pcre16_ord2utf16.c \
 | 
			
		||||
  pcre32_ord2utf32.c \
 | 
			
		||||
  pcre_printint.c \
 | 
			
		||||
  pcre_refcount.c \
 | 
			
		||||
  pcre_string_utils.c \
 | 
			
		||||
  pcre_study.c \
 | 
			
		||||
  pcre_tables.c \
 | 
			
		||||
  pcre_valid_utf8.c \
 | 
			
		||||
  pcre_version.c \
 | 
			
		||||
  pcre_xclass.c \
 | 
			
		||||
  pcre16_utf16_utils.c \
 | 
			
		||||
  pcre32_utf32_utils.c \
 | 
			
		||||
  pcre16_valid_utf16.c \
 | 
			
		||||
  pcre32_valid_utf32.c \
 | 
			
		||||
  pcre_scanner.cc \
 | 
			
		||||
  pcre_scanner.h \
 | 
			
		||||
  pcre_scanner_unittest.cc \
 | 
			
		||||
  pcrecpp.cc \
 | 
			
		||||
  pcrecpp.h \
 | 
			
		||||
  pcrecpparg.h.in \
 | 
			
		||||
  pcrecpp_unittest.cc \
 | 
			
		||||
  pcre_stringpiece.cc \
 | 
			
		||||
  pcre_stringpiece.h.in \
 | 
			
		||||
  pcre_stringpiece_unittest.cc \
 | 
			
		||||
  perltest.pl \
 | 
			
		||||
  ucp.h \
 | 
			
		||||
  makevp.bat \
 | 
			
		||||
  pcre.def \
 | 
			
		||||
  libpcre.def \
 | 
			
		||||
  libpcreposix.def"
 | 
			
		||||
 | 
			
		||||
echo Detrailing
 | 
			
		||||
perl ./Detrail $files doc/p* doc/html/*
 | 
			
		||||
 | 
			
		||||
echo Done
 | 
			
		||||
 | 
			
		||||
#End
 | 
			
		||||
							
								
								
									
										991
									
								
								tools/pcre/README
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										991
									
								
								tools/pcre/README
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,991 @@
 | 
			
		||||
README file for PCRE (Perl-compatible regular expression library)
 | 
			
		||||
-----------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
The latest release of PCRE is always available in three alternative formats
 | 
			
		||||
from:
 | 
			
		||||
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
 | 
			
		||||
 | 
			
		||||
There is a mailing list for discussion about the development of PCRE at
 | 
			
		||||
pcre-dev@exim.org. You can access the archives and subscribe or manage your
 | 
			
		||||
subscription here:
 | 
			
		||||
 | 
			
		||||
   https://lists.exim.org/mailman/listinfo/pcre-dev
 | 
			
		||||
 | 
			
		||||
Please read the NEWS file if you are upgrading from a previous release.
 | 
			
		||||
The contents of this README file are:
 | 
			
		||||
 | 
			
		||||
  The PCRE APIs
 | 
			
		||||
  Documentation for PCRE
 | 
			
		||||
  Contributions by users of PCRE
 | 
			
		||||
  Building PCRE on non-Unix-like systems
 | 
			
		||||
  Building PCRE without using autotools
 | 
			
		||||
  Building PCRE using autotools
 | 
			
		||||
  Retrieving configuration information
 | 
			
		||||
  Shared libraries
 | 
			
		||||
  Cross-compiling using autotools
 | 
			
		||||
  Using HP's ANSI C++ compiler (aCC)
 | 
			
		||||
  Compiling in Tru64 using native compilers
 | 
			
		||||
  Using Sun's compilers for Solaris
 | 
			
		||||
  Using PCRE from MySQL
 | 
			
		||||
  Making new tarballs
 | 
			
		||||
  Testing PCRE
 | 
			
		||||
  Character tables
 | 
			
		||||
  File manifest
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
The PCRE APIs
 | 
			
		||||
-------------
 | 
			
		||||
 | 
			
		||||
PCRE is written in C, and it has its own API. There are three sets of
 | 
			
		||||
functions, one for the 8-bit library, which processes strings of bytes, one for
 | 
			
		||||
the 16-bit library, which processes strings of 16-bit values, and one for the
 | 
			
		||||
32-bit library, which processes strings of 32-bit values. The distribution also
 | 
			
		||||
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
 | 
			
		||||
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
 | 
			
		||||
C++.
 | 
			
		||||
 | 
			
		||||
In addition, there is a set of C wrapper functions (again, just for the 8-bit
 | 
			
		||||
library) that are based on the POSIX regular expression API (see the pcreposix
 | 
			
		||||
man page). These end up in the library called libpcreposix. Note that this just
 | 
			
		||||
provides a POSIX calling interface to PCRE; the regular expressions themselves
 | 
			
		||||
still follow Perl syntax and semantics. The POSIX API is restricted, and does
 | 
			
		||||
not give full access to all of PCRE's facilities.
 | 
			
		||||
 | 
			
		||||
The header file for the POSIX-style functions is called pcreposix.h. The
 | 
			
		||||
official POSIX name is regex.h, but I did not want to risk possible problems
 | 
			
		||||
with existing files of that name by distributing it that way. To use PCRE with
 | 
			
		||||
an existing program that uses the POSIX API, pcreposix.h will have to be
 | 
			
		||||
renamed or pointed at by a link.
 | 
			
		||||
 | 
			
		||||
If you are using the POSIX interface to PCRE and there is already a POSIX regex
 | 
			
		||||
library installed on your system, as well as worrying about the regex.h header
 | 
			
		||||
file (as mentioned above), you must also take care when linking programs to
 | 
			
		||||
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
 | 
			
		||||
up the POSIX functions of the same name from the other library.
 | 
			
		||||
 | 
			
		||||
One way of avoiding this confusion is to compile PCRE with the addition of
 | 
			
		||||
-Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the
 | 
			
		||||
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
 | 
			
		||||
effect of renaming the functions so that the names no longer clash. Of course,
 | 
			
		||||
you have to do the same thing for your applications, or write them using the
 | 
			
		||||
new names.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Documentation for PCRE
 | 
			
		||||
----------------------
 | 
			
		||||
 | 
			
		||||
If you install PCRE in the normal way on a Unix-like system, you will end up
 | 
			
		||||
with a set of man pages whose names all start with "pcre". The one that is just
 | 
			
		||||
called "pcre" lists all the others. In addition to these man pages, the PCRE
 | 
			
		||||
documentation is supplied in two other forms:
 | 
			
		||||
 | 
			
		||||
  1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
 | 
			
		||||
     doc/pcretest.txt in the source distribution. The first of these is a
 | 
			
		||||
     concatenation of the text forms of all the section 3 man pages except
 | 
			
		||||
     the listing of pcredemo.c and those that summarize individual functions.
 | 
			
		||||
     The other two are the text forms of the section 1 man pages for the
 | 
			
		||||
     pcregrep and pcretest commands. These text forms are provided for ease of
 | 
			
		||||
     scanning with text editors or similar tools. They are installed in
 | 
			
		||||
     <prefix>/share/doc/pcre, where <prefix> is the installation prefix
 | 
			
		||||
     (defaulting to /usr/local).
 | 
			
		||||
 | 
			
		||||
  2. A set of files containing all the documentation in HTML form, hyperlinked
 | 
			
		||||
     in various ways, and rooted in a file called index.html, is distributed in
 | 
			
		||||
     doc/html and installed in <prefix>/share/doc/pcre/html.
 | 
			
		||||
 | 
			
		||||
Users of PCRE have contributed files containing the documentation for various
 | 
			
		||||
releases in CHM format. These can be found in the Contrib directory of the FTP
 | 
			
		||||
site (see next section).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Contributions by users of PCRE
 | 
			
		||||
------------------------------
 | 
			
		||||
 | 
			
		||||
You can find contributions from PCRE users in the directory
 | 
			
		||||
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
 | 
			
		||||
 | 
			
		||||
There is a README file giving brief descriptions of what they are. Some are
 | 
			
		||||
complete in themselves; others are pointers to URLs containing relevant files.
 | 
			
		||||
Some of this material is likely to be well out-of-date. Several of the earlier
 | 
			
		||||
contributions provided support for compiling PCRE on various flavours of
 | 
			
		||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
 | 
			
		||||
in the standard distribution, so these contributions have been archived.
 | 
			
		||||
 | 
			
		||||
A PCRE user maintains downloadable Windows binaries of the pcregrep and
 | 
			
		||||
pcretest programs here:
 | 
			
		||||
 | 
			
		||||
  http://www.rexegg.com/pcregrep-pcretest.html
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Building PCRE on non-Unix-like systems
 | 
			
		||||
--------------------------------------
 | 
			
		||||
 | 
			
		||||
For a non-Unix-like system, please read the comments in the file
 | 
			
		||||
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
 | 
			
		||||
"make" you may be able to build PCRE using autotools in the same way as for
 | 
			
		||||
many Unix-like systems.
 | 
			
		||||
 | 
			
		||||
PCRE can also be configured using the GUI facility provided by CMake's
 | 
			
		||||
cmake-gui command. This creates Makefiles, solution files, etc. The file
 | 
			
		||||
NON-AUTOTOOLS-BUILD has information about CMake.
 | 
			
		||||
 | 
			
		||||
PCRE has been compiled on many different operating systems. It should be
 | 
			
		||||
straightforward to build PCRE on any system that has a Standard C compiler and
 | 
			
		||||
library, because it uses only Standard C functions.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Building PCRE without using autotools
 | 
			
		||||
-------------------------------------
 | 
			
		||||
 | 
			
		||||
The use of autotools (in particular, libtool) is problematic in some
 | 
			
		||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
 | 
			
		||||
file for ways of building PCRE without using autotools.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Building PCRE using autotools
 | 
			
		||||
-----------------------------
 | 
			
		||||
 | 
			
		||||
If you are using HP's ANSI C++ compiler (aCC), please see the special note
 | 
			
		||||
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
 | 
			
		||||
 | 
			
		||||
The following instructions assume the use of the widely used "configure; make;
 | 
			
		||||
make install" (autotools) process.
 | 
			
		||||
 | 
			
		||||
To build PCRE on a system that supports autotools, first run the "configure"
 | 
			
		||||
command from the PCRE distribution directory, with your current directory set
 | 
			
		||||
to the directory where you want the files to be created. This command is a
 | 
			
		||||
standard GNU "autoconf" configuration script, for which generic instructions
 | 
			
		||||
are supplied in the file INSTALL.
 | 
			
		||||
 | 
			
		||||
Most commonly, people build PCRE within its own distribution directory, and in
 | 
			
		||||
this case, on many systems, just running "./configure" is sufficient. However,
 | 
			
		||||
the usual methods of changing standard defaults are available. For example:
 | 
			
		||||
 | 
			
		||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
 | 
			
		||||
 | 
			
		||||
This command specifies that the C compiler should be run with the flags '-O2
 | 
			
		||||
-Wall' instead of the default, and that "make install" should install PCRE
 | 
			
		||||
under /opt/local instead of the default /usr/local.
 | 
			
		||||
 | 
			
		||||
If you want to build in a different directory, just run "configure" with that
 | 
			
		||||
directory as current. For example, suppose you have unpacked the PCRE source
 | 
			
		||||
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
 | 
			
		||||
 | 
			
		||||
cd /build/pcre/pcre-xxx
 | 
			
		||||
/source/pcre/pcre-xxx/configure
 | 
			
		||||
 | 
			
		||||
PCRE is written in C and is normally compiled as a C library. However, it is
 | 
			
		||||
possible to build it as a C++ library, though the provided building apparatus
 | 
			
		||||
does not have any features to support this.
 | 
			
		||||
 | 
			
		||||
There are some optional features that can be included or omitted from the PCRE
 | 
			
		||||
library. They are also documented in the pcrebuild man page.
 | 
			
		||||
 | 
			
		||||
. By default, both shared and static libraries are built. You can change this
 | 
			
		||||
  by adding one of these options to the "configure" command:
 | 
			
		||||
 | 
			
		||||
  --disable-shared
 | 
			
		||||
  --disable-static
 | 
			
		||||
 | 
			
		||||
  (See also "Shared libraries" below.)
 | 
			
		||||
 | 
			
		||||
. By default, only the 8-bit library is built. If you add --enable-pcre16 to
 | 
			
		||||
  the "configure" command, the 16-bit library is also built. If you add
 | 
			
		||||
  --enable-pcre32 to the "configure" command, the 32-bit library is also built.
 | 
			
		||||
  If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
 | 
			
		||||
  building the 8-bit library.
 | 
			
		||||
 | 
			
		||||
. If you are building the 8-bit library and want to suppress the building of
 | 
			
		||||
  the C++ wrapper library, you can add --disable-cpp to the "configure"
 | 
			
		||||
  command. Otherwise, when "configure" is run without --disable-pcre8, it will
 | 
			
		||||
  try to find a C++ compiler and C++ header files, and if it succeeds, it will
 | 
			
		||||
  try to build the C++ wrapper.
 | 
			
		||||
 | 
			
		||||
. If you want to include support for just-in-time compiling, which can give
 | 
			
		||||
  large performance improvements on certain platforms, add --enable-jit to the
 | 
			
		||||
  "configure" command. This support is available only for certain hardware
 | 
			
		||||
  architectures. If you try to enable it on an unsupported architecture, there
 | 
			
		||||
  will be a compile time error.
 | 
			
		||||
 | 
			
		||||
. When JIT support is enabled, pcregrep automatically makes use of it, unless
 | 
			
		||||
  you add --disable-pcregrep-jit to the "configure" command.
 | 
			
		||||
 | 
			
		||||
. If you want to make use of the support for UTF-8 Unicode character strings in
 | 
			
		||||
  the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
 | 
			
		||||
  or UTF-32 Unicode character strings in the 32-bit library, you must add
 | 
			
		||||
  --enable-utf to the "configure" command. Without it, the code for handling
 | 
			
		||||
  UTF-8, UTF-16 and UTF-32 is not included in the relevant library. Even
 | 
			
		||||
  when --enable-utf is included, the use of a UTF encoding still has to be
 | 
			
		||||
  enabled by an option at run time. When PCRE is compiled with this option, its
 | 
			
		||||
  input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC
 | 
			
		||||
  platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
 | 
			
		||||
  the same time.
 | 
			
		||||
 | 
			
		||||
. There are no separate options for enabling UTF-8, UTF-16 and UTF-32
 | 
			
		||||
  independently because that would allow ridiculous settings such as requesting
 | 
			
		||||
  UTF-16 support while building only the 8-bit library. However, the option
 | 
			
		||||
  --enable-utf8 is retained for backwards compatibility with earlier releases
 | 
			
		||||
  that did not support 16-bit or 32-bit character strings. It is synonymous with
 | 
			
		||||
  --enable-utf. It is not possible to configure one library with UTF support
 | 
			
		||||
  and the other without in the same configuration.
 | 
			
		||||
 | 
			
		||||
. If, in addition to support for UTF-8/16/32 character strings, you want to
 | 
			
		||||
  include support for the \P, \p, and \X sequences that recognize Unicode
 | 
			
		||||
  character properties, you must add --enable-unicode-properties to the
 | 
			
		||||
  "configure" command. This adds about 30K to the size of the library (in the
 | 
			
		||||
  form of a property table); only the basic two-letter properties such as Lu
 | 
			
		||||
  are supported.
 | 
			
		||||
 | 
			
		||||
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
 | 
			
		||||
  of the preceding, or any of the Unicode newline sequences as indicating the
 | 
			
		||||
  end of a line. Whatever you specify at build time is the default; the caller
 | 
			
		||||
  of PCRE can change the selection at run time. The default newline indicator
 | 
			
		||||
  is a single LF character (the Unix standard). You can specify the default
 | 
			
		||||
  newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
 | 
			
		||||
  or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
 | 
			
		||||
  --enable-newline-is-any to the "configure" command, respectively.
 | 
			
		||||
 | 
			
		||||
  If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
 | 
			
		||||
  the standard tests will fail, because the lines in the test files end with
 | 
			
		||||
  LF. Even if the files are edited to change the line endings, there are likely
 | 
			
		||||
  to be some failures. With --enable-newline-is-anycrlf or
 | 
			
		||||
  --enable-newline-is-any, many tests should succeed, but there may be some
 | 
			
		||||
  failures.
 | 
			
		||||
 | 
			
		||||
. By default, the sequence \R in a pattern matches any Unicode line ending
 | 
			
		||||
  sequence. This is independent of the option specifying what PCRE considers to
 | 
			
		||||
  be the end of a line (see above). However, the caller of PCRE can restrict \R
 | 
			
		||||
  to match only CR, LF, or CRLF. You can make this the default by adding
 | 
			
		||||
  --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
 | 
			
		||||
 | 
			
		||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
 | 
			
		||||
  storage for processing capturing parentheses if there are more than 10 of
 | 
			
		||||
  them in a pattern. You can increase this threshold by setting, for example,
 | 
			
		||||
 | 
			
		||||
  --with-posix-malloc-threshold=20
 | 
			
		||||
 | 
			
		||||
  on the "configure" command.
 | 
			
		||||
 | 
			
		||||
. PCRE has a counter that limits the depth of nesting of parentheses in a
 | 
			
		||||
  pattern. This limits the amount of system stack that a pattern uses when it
 | 
			
		||||
  is compiled. The default is 250, but you can change it by setting, for
 | 
			
		||||
  example,
 | 
			
		||||
 | 
			
		||||
  --with-parens-nest-limit=500
 | 
			
		||||
 | 
			
		||||
. PCRE has a counter that can be set to limit the amount of resources it uses
 | 
			
		||||
  when matching a pattern. If the limit is exceeded during a match, the match
 | 
			
		||||
  fails. The default is ten million. You can change the default by setting, for
 | 
			
		||||
  example,
 | 
			
		||||
 | 
			
		||||
  --with-match-limit=500000
 | 
			
		||||
 | 
			
		||||
  on the "configure" command. This is just the default; individual calls to
 | 
			
		||||
  pcre_exec() can supply their own value. There is more discussion on the
 | 
			
		||||
  pcreapi man page.
 | 
			
		||||
 | 
			
		||||
. There is a separate counter that limits the depth of recursive function calls
 | 
			
		||||
  during a matching process. This also has a default of ten million, which is
 | 
			
		||||
  essentially "unlimited". You can change the default by setting, for example,
 | 
			
		||||
 | 
			
		||||
  --with-match-limit-recursion=500000
 | 
			
		||||
 | 
			
		||||
  Recursive function calls use up the runtime stack; running out of stack can
 | 
			
		||||
  cause programs to crash in strange ways. There is a discussion about stack
 | 
			
		||||
  sizes in the pcrestack man page.
 | 
			
		||||
 | 
			
		||||
. The default maximum compiled pattern size is around 64K. You can increase
 | 
			
		||||
  this by adding --with-link-size=3 to the "configure" command. In the 8-bit
 | 
			
		||||
  library, PCRE then uses three bytes instead of two for offsets to different
 | 
			
		||||
  parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
 | 
			
		||||
  the same as --with-link-size=4, which (in both libraries) uses four-byte
 | 
			
		||||
  offsets. Increasing the internal link size reduces performance. In the 32-bit
 | 
			
		||||
  library, the only supported link size is 4.
 | 
			
		||||
 | 
			
		||||
. You can build PCRE so that its internal match() function that is called from
 | 
			
		||||
  pcre_exec() does not call itself recursively. Instead, it uses memory blocks
 | 
			
		||||
  obtained from the heap via the special functions pcre_stack_malloc() and
 | 
			
		||||
  pcre_stack_free() to save data that would otherwise be saved on the stack. To
 | 
			
		||||
  build PCRE like this, use
 | 
			
		||||
 | 
			
		||||
  --disable-stack-for-recursion
 | 
			
		||||
 | 
			
		||||
  on the "configure" command. PCRE runs more slowly in this mode, but it may be
 | 
			
		||||
  necessary in environments with limited stack sizes. This applies only to the
 | 
			
		||||
  normal execution of the pcre_exec() function; if JIT support is being
 | 
			
		||||
  successfully used, it is not relevant. Equally, it does not apply to
 | 
			
		||||
  pcre_dfa_exec(), which does not use deeply nested recursion. There is a
 | 
			
		||||
  discussion about stack sizes in the pcrestack man page.
 | 
			
		||||
 | 
			
		||||
. For speed, PCRE uses four tables for manipulating and identifying characters
 | 
			
		||||
  whose code point values are less than 256. By default, it uses a set of
 | 
			
		||||
  tables for ASCII encoding that is part of the distribution. If you specify
 | 
			
		||||
 | 
			
		||||
  --enable-rebuild-chartables
 | 
			
		||||
 | 
			
		||||
  a program called dftables is compiled and run in the default C locale when
 | 
			
		||||
  you obey "make". It builds a source file called pcre_chartables.c. If you do
 | 
			
		||||
  not specify this option, pcre_chartables.c is created as a copy of
 | 
			
		||||
  pcre_chartables.c.dist. See "Character tables" below for further information.
 | 
			
		||||
 | 
			
		||||
. It is possible to compile PCRE for use on systems that use EBCDIC as their
 | 
			
		||||
  character code (as opposed to ASCII/Unicode) by specifying
 | 
			
		||||
 | 
			
		||||
  --enable-ebcdic
 | 
			
		||||
 | 
			
		||||
  This automatically implies --enable-rebuild-chartables (see above). However,
 | 
			
		||||
  when PCRE is built this way, it always operates in EBCDIC. It cannot support
 | 
			
		||||
  both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
 | 
			
		||||
  which specifies that the code value for the EBCDIC NL character is 0x25
 | 
			
		||||
  instead of the default 0x15.
 | 
			
		||||
 | 
			
		||||
. In environments where valgrind is installed, if you specify
 | 
			
		||||
 | 
			
		||||
  --enable-valgrind
 | 
			
		||||
 | 
			
		||||
  PCRE will use valgrind annotations to mark certain memory regions as
 | 
			
		||||
  unaddressable. This allows it to detect invalid memory accesses, and is
 | 
			
		||||
  mostly useful for debugging PCRE itself.
 | 
			
		||||
 | 
			
		||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
 | 
			
		||||
  is installed, if you specify
 | 
			
		||||
 | 
			
		||||
  --enable-coverage
 | 
			
		||||
 | 
			
		||||
  the build process implements a code coverage report for the test suite. The
 | 
			
		||||
  report is generated by running "make coverage". If ccache is installed on
 | 
			
		||||
  your system, it must be disabled when building PCRE for coverage reporting.
 | 
			
		||||
  You can do this by setting the environment variable CCACHE_DISABLE=1 before
 | 
			
		||||
  running "make" to build PCRE. There is more information about coverage
 | 
			
		||||
  reporting in the "pcrebuild" documentation.
 | 
			
		||||
 | 
			
		||||
. The pcregrep program currently supports only 8-bit data files, and so
 | 
			
		||||
  requires the 8-bit PCRE library. It is possible to compile pcregrep to use
 | 
			
		||||
  libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
 | 
			
		||||
  specifying one or both of
 | 
			
		||||
 | 
			
		||||
  --enable-pcregrep-libz
 | 
			
		||||
  --enable-pcregrep-libbz2
 | 
			
		||||
 | 
			
		||||
  Of course, the relevant libraries must be installed on your system.
 | 
			
		||||
 | 
			
		||||
. The default size (in bytes) of the internal buffer used by pcregrep can be
 | 
			
		||||
  set by, for example:
 | 
			
		||||
 | 
			
		||||
  --with-pcregrep-bufsize=51200
 | 
			
		||||
 | 
			
		||||
  The value must be a plain integer. The default is 20480.
 | 
			
		||||
 | 
			
		||||
. It is possible to compile pcretest so that it links with the libreadline
 | 
			
		||||
  or libedit libraries, by specifying, respectively,
 | 
			
		||||
 | 
			
		||||
  --enable-pcretest-libreadline or --enable-pcretest-libedit
 | 
			
		||||
 | 
			
		||||
  If this is done, when pcretest's input is from a terminal, it reads it using
 | 
			
		||||
  the readline() function. This provides line-editing and history facilities.
 | 
			
		||||
  Note that libreadline is GPL-licenced, so if you distribute a binary of
 | 
			
		||||
  pcretest linked in this way, there may be licensing issues. These can be
 | 
			
		||||
  avoided by linking with libedit (which has a BSD licence) instead.
 | 
			
		||||
 | 
			
		||||
  Enabling libreadline causes the -lreadline option to be added to the pcretest
 | 
			
		||||
  build. In many operating environments with a system-installed readline
 | 
			
		||||
  library this is sufficient. However, in some environments (e.g. if an
 | 
			
		||||
  unmodified distribution version of readline is in use), it may be necessary
 | 
			
		||||
  to specify something like LIBS="-lncurses" as well. This is because, to quote
 | 
			
		||||
  the readline INSTALL, "Readline uses the termcap functions, but does not link
 | 
			
		||||
  with the termcap or curses library itself, allowing applications which link
 | 
			
		||||
  with readline the to choose an appropriate library." If you get error
 | 
			
		||||
  messages about missing functions tgetstr, tgetent, tputs, tgetflag, or tgoto,
 | 
			
		||||
  this is the problem, and linking with the ncurses library should fix it.
 | 
			
		||||
 | 
			
		||||
The "configure" script builds the following files for the basic C library:
 | 
			
		||||
 | 
			
		||||
. Makefile             the makefile that builds the library
 | 
			
		||||
. config.h             build-time configuration options for the library
 | 
			
		||||
. pcre.h               the public PCRE header file
 | 
			
		||||
. pcre-config          script that shows the building settings such as CFLAGS
 | 
			
		||||
                         that were set for "configure"
 | 
			
		||||
. libpcre.pc         ) data for the pkg-config command
 | 
			
		||||
. libpcre16.pc       )
 | 
			
		||||
. libpcre32.pc       )
 | 
			
		||||
. libpcreposix.pc    )
 | 
			
		||||
. libtool              script that builds shared and/or static libraries
 | 
			
		||||
 | 
			
		||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
 | 
			
		||||
names config.h.generic and pcre.h.generic. These are provided for those who
 | 
			
		||||
have to build PCRE without using "configure" or CMake. If you use "configure"
 | 
			
		||||
or CMake, the .generic versions are not used.
 | 
			
		||||
 | 
			
		||||
When building the 8-bit library, if a C++ compiler is found, the following
 | 
			
		||||
files are also built:
 | 
			
		||||
 | 
			
		||||
. libpcrecpp.pc        data for the pkg-config command
 | 
			
		||||
. pcrecpparg.h         header file for calling PCRE via the C++ wrapper
 | 
			
		||||
. pcre_stringpiece.h   header for the C++ "stringpiece" functions
 | 
			
		||||
 | 
			
		||||
The "configure" script also creates config.status, which is an executable
 | 
			
		||||
script that can be run to recreate the configuration, and config.log, which
 | 
			
		||||
contains compiler output from tests that "configure" runs.
 | 
			
		||||
 | 
			
		||||
Once "configure" has run, you can run "make". This builds the libraries
 | 
			
		||||
libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you
 | 
			
		||||
enabled JIT support with --enable-jit, a test program called pcre_jit_test is
 | 
			
		||||
built as well.
 | 
			
		||||
 | 
			
		||||
If the 8-bit library is built, libpcreposix and the pcregrep command are also
 | 
			
		||||
built, and if a C++ compiler was found on your system, and you did not disable
 | 
			
		||||
it with --disable-cpp, "make" builds the C++ wrapper library, which is called
 | 
			
		||||
libpcrecpp, as well as some test programs called pcrecpp_unittest,
 | 
			
		||||
pcre_scanner_unittest, and pcre_stringpiece_unittest.
 | 
			
		||||
 | 
			
		||||
The command "make check" runs all the appropriate tests. Details of the PCRE
 | 
			
		||||
tests are given below in a separate section of this document.
 | 
			
		||||
 | 
			
		||||
You can use "make install" to install PCRE into live directories on your
 | 
			
		||||
system. The following are installed (file names are all relative to the
 | 
			
		||||
<prefix> that is set when "configure" is run):
 | 
			
		||||
 | 
			
		||||
  Commands (bin):
 | 
			
		||||
    pcretest
 | 
			
		||||
    pcregrep (if 8-bit support is enabled)
 | 
			
		||||
    pcre-config
 | 
			
		||||
 | 
			
		||||
  Libraries (lib):
 | 
			
		||||
    libpcre16     (if 16-bit support is enabled)
 | 
			
		||||
    libpcre32     (if 32-bit support is enabled)
 | 
			
		||||
    libpcre       (if 8-bit support is enabled)
 | 
			
		||||
    libpcreposix  (if 8-bit support is enabled)
 | 
			
		||||
    libpcrecpp    (if 8-bit and C++ support is enabled)
 | 
			
		||||
 | 
			
		||||
  Configuration information (lib/pkgconfig):
 | 
			
		||||
    libpcre16.pc
 | 
			
		||||
    libpcre32.pc
 | 
			
		||||
    libpcre.pc
 | 
			
		||||
    libpcreposix.pc
 | 
			
		||||
    libpcrecpp.pc (if C++ support is enabled)
 | 
			
		||||
 | 
			
		||||
  Header files (include):
 | 
			
		||||
    pcre.h
 | 
			
		||||
    pcreposix.h
 | 
			
		||||
    pcre_scanner.h      )
 | 
			
		||||
    pcre_stringpiece.h  ) if C++ support is enabled
 | 
			
		||||
    pcrecpp.h           )
 | 
			
		||||
    pcrecpparg.h        )
 | 
			
		||||
 | 
			
		||||
  Man pages (share/man/man{1,3}):
 | 
			
		||||
    pcregrep.1
 | 
			
		||||
    pcretest.1
 | 
			
		||||
    pcre-config.1
 | 
			
		||||
    pcre.3
 | 
			
		||||
    pcre*.3 (lots more pages, all starting "pcre")
 | 
			
		||||
 | 
			
		||||
  HTML documentation (share/doc/pcre/html):
 | 
			
		||||
    index.html
 | 
			
		||||
    *.html (lots more pages, hyperlinked from index.html)
 | 
			
		||||
 | 
			
		||||
  Text file documentation (share/doc/pcre):
 | 
			
		||||
    AUTHORS
 | 
			
		||||
    COPYING
 | 
			
		||||
    ChangeLog
 | 
			
		||||
    LICENCE
 | 
			
		||||
    NEWS
 | 
			
		||||
    README
 | 
			
		||||
    pcre.txt         (a concatenation of the man(3) pages)
 | 
			
		||||
    pcretest.txt     the pcretest man page
 | 
			
		||||
    pcregrep.txt     the pcregrep man page
 | 
			
		||||
    pcre-config.txt  the pcre-config man page
 | 
			
		||||
 | 
			
		||||
If you want to remove PCRE from your system, you can run "make uninstall".
 | 
			
		||||
This removes all the files that "make install" installed. However, it does not
 | 
			
		||||
remove any directories, because these are often shared with other programs.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Retrieving configuration information
 | 
			
		||||
------------------------------------
 | 
			
		||||
 | 
			
		||||
Running "make install" installs the command pcre-config, which can be used to
 | 
			
		||||
recall information about the PCRE configuration and installation. For example:
 | 
			
		||||
 | 
			
		||||
  pcre-config --version
 | 
			
		||||
 | 
			
		||||
prints the version number, and
 | 
			
		||||
 | 
			
		||||
  pcre-config --libs
 | 
			
		||||
 | 
			
		||||
outputs information about where the library is installed. This command can be
 | 
			
		||||
included in makefiles for programs that use PCRE, saving the programmer from
 | 
			
		||||
having to remember too many details.
 | 
			
		||||
 | 
			
		||||
The pkg-config command is another system for saving and retrieving information
 | 
			
		||||
about installed libraries. Instead of separate commands for each library, a
 | 
			
		||||
single command is used. For example:
 | 
			
		||||
 | 
			
		||||
  pkg-config --cflags pcre
 | 
			
		||||
 | 
			
		||||
The data is held in *.pc files that are installed in a directory called
 | 
			
		||||
<prefix>/lib/pkgconfig.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Shared libraries
 | 
			
		||||
----------------
 | 
			
		||||
 | 
			
		||||
The default distribution builds PCRE as shared libraries and static libraries,
 | 
			
		||||
as long as the operating system supports shared libraries. Shared library
 | 
			
		||||
support relies on the "libtool" script which is built as part of the
 | 
			
		||||
"configure" process.
 | 
			
		||||
 | 
			
		||||
The libtool script is used to compile and link both shared and static
 | 
			
		||||
libraries. They are placed in a subdirectory called .libs when they are newly
 | 
			
		||||
built. The programs pcretest and pcregrep are built to use these uninstalled
 | 
			
		||||
libraries (by means of wrapper scripts in the case of shared libraries). When
 | 
			
		||||
you use "make install" to install shared libraries, pcregrep and pcretest are
 | 
			
		||||
automatically re-built to use the newly installed shared libraries before being
 | 
			
		||||
installed themselves. However, the versions left in the build directory still
 | 
			
		||||
use the uninstalled libraries.
 | 
			
		||||
 | 
			
		||||
To build PCRE using static libraries only you must use --disable-shared when
 | 
			
		||||
configuring it. For example:
 | 
			
		||||
 | 
			
		||||
./configure --prefix=/usr/gnu --disable-shared
 | 
			
		||||
 | 
			
		||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
 | 
			
		||||
build only shared libraries.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Cross-compiling using autotools
 | 
			
		||||
-------------------------------
 | 
			
		||||
 | 
			
		||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
 | 
			
		||||
order to cross-compile PCRE for some other host. However, you should NOT
 | 
			
		||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
 | 
			
		||||
file is compiled and run on the local host, in order to generate the inbuilt
 | 
			
		||||
character tables (the pcre_chartables.c file). This will probably not work,
 | 
			
		||||
because dftables.c needs to be compiled with the local compiler, not the cross
 | 
			
		||||
compiler.
 | 
			
		||||
 | 
			
		||||
When --enable-rebuild-chartables is not specified, pcre_chartables.c is created
 | 
			
		||||
by making a copy of pcre_chartables.c.dist, which is a default set of tables
 | 
			
		||||
that assumes ASCII code. Cross-compiling with the default tables should not be
 | 
			
		||||
a problem.
 | 
			
		||||
 | 
			
		||||
If you need to modify the character tables when cross-compiling, you should
 | 
			
		||||
move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and
 | 
			
		||||
run it on the local host to make a new version of pcre_chartables.c.dist.
 | 
			
		||||
Then when you cross-compile PCRE this new version of the tables will be used.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Using HP's ANSI C++ compiler (aCC)
 | 
			
		||||
----------------------------------
 | 
			
		||||
 | 
			
		||||
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
 | 
			
		||||
"configure" script, you must include the "-AA" option in the CXXFLAGS
 | 
			
		||||
environment variable in order for the C++ components to compile correctly.
 | 
			
		||||
 | 
			
		||||
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
 | 
			
		||||
needed libraries fail to get included when specifying the "-AA" compiler
 | 
			
		||||
option. If you experience unresolved symbols when linking the C++ programs,
 | 
			
		||||
use the workaround of specifying the following environment variable prior to
 | 
			
		||||
running the "configure" script:
 | 
			
		||||
 | 
			
		||||
  CXXLDFLAGS="-lstd_v2 -lCsup_v2"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Compiling in Tru64 using native compilers
 | 
			
		||||
-----------------------------------------
 | 
			
		||||
 | 
			
		||||
The following error may occur when compiling with native compilers in the Tru64
 | 
			
		||||
operating system:
 | 
			
		||||
 | 
			
		||||
  CXX    libpcrecpp_la-pcrecpp.lo
 | 
			
		||||
cxx: Error: /usr/lib/cmplrs/cxx/V7.1-006/include/cxx/iosfwd, line 58: #error
 | 
			
		||||
          directive: "cannot include iosfwd -- define __USE_STD_IOSTREAM to
 | 
			
		||||
          override default - see section 7.1.2 of the C++ Using Guide"
 | 
			
		||||
#error "cannot include iosfwd -- define __USE_STD_IOSTREAM to override default
 | 
			
		||||
- see section 7.1.2 of the C++ Using Guide"
 | 
			
		||||
 | 
			
		||||
This may be followed by other errors, complaining that 'namespace "std" has no
 | 
			
		||||
member'. The solution to this is to add the line
 | 
			
		||||
 | 
			
		||||
#define __USE_STD_IOSTREAM 1
 | 
			
		||||
 | 
			
		||||
to the config.h file.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Using Sun's compilers for Solaris
 | 
			
		||||
---------------------------------
 | 
			
		||||
 | 
			
		||||
A user reports that the following configurations work on Solaris 9 sparcv9 and
 | 
			
		||||
Solaris 9 x86 (32-bit):
 | 
			
		||||
 | 
			
		||||
  Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g"
 | 
			
		||||
  Solaris 9 x86:     ./configure --disable-cpp CC=/bin/cc CFLAGS="-g"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Using PCRE from MySQL
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
On systems where both PCRE and MySQL are installed, it is possible to make use
 | 
			
		||||
of PCRE from within MySQL, as an alternative to the built-in pattern matching.
 | 
			
		||||
There is a web page that tells you how to do this:
 | 
			
		||||
 | 
			
		||||
  http://www.mysqludf.org/lib_mysqludf_preg/index.php
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Making new tarballs
 | 
			
		||||
-------------------
 | 
			
		||||
 | 
			
		||||
The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and
 | 
			
		||||
zip formats. The command "make distcheck" does the same, but then does a trial
 | 
			
		||||
build of the new distribution to ensure that it works.
 | 
			
		||||
 | 
			
		||||
If you have modified any of the man page sources in the doc directory, you
 | 
			
		||||
should first run the PrepareRelease script before making a distribution. This
 | 
			
		||||
script creates the .txt and HTML forms of the documentation from the man pages.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Testing PCRE
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
To test the basic PCRE library on a Unix-like system, run the RunTest script.
 | 
			
		||||
There is another script called RunGrepTest that tests the options of the
 | 
			
		||||
pcregrep command. If the C++ wrapper library is built, three test programs
 | 
			
		||||
called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest
 | 
			
		||||
are also built. When JIT support is enabled, another test program called
 | 
			
		||||
pcre_jit_test is built.
 | 
			
		||||
 | 
			
		||||
Both the scripts and all the program tests are run if you obey "make check" or
 | 
			
		||||
"make test". For other environments, see the instructions in
 | 
			
		||||
NON-AUTOTOOLS-BUILD.
 | 
			
		||||
 | 
			
		||||
The RunTest script runs the pcretest test program (which is documented in its
 | 
			
		||||
own man page) on each of the relevant testinput files in the testdata
 | 
			
		||||
directory, and compares the output with the contents of the corresponding
 | 
			
		||||
testoutput files. RunTest uses a file called testtry to hold the main output
 | 
			
		||||
from pcretest. Other files whose names begin with "test" are used as working
 | 
			
		||||
files in some tests.
 | 
			
		||||
 | 
			
		||||
Some tests are relevant only when certain build-time options were selected. For
 | 
			
		||||
example, the tests for UTF-8/16/32 support are run only if --enable-utf was
 | 
			
		||||
used. RunTest outputs a comment when it skips a test.
 | 
			
		||||
 | 
			
		||||
Many of the tests that are not skipped are run up to three times. The second
 | 
			
		||||
run forces pcre_study() to be called for all patterns except for a few in some
 | 
			
		||||
tests that are marked "never study" (see the pcretest program for how this is
 | 
			
		||||
done). If JIT support is available, the non-DFA tests are run a third time,
 | 
			
		||||
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
 | 
			
		||||
This testing can be suppressed by putting "nojit" on the RunTest command line.
 | 
			
		||||
 | 
			
		||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 | 
			
		||||
libraries that are enabled. If you want to run just one set of tests, call
 | 
			
		||||
RunTest with either the -8, -16 or -32 option.
 | 
			
		||||
 | 
			
		||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
 | 
			
		||||
on the RunTest command line. To run pcretest on just one or more specific test
 | 
			
		||||
files, give their numbers as arguments to RunTest, for example:
 | 
			
		||||
 | 
			
		||||
  RunTest 2 7 11
 | 
			
		||||
 | 
			
		||||
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
 | 
			
		||||
end), or a number preceded by ~ to exclude a test. For example:
 | 
			
		||||
 | 
			
		||||
  RunTest 3-15 ~10
 | 
			
		||||
 | 
			
		||||
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
 | 
			
		||||
except test 13. Whatever order the arguments are in, the tests are always run
 | 
			
		||||
in numerical order.
 | 
			
		||||
 | 
			
		||||
You can also call RunTest with the single argument "list" to cause it to output
 | 
			
		||||
a list of tests.
 | 
			
		||||
 | 
			
		||||
The first test file can be fed directly into the perltest.pl script to check
 | 
			
		||||
that Perl gives the same results. The only difference you should see is in the
 | 
			
		||||
first few lines, where the Perl version is given instead of the PCRE version.
 | 
			
		||||
 | 
			
		||||
The second set of tests checks pcre_fullinfo(), pcre_study(),
 | 
			
		||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
 | 
			
		||||
detection, and run-time flags that are specific to PCRE, as well as the POSIX
 | 
			
		||||
wrapper API. It also uses the debugging flags to check some of the internals of
 | 
			
		||||
pcre_compile().
 | 
			
		||||
 | 
			
		||||
If you build PCRE with a locale setting that is not the standard C locale, the
 | 
			
		||||
character tables may be different (see next paragraph). In some cases, this may
 | 
			
		||||
cause failures in the second set of tests. For example, in a locale where the
 | 
			
		||||
isprint() function yields TRUE for characters in the range 128-255, the use of
 | 
			
		||||
[:isascii:] inside a character class defines a different set of characters, and
 | 
			
		||||
this shows up in this test as a difference in the compiled code, which is being
 | 
			
		||||
listed for checking. Where the comparison test output contains [\x00-\x7f] the
 | 
			
		||||
test will contain [\x00-\xff], and similarly in some other cases. This is not a
 | 
			
		||||
bug in PCRE.
 | 
			
		||||
 | 
			
		||||
The third set of tests checks pcre_maketables(), the facility for building a
 | 
			
		||||
set of character tables for a specific locale and using them instead of the
 | 
			
		||||
default tables. The tests make use of the "fr_FR" (French) locale. Before
 | 
			
		||||
running the test, the script checks for the presence of this locale by running
 | 
			
		||||
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
 | 
			
		||||
in the list of available locales, the third test cannot be run, and a comment
 | 
			
		||||
is output to say why. If running this test produces instances of the error
 | 
			
		||||
 | 
			
		||||
  ** Failed to set locale "fr_FR"
 | 
			
		||||
 | 
			
		||||
in the comparison output, it means that locale is not available on your system,
 | 
			
		||||
despite being listed by "locale". This does not mean that PCRE is broken.
 | 
			
		||||
 | 
			
		||||
[If you are trying to run this test on Windows, you may be able to get it to
 | 
			
		||||
work by changing "fr_FR" to "french" everywhere it occurs. Alternatively, use
 | 
			
		||||
RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
 | 
			
		||||
Windows versions of test 2. More info on using RunTest.bat is included in the
 | 
			
		||||
document entitled NON-UNIX-USE.]
 | 
			
		||||
 | 
			
		||||
The fourth and fifth tests check the UTF-8/16/32 support and error handling and
 | 
			
		||||
internal UTF features of PCRE that are not relevant to Perl, respectively. The
 | 
			
		||||
sixth and seventh tests do the same for Unicode character properties support.
 | 
			
		||||
 | 
			
		||||
The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
 | 
			
		||||
matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32
 | 
			
		||||
mode with Unicode property support, respectively.
 | 
			
		||||
 | 
			
		||||
The eleventh test checks some internal offsets and code size features; it is
 | 
			
		||||
run only when the default "link size" of 2 is set (in other cases the sizes
 | 
			
		||||
change) and when Unicode property support is enabled.
 | 
			
		||||
 | 
			
		||||
The twelfth test is run only when JIT support is available, and the thirteenth
 | 
			
		||||
test is run only when JIT support is not available. They test some JIT-specific
 | 
			
		||||
features such as information output from pcretest about JIT compilation.
 | 
			
		||||
 | 
			
		||||
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
 | 
			
		||||
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit
 | 
			
		||||
mode. These are tests that generate different output in the two modes. They are
 | 
			
		||||
for general cases, UTF-8/16/32 support, and Unicode property support,
 | 
			
		||||
respectively.
 | 
			
		||||
 | 
			
		||||
The twentieth test is run only in 16/32-bit mode. It tests some specific
 | 
			
		||||
16/32-bit features of the DFA matching engine.
 | 
			
		||||
 | 
			
		||||
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when
 | 
			
		||||
the link size is set to 2 for the 16-bit library. They test reloading
 | 
			
		||||
pre-compiled patterns.
 | 
			
		||||
 | 
			
		||||
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are
 | 
			
		||||
for general cases, and UTF-16 support, respectively.
 | 
			
		||||
 | 
			
		||||
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are
 | 
			
		||||
for general cases, and UTF-32 support, respectively.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Character tables
 | 
			
		||||
----------------
 | 
			
		||||
 | 
			
		||||
For speed, PCRE uses four tables for manipulating and identifying characters
 | 
			
		||||
whose code point values are less than 256. The final argument of the
 | 
			
		||||
pcre_compile() function is a pointer to a block of memory containing the
 | 
			
		||||
concatenated tables. A call to pcre_maketables() can be used to generate a set
 | 
			
		||||
of tables in the current locale. If the final argument for pcre_compile() is
 | 
			
		||||
passed as NULL, a set of default tables that is built into the binary is used.
 | 
			
		||||
 | 
			
		||||
The source file called pcre_chartables.c contains the default set of tables. By
 | 
			
		||||
default, this is created as a copy of pcre_chartables.c.dist, which contains
 | 
			
		||||
tables for ASCII coding. However, if --enable-rebuild-chartables is specified
 | 
			
		||||
for ./configure, a different version of pcre_chartables.c is built by the
 | 
			
		||||
program dftables (compiled from dftables.c), which uses the ANSI C character
 | 
			
		||||
handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to
 | 
			
		||||
build the table sources. This means that the default C locale which is set for
 | 
			
		||||
your system will control the contents of these default tables. You can change
 | 
			
		||||
the default tables by editing pcre_chartables.c and then re-building PCRE. If
 | 
			
		||||
you do this, you should take care to ensure that the file does not get
 | 
			
		||||
automatically re-generated. The best way to do this is to move
 | 
			
		||||
pcre_chartables.c.dist out of the way and replace it with your customized
 | 
			
		||||
tables.
 | 
			
		||||
 | 
			
		||||
When the dftables program is run as a result of --enable-rebuild-chartables,
 | 
			
		||||
it uses the default C locale that is set on your system. It does not pay
 | 
			
		||||
attention to the LC_xxx environment variables. In other words, it uses the
 | 
			
		||||
system's default locale rather than whatever the compiling user happens to have
 | 
			
		||||
set. If you really do want to build a source set of character tables in a
 | 
			
		||||
locale that is specified by the LC_xxx variables, you can run the dftables
 | 
			
		||||
program by hand with the -L option. For example:
 | 
			
		||||
 | 
			
		||||
  ./dftables -L pcre_chartables.c.special
 | 
			
		||||
 | 
			
		||||
The first two 256-byte tables provide lower casing and case flipping functions,
 | 
			
		||||
respectively. The next table consists of three 32-byte bit maps which identify
 | 
			
		||||
digits, "word" characters, and white space, respectively. These are used when
 | 
			
		||||
building 32-byte bit maps that represent character classes for code points less
 | 
			
		||||
than 256.
 | 
			
		||||
 | 
			
		||||
The final 256-byte table has bits indicating various character types, as
 | 
			
		||||
follows:
 | 
			
		||||
 | 
			
		||||
    1   white space character
 | 
			
		||||
    2   letter
 | 
			
		||||
    4   decimal digit
 | 
			
		||||
    8   hexadecimal digit
 | 
			
		||||
   16   alphanumeric or '_'
 | 
			
		||||
  128   regular expression metacharacter or binary zero
 | 
			
		||||
 | 
			
		||||
You should not alter the set of characters that contain the 128 bit, as that
 | 
			
		||||
will cause PCRE to malfunction.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
File manifest
 | 
			
		||||
-------------
 | 
			
		||||
 | 
			
		||||
The distribution should contain the files listed below. Where a file name is
 | 
			
		||||
given as pcre[16|32]_xxx it means that there are three files, one with the name
 | 
			
		||||
pcre_xxx, one with the name pcre16_xxx, and a third with the name pcre32_xxx.
 | 
			
		||||
 | 
			
		||||
(A) Source files of the PCRE library functions and their headers:
 | 
			
		||||
 | 
			
		||||
  dftables.c              auxiliary program for building pcre_chartables.c
 | 
			
		||||
                          when --enable-rebuild-chartables is specified
 | 
			
		||||
 | 
			
		||||
  pcre_chartables.c.dist  a default set of character tables that assume ASCII
 | 
			
		||||
                          coding; used, unless --enable-rebuild-chartables is
 | 
			
		||||
                          specified, by copying to pcre[16]_chartables.c
 | 
			
		||||
 | 
			
		||||
  pcreposix.c                )
 | 
			
		||||
  pcre[16|32]_byte_order.c   )
 | 
			
		||||
  pcre[16|32]_compile.c      )
 | 
			
		||||
  pcre[16|32]_config.c       )
 | 
			
		||||
  pcre[16|32]_dfa_exec.c     )
 | 
			
		||||
  pcre[16|32]_exec.c         )
 | 
			
		||||
  pcre[16|32]_fullinfo.c     )
 | 
			
		||||
  pcre[16|32]_get.c          ) sources for the functions in the library,
 | 
			
		||||
  pcre[16|32]_globals.c      )   and some internal functions that they use
 | 
			
		||||
  pcre[16|32]_jit_compile.c  )
 | 
			
		||||
  pcre[16|32]_maketables.c   )
 | 
			
		||||
  pcre[16|32]_newline.c      )
 | 
			
		||||
  pcre[16|32]_refcount.c     )
 | 
			
		||||
  pcre[16|32]_string_utils.c )
 | 
			
		||||
  pcre[16|32]_study.c        )
 | 
			
		||||
  pcre[16|32]_tables.c       )
 | 
			
		||||
  pcre[16|32]_ucd.c          )
 | 
			
		||||
  pcre[16|32]_version.c      )
 | 
			
		||||
  pcre[16|32]_xclass.c       )
 | 
			
		||||
  pcre_ord2utf8.c            )
 | 
			
		||||
  pcre_valid_utf8.c          )
 | 
			
		||||
  pcre16_ord2utf16.c         )
 | 
			
		||||
  pcre16_utf16_utils.c       )
 | 
			
		||||
  pcre16_valid_utf16.c       )
 | 
			
		||||
  pcre32_utf32_utils.c       )
 | 
			
		||||
  pcre32_valid_utf32.c       )
 | 
			
		||||
 | 
			
		||||
  pcre[16|32]_printint.c     ) debugging function that is used by pcretest,
 | 
			
		||||
                             )   and can also be #included in pcre_compile()
 | 
			
		||||
 | 
			
		||||
  pcre.h.in               template for pcre.h when built by "configure"
 | 
			
		||||
  pcreposix.h             header for the external POSIX wrapper API
 | 
			
		||||
  pcre_internal.h         header for internal use
 | 
			
		||||
  sljit/*                 16 files that make up the JIT compiler
 | 
			
		||||
  ucp.h                   header for Unicode property handling
 | 
			
		||||
 | 
			
		||||
  config.h.in             template for config.h, which is built by "configure"
 | 
			
		||||
 | 
			
		||||
  pcrecpp.h               public header file for the C++ wrapper
 | 
			
		||||
  pcrecpparg.h.in         template for another C++ header file
 | 
			
		||||
  pcre_scanner.h          public header file for C++ scanner functions
 | 
			
		||||
  pcrecpp.cc              )
 | 
			
		||||
  pcre_scanner.cc         ) source for the C++ wrapper library
 | 
			
		||||
 | 
			
		||||
  pcre_stringpiece.h.in   template for pcre_stringpiece.h, the header for the
 | 
			
		||||
                            C++ stringpiece functions
 | 
			
		||||
  pcre_stringpiece.cc     source for the C++ stringpiece functions
 | 
			
		||||
 | 
			
		||||
(B) Source files for programs that use PCRE:
 | 
			
		||||
 | 
			
		||||
  pcredemo.c              simple demonstration of coding calls to PCRE
 | 
			
		||||
  pcregrep.c              source of a grep utility that uses PCRE
 | 
			
		||||
  pcretest.c              comprehensive test program
 | 
			
		||||
 | 
			
		||||
(C) Auxiliary files:
 | 
			
		||||
 | 
			
		||||
  132html                 script to turn "man" pages into HTML
 | 
			
		||||
  AUTHORS                 information about the author of PCRE
 | 
			
		||||
  ChangeLog               log of changes to the code
 | 
			
		||||
  CleanTxt                script to clean nroff output for txt man pages
 | 
			
		||||
  Detrail                 script to remove trailing spaces
 | 
			
		||||
  HACKING                 some notes about the internals of PCRE
 | 
			
		||||
  INSTALL                 generic installation instructions
 | 
			
		||||
  LICENCE                 conditions for the use of PCRE
 | 
			
		||||
  COPYING                 the same, using GNU's standard name
 | 
			
		||||
  Makefile.in             ) template for Unix Makefile, which is built by
 | 
			
		||||
                          )   "configure"
 | 
			
		||||
  Makefile.am             ) the automake input that was used to create
 | 
			
		||||
                          )   Makefile.in
 | 
			
		||||
  NEWS                    important changes in this release
 | 
			
		||||
  NON-UNIX-USE            the previous name for NON-AUTOTOOLS-BUILD
 | 
			
		||||
  NON-AUTOTOOLS-BUILD     notes on building PCRE without using autotools
 | 
			
		||||
  PrepareRelease          script to make preparations for "make dist"
 | 
			
		||||
  README                  this file
 | 
			
		||||
  RunTest                 a Unix shell script for running tests
 | 
			
		||||
  RunGrepTest             a Unix shell script for pcregrep tests
 | 
			
		||||
  aclocal.m4              m4 macros (generated by "aclocal")
 | 
			
		||||
  config.guess            ) files used by libtool,
 | 
			
		||||
  config.sub              )   used only when building a shared library
 | 
			
		||||
  configure               a configuring shell script (built by autoconf)
 | 
			
		||||
  configure.ac            ) the autoconf input that was used to build
 | 
			
		||||
                          )   "configure" and config.h
 | 
			
		||||
  depcomp                 ) script to find program dependencies, generated by
 | 
			
		||||
                          )   automake
 | 
			
		||||
  doc/*.3                 man page sources for PCRE
 | 
			
		||||
  doc/*.1                 man page sources for pcregrep and pcretest
 | 
			
		||||
  doc/index.html.src      the base HTML page
 | 
			
		||||
  doc/html/*              HTML documentation
 | 
			
		||||
  doc/pcre.txt            plain text version of the man pages
 | 
			
		||||
  doc/pcretest.txt        plain text documentation of test program
 | 
			
		||||
  doc/perltest.txt        plain text documentation of Perl test program
 | 
			
		||||
  install-sh              a shell script for installing files
 | 
			
		||||
  libpcre16.pc.in         template for libpcre16.pc for pkg-config
 | 
			
		||||
  libpcre32.pc.in         template for libpcre32.pc for pkg-config
 | 
			
		||||
  libpcre.pc.in           template for libpcre.pc for pkg-config
 | 
			
		||||
  libpcreposix.pc.in      template for libpcreposix.pc for pkg-config
 | 
			
		||||
  libpcrecpp.pc.in        template for libpcrecpp.pc for pkg-config
 | 
			
		||||
  ltmain.sh               file used to build a libtool script
 | 
			
		||||
  missing                 ) common stub for a few missing GNU programs while
 | 
			
		||||
                          )   installing, generated by automake
 | 
			
		||||
  mkinstalldirs           script for making install directories
 | 
			
		||||
  perltest.pl             Perl test program
 | 
			
		||||
  pcre-config.in          source of script which retains PCRE information
 | 
			
		||||
  pcre_jit_test.c         test program for the JIT compiler
 | 
			
		||||
  pcrecpp_unittest.cc          )
 | 
			
		||||
  pcre_scanner_unittest.cc     ) test programs for the C++ wrapper
 | 
			
		||||
  pcre_stringpiece_unittest.cc )
 | 
			
		||||
  testdata/testinput*     test data for main library tests
 | 
			
		||||
  testdata/testoutput*    expected test results
 | 
			
		||||
  testdata/grep*          input and output for pcregrep tests
 | 
			
		||||
  testdata/*              other supporting test files
 | 
			
		||||
 | 
			
		||||
(D) Auxiliary files for cmake support
 | 
			
		||||
 | 
			
		||||
  cmake/COPYING-CMAKE-SCRIPTS
 | 
			
		||||
  cmake/FindPackageHandleStandardArgs.cmake
 | 
			
		||||
  cmake/FindEditline.cmake
 | 
			
		||||
  cmake/FindReadline.cmake
 | 
			
		||||
  CMakeLists.txt
 | 
			
		||||
  config-cmake.h.in
 | 
			
		||||
 | 
			
		||||
(E) Auxiliary files for VPASCAL
 | 
			
		||||
 | 
			
		||||
  makevp.bat
 | 
			
		||||
  makevp_c.txt
 | 
			
		||||
  makevp_l.txt
 | 
			
		||||
  pcregexp.pas
 | 
			
		||||
 | 
			
		||||
(F) Auxiliary files for building PCRE "by hand"
 | 
			
		||||
 | 
			
		||||
  pcre.h.generic          ) a version of the public PCRE header file
 | 
			
		||||
                          )   for use in non-"configure" environments
 | 
			
		||||
  config.h.generic        ) a version of config.h for use in non-"configure"
 | 
			
		||||
                          )   environments
 | 
			
		||||
 | 
			
		||||
(G) Miscellaneous
 | 
			
		||||
 | 
			
		||||
  RunTest.bat            a script for running tests under Windows
 | 
			
		||||
 | 
			
		||||
Philip Hazel
 | 
			
		||||
Email local part: ph10
 | 
			
		||||
Email domain: cam.ac.uk
 | 
			
		||||
Last updated: 17 January 2014
 | 
			
		||||
							
								
								
									
										571
									
								
								tools/pcre/RunGrepTest
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										571
									
								
								tools/pcre/RunGrepTest
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,571 @@
 | 
			
		||||
#! /bin/sh
 | 
			
		||||
 | 
			
		||||
# Run pcregrep tests. The assumption is that the PCRE tests check the library
 | 
			
		||||
# itself. What we are checking here is the file handling and options that are
 | 
			
		||||
# supported by pcregrep. This script must be run in the build directory.
 | 
			
		||||
 | 
			
		||||
# Set the C locale, so that sort(1) behaves predictably.
 | 
			
		||||
 | 
			
		||||
LC_ALL=C
 | 
			
		||||
export LC_ALL
 | 
			
		||||
 | 
			
		||||
# Remove any non-default colouring and aliases that the caller may have set.
 | 
			
		||||
 | 
			
		||||
unset PCREGREP_COLOUR PCREGREP_COLOR
 | 
			
		||||
unset cp ls mv rm
 | 
			
		||||
 | 
			
		||||
# Remember the current (build) directory, set the program to be tested, and
 | 
			
		||||
# valgrind settings when requested.
 | 
			
		||||
 | 
			
		||||
builddir=`pwd`
 | 
			
		||||
pcregrep=$builddir/pcregrep
 | 
			
		||||
 | 
			
		||||
valgrind=
 | 
			
		||||
# Consume the command-line arguments one at a time. The only recognised word
# is "valgrind", which stores the valgrind command prefix in $valgrind; any
# other argument is reported and ends the script with status 1.
until [ $# -eq 0 ]
do
  case "$1" in
    valgrind)
      valgrind="valgrind -q --leak-check=no --smc-check=all"
      ;;
    *)
      echo "RunGrepTest: Unknown argument $1"
      exit 1
      ;;
  esac
  shift
done
 | 
			
		||||
 | 
			
		||||
# Print a line holding a single space, then a banner naming the pcregrep
# version under test; the banner also says when valgrind is in use.
echo " "
pcregrep_version=`$pcregrep -V`
case "$valgrind" in
  "")
    echo "Testing $pcregrep_version"
    ;;
  *)
    echo "Testing $pcregrep_version using valgrind"
    ;;
esac
 | 
			
		||||
 | 
			
		||||
# Set up a suitable "diff" command for comparison. Some systems have a diff
 | 
			
		||||
# that lacks a -u option. Try to deal with this; better do the test for the -b
 | 
			
		||||
# option as well.
 | 
			
		||||
 | 
			
		||||
# Start from plain "diff" and probe each option set in turn. Every probe that
# succeeds overrides the previous choice, so the last working one wins.
cf="diff"
for cfopt in -b -u -ub
do
  if diff $cfopt /dev/null /dev/null 2>/dev/null
  then
    cf="diff $cfopt"
  fi
done
 | 
			
		||||
 | 
			
		||||
# If this test is being run from "make check", $srcdir will be set. If not, set
 | 
			
		||||
# it to the current or parent directory, whichever one contains the test data.
 | 
			
		||||
# Subsequently, we run most of the pcregrep tests in the source directory so
 | 
			
		||||
# that the file names in the output are always the same.
 | 
			
		||||
 | 
			
		||||
# Locate the directory that holds the test data. A caller-supplied $srcdir is
# kept only when it is non-empty and contains a testdata directory; otherwise
# fall back to the current directory, then to its parent, and give up with
# status 1 if neither has a testdata directory.
#
# The two conditions are separate test commands joined with || because the -o
# operator of test is obsolescent in POSIX and gives unspecified results when
# test is called with this many operands.
if [ -z "$srcdir" ] || [ ! -d "$srcdir/testdata" ] ; then
  if [ -d "./testdata" ] ; then
    srcdir=.
  elif [ -d "../testdata" ] ; then
    srcdir=..
  else
    echo "Cannot find the testdata directory"
    exit 1
  fi
fi
 | 
			
		||||
 | 
			
		||||
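# Check for the availability of UTF-8 support. The exit status of pcretest is
# saved in $utf8; a non-zero value is what later enables the UTF-8 tests.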
 | 
			
		||||
 | 
			
		||||
./pcretest -C utf >/dev/null
 | 
			
		||||
utf8=$?
 | 
			
		||||
 | 
			
		||||
echo "Testing pcregrep main features"
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 1 ------------------------------" >testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 2 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 3 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 4 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 5 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 6 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 7 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 8 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 9 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 10 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 11 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 12 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 13 -----------------------------" >>testtrygrep
 | 
			
		||||
echo seventeen >testtemp1grep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 14 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 18 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 19 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 20 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 21 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 22 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 23 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 24 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 25 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 26 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 27 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 28 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 29 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 30 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 31 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 32 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 33 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 34 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 35 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep  '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
echo "======== STDERR ========" >>testtrygrep
 | 
			
		||||
cat teststderrgrep >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 38 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 39 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 40 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 41 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 42 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 43 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 44 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 45 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
 | 
			
		||||
elephant" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 48 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -F "AB.VE
 | 
			
		||||
elephant" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 49 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
 | 
			
		||||
elephant" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 50 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 51 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 52 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 53 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 54 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 55 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 56 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 57 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 58 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 59 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 60 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 61 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 62 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 63 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 64 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 65 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 66 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 67 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 68 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 69 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 72 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 73 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 74 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 75 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 76 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 77 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 78 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 79 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 80 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 81 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 82 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
 | 
			
		||||
echo testdata/grepinput3 >testtemp1grep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 85 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 86 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 87 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 88 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 89 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 90 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 91 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 92 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 93 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 94 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 95 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
 | 
			
		||||
echo "grepinput$" >testtemp1grep
 | 
			
		||||
echo "grepinput8" >>testtemp1grep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
 | 
			
		||||
echo "grepinput$" >testtemp1grep
 | 
			
		||||
echo "grepinput8" >>testtemp1grep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
 | 
			
		||||
echo "grepinput$" >testtemp1grep
 | 
			
		||||
echo "grepinput8" >testtemp2grep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 101 ------------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 102 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 103 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 104 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 105 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
echo "---------------------------- Test 106 -----------------------------" >>testtrygrep
 | 
			
		||||
(cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1
 | 
			
		||||
echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Now compare the results.
 | 
			
		||||
 | 
			
		||||
$cf $srcdir/testdata/grepoutput testtrygrep
 | 
			
		||||
if [ $? != 0 ] ; then exit 1; fi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# These tests require UTF-8 support
 | 
			
		||||
 | 
			
		||||
if [ $utf8 -ne 0 ] ; then
 | 
			
		||||
  echo "Testing pcregrep UTF-8 features"
 | 
			
		||||
 | 
			
		||||
  echo "---------------------------- Test U1 ------------------------------" >testtrygrep
 | 
			
		||||
  (cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep
 | 
			
		||||
  echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
  echo "---------------------------- Test U2 ------------------------------" >>testtrygrep
 | 
			
		||||
  (cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
 | 
			
		||||
  echo "RC=$?" >>testtrygrep
 | 
			
		||||
 | 
			
		||||
  $cf $srcdir/testdata/grepoutput8 testtrygrep
 | 
			
		||||
  if [ $? != 0 ] ; then exit 1; fi
 | 
			
		||||
 | 
			
		||||
else
 | 
			
		||||
  echo "Skipping pcregrep UTF-8 tests: no UTF-8 support in PCRE library"
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# We go to some contortions to try to ensure that the tests for the various
 | 
			
		||||
# newline settings will work in environments where the normal newline sequence
 | 
			
		||||
# is not \n. Do not use exported files, whose line endings might be changed.
 | 
			
		||||
# Instead, create an input file using printf so that its contents are exactly
 | 
			
		||||
# what we want. Note the messy fudge to get printf to write a string that
 | 
			
		||||
# starts with a hyphen. These tests are run in the build directory.
 | 
			
		||||
 | 
			
		||||
echo "Testing pcregrep newline settings"
 | 
			
		||||
printf "abc\rdef\r\nghi\njkl" >testNinputgrep
 | 
			
		||||
 | 
			
		||||
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
 | 
			
		||||
$valgrind $pcregrep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
 | 
			
		||||
 | 
			
		||||
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
 | 
			
		||||
$valgrind $pcregrep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
 | 
			
		||||
 | 
			
		||||
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
 | 
			
		||||
pattern=`printf 'def\rjkl'`
 | 
			
		||||
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
 | 
			
		||||
 | 
			
		||||
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
 | 
			
		||||
$valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
 | 
			
		||||
 | 
			
		||||
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
 | 
			
		||||
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
 | 
			
		||||
 | 
			
		||||
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
 | 
			
		||||
$valgrind $pcregrep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
 | 
			
		||||
 | 
			
		||||
$cf $srcdir/testdata/grepoutputN testtrygrep
 | 
			
		||||
if [ $? != 0 ] ; then exit 1; fi
 | 
			
		||||
 | 
			
		||||
exit 0
 | 
			
		||||
 | 
			
		||||
# End
 | 
			
		||||
							
								
								
									
										1010
									
								
								tools/pcre/RunTest
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1010
									
								
								tools/pcre/RunTest
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										616
									
								
								tools/pcre/RunTest.bat
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										616
									
								
								tools/pcre/RunTest.bat
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,616 @@
 | 
			
		||||
@echo off
 | 
			
		||||
@rem This file must use CRLF linebreaks to function properly
 | 
			
		||||
@rem and requires both pcretest and pcregrep
 | 
			
		||||
@rem  This file was originally contributed by Ralf Junker, and touched up by
 | 
			
		||||
@rem  Daniel Richard G. Tests 10-12 added by Philip H.
 | 
			
		||||
@rem  Philip H also changed test 3 to use "wintest" files.
 | 
			
		||||
@rem
 | 
			
		||||
@rem  Updated by Tom Fortmann to support explicit test numbers on the command line.
 | 
			
		||||
@rem  Added argument validation and added error reporting.
 | 
			
		||||
@rem
 | 
			
		||||
@rem  MS Windows batch file to run pcretest on testfiles with the correct
 | 
			
		||||
@rem  options.
 | 
			
		||||
@rem
 | 
			
		||||
@rem Sheri Pierce added logic to skip feature dependent tests
 | 
			
		||||
@rem tests 4 5 9 15 18 22 24 and 26 require utf support
 | 
			
		||||
@rem tests 6 7 10 16 and 19 require ucp support
 | 
			
		||||
@rem 11 requires ucp and link size 2, 21 and 22 also require link size 2
 | 
			
		||||
@rem 12 requires presence of jit support
 | 
			
		||||
@rem 13 requires absence of jit support
 | 
			
		||||
@rem Sheri P also added override tests for study and jit testing
 | 
			
		||||
@rem Zoltan Herczeg added libpcre16 support
 | 
			
		||||
@rem Zoltan Herczeg added libpcre32 support
 | 
			
		||||
 | 
			
		||||
setlocal enabledelayedexpansion
 | 
			
		||||
if [%srcdir%]==[] (
 | 
			
		||||
if exist testdata\ set srcdir=.)
 | 
			
		||||
if [%srcdir%]==[] (
 | 
			
		||||
if exist ..\testdata\ set srcdir=..)
 | 
			
		||||
if [%srcdir%]==[] (
 | 
			
		||||
if exist ..\..\testdata\ set srcdir=..\..)
 | 
			
		||||
if NOT exist %srcdir%\testdata\ (
 | 
			
		||||
@rem Report the missing testdata folder, then print the configuration help.
echo Error: distribution testdata folder not found!
 | 
			
		||||
call :conferror
 | 
			
		||||
exit /b 1
 | 
			
		||||
goto :eof
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
if [%pcretest%]==[] set pcretest=.\pcretest.exe
 | 
			
		||||
 | 
			
		||||
echo source dir is %srcdir%
 | 
			
		||||
echo pcretest=%pcretest%
 | 
			
		||||
 | 
			
		||||
if NOT exist %pcretest% (
 | 
			
		||||
echo Error: %pcretest% not found!
 | 
			
		||||
echo.
 | 
			
		||||
call :conferror
 | 
			
		||||
exit /b 1
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
%pcretest% -C linksize >NUL
 | 
			
		||||
set link_size=%ERRORLEVEL%
 | 
			
		||||
%pcretest% -C pcre8 >NUL
 | 
			
		||||
set support8=%ERRORLEVEL%
 | 
			
		||||
%pcretest% -C pcre16 >NUL
 | 
			
		||||
set support16=%ERRORLEVEL%
 | 
			
		||||
%pcretest% -C pcre32 >NUL
 | 
			
		||||
set support32=%ERRORLEVEL%
 | 
			
		||||
%pcretest% -C utf >NUL
 | 
			
		||||
set utf=%ERRORLEVEL%
 | 
			
		||||
%pcretest% -C ucp >NUL
 | 
			
		||||
set ucp=%ERRORLEVEL%
 | 
			
		||||
%pcretest% -C jit >NUL
 | 
			
		||||
set jit=%ERRORLEVEL%
 | 
			
		||||
 | 
			
		||||
if %support8% EQU 1 (
 | 
			
		||||
if not exist testout8 md testout8
 | 
			
		||||
if not exist testoutstudy8 md testoutstudy8
 | 
			
		||||
if not exist testoutjit8 md testoutjit8
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
if %support16% EQU 1 (
 | 
			
		||||
if not exist testout16 md testout16
 | 
			
		||||
if not exist testoutstudy16 md testoutstudy16
 | 
			
		||||
if not exist testoutjit16 md testoutjit16
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@rem The 32-bit output folders depend on 32-bit library support, not 16-bit.
if %support32% EQU 1 (
 | 
			
		||||
if not exist testout32 md testout32
 | 
			
		||||
if not exist testoutstudy32 md testoutstudy32
 | 
			
		||||
if not exist testoutjit32 md testoutjit32
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
set do1=no
 | 
			
		||||
set do2=no
 | 
			
		||||
set do3=no
 | 
			
		||||
set do4=no
 | 
			
		||||
set do5=no
 | 
			
		||||
set do6=no
 | 
			
		||||
set do7=no
 | 
			
		||||
set do8=no
 | 
			
		||||
set do9=no
 | 
			
		||||
set do10=no
 | 
			
		||||
set do11=no
 | 
			
		||||
set do12=no
 | 
			
		||||
set do13=no
 | 
			
		||||
set do14=no
 | 
			
		||||
set do15=no
 | 
			
		||||
set do16=no
 | 
			
		||||
set do17=no
 | 
			
		||||
set do18=no
 | 
			
		||||
set do19=no
 | 
			
		||||
set do20=no
 | 
			
		||||
set do21=no
 | 
			
		||||
set do22=no
 | 
			
		||||
set do23=no
 | 
			
		||||
set do24=no
 | 
			
		||||
set do25=no
 | 
			
		||||
set do26=no
 | 
			
		||||
set all=yes
 | 
			
		||||
 | 
			
		||||
for %%a in (%*) do (
 | 
			
		||||
  set valid=no
 | 
			
		||||
  for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26) do if %%v == %%a set valid=yes
 | 
			
		||||
  if "!valid!" == "yes" (
 | 
			
		||||
    set do%%a=yes
 | 
			
		||||
    set all=no
 | 
			
		||||
) else (
 | 
			
		||||
    echo Invalid test number - %%a!
 | 
			
		||||
        echo Usage %0 [ test_number ] ...
 | 
			
		||||
        echo Where test_number is one or more optional test numbers 1 through 26, default is all tests.
 | 
			
		||||
        exit /b 1
 | 
			
		||||
)
 | 
			
		||||
)
 | 
			
		||||
set failed="no"
 | 
			
		||||
 | 
			
		||||
if "%all%" == "yes" (
 | 
			
		||||
  set do1=yes
 | 
			
		||||
  set do2=yes
 | 
			
		||||
  set do3=yes
 | 
			
		||||
  set do4=yes
 | 
			
		||||
  set do5=yes
 | 
			
		||||
  set do6=yes
 | 
			
		||||
  set do7=yes
 | 
			
		||||
  set do8=yes
 | 
			
		||||
  set do9=yes
 | 
			
		||||
  set do10=yes
 | 
			
		||||
  set do11=yes
 | 
			
		||||
  set do12=yes
 | 
			
		||||
  set do13=yes
 | 
			
		||||
  set do14=yes
 | 
			
		||||
  set do15=yes
 | 
			
		||||
  set do16=yes
 | 
			
		||||
  set do17=yes
 | 
			
		||||
  set do18=yes
 | 
			
		||||
  set do19=yes
 | 
			
		||||
  set do20=yes
 | 
			
		||||
  set do21=yes
 | 
			
		||||
  set do22=yes
 | 
			
		||||
  set do23=yes
 | 
			
		||||
  set do24=yes
 | 
			
		||||
  set do25=yes
 | 
			
		||||
  set do26=yes
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@echo RunTest.bat's pcretest output is written to newly created subfolders named
 | 
			
		||||
@echo testout, testoutstudy and testoutjit.
 | 
			
		||||
@echo.
 | 
			
		||||
 | 
			
		||||
set mode=
 | 
			
		||||
set bits=8
 | 
			
		||||
 | 
			
		||||
:nextMode
 | 
			
		||||
if "%mode%" == "" (
 | 
			
		||||
  if %support8% EQU 0 goto modeSkip
 | 
			
		||||
  echo.
 | 
			
		||||
  echo ---- Testing 8-bit library ----
 | 
			
		||||
  echo.
 | 
			
		||||
)
 | 
			
		||||
if "%mode%" == "-16" (
 | 
			
		||||
  if %support16% EQU 0 goto modeSkip
 | 
			
		||||
  echo.
 | 
			
		||||
  echo ---- Testing 16-bit library ----
 | 
			
		||||
  echo.
 | 
			
		||||
)
 | 
			
		||||
if "%mode%" == "-32" (
 | 
			
		||||
  if %support32% EQU 0 goto modeSkip
 | 
			
		||||
  echo.
 | 
			
		||||
  echo ---- Testing 32-bit library ----
 | 
			
		||||
  echo.
 | 
			
		||||
)
 | 
			
		||||
if "%do1%" == "yes" call :do1
 | 
			
		||||
if "%do2%" == "yes" call :do2
 | 
			
		||||
if "%do3%" == "yes" call :do3
 | 
			
		||||
if "%do4%" == "yes" call :do4
 | 
			
		||||
if "%do5%" == "yes" call :do5
 | 
			
		||||
if "%do6%" == "yes" call :do6
 | 
			
		||||
if "%do7%" == "yes" call :do7
 | 
			
		||||
if "%do8%" == "yes" call :do8
 | 
			
		||||
if "%do9%" == "yes" call :do9
 | 
			
		||||
if "%do10%" == "yes" call :do10
 | 
			
		||||
if "%do11%" == "yes" call :do11
 | 
			
		||||
if "%do12%" == "yes" call :do12
 | 
			
		||||
if "%do13%" == "yes" call :do13
 | 
			
		||||
if "%do14%" == "yes" call :do14
 | 
			
		||||
if "%do15%" == "yes" call :do15
 | 
			
		||||
if "%do16%" == "yes" call :do16
 | 
			
		||||
if "%do17%" == "yes" call :do17
 | 
			
		||||
if "%do18%" == "yes" call :do18
 | 
			
		||||
if "%do19%" == "yes" call :do19
 | 
			
		||||
if "%do20%" == "yes" call :do20
 | 
			
		||||
if "%do21%" == "yes" call :do21
 | 
			
		||||
if "%do22%" == "yes" call :do22
 | 
			
		||||
if "%do23%" == "yes" call :do23
 | 
			
		||||
if "%do24%" == "yes" call :do24
 | 
			
		||||
if "%do25%" == "yes" call :do25
 | 
			
		||||
if "%do26%" == "yes" call :do26
 | 
			
		||||
:modeSkip
 | 
			
		||||
if "%mode%" == "" (
 | 
			
		||||
  set mode=-16
 | 
			
		||||
  set bits=16
 | 
			
		||||
  goto nextMode
 | 
			
		||||
)
 | 
			
		||||
if "%mode%" == "-16" (
 | 
			
		||||
  set mode=-32
 | 
			
		||||
  set bits=32
 | 
			
		||||
  goto nextMode
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@rem If mode is -32, testing is finished
 | 
			
		||||
if %failed% == "yes" (
 | 
			
		||||
echo In above output, one or more of the various tests failed!
 | 
			
		||||
exit /b 1
 | 
			
		||||
)
 | 
			
		||||
echo All OK
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:runsub
 | 
			
		||||
@rem Function to execute pcretest and compare the output
 | 
			
		||||
@rem Arguments are as follows:
 | 
			
		||||
@rem
 | 
			
		||||
@rem       1 = test number
 | 
			
		||||
@rem       2 = outputdir
 | 
			
		||||
@rem       3 = test name use double quotes
 | 
			
		||||
@rem   4 - 9 = pcretest options
 | 
			
		||||
 | 
			
		||||
if [%1] == [] (
 | 
			
		||||
  echo Missing test number argument!
 | 
			
		||||
  exit /b 1
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
if [%2] == [] (
 | 
			
		||||
  echo Missing outputdir!
 | 
			
		||||
  exit /b 1
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
if [%3] == [] (
 | 
			
		||||
  echo Missing test name argument!
 | 
			
		||||
  exit /b 1
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
set testinput=testinput%1
 | 
			
		||||
set testoutput=testoutput%1
 | 
			
		||||
if exist %srcdir%\testdata\win%testinput% (
 | 
			
		||||
  set testinput=wintestinput%1
 | 
			
		||||
  set testoutput=wintestoutput%1
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
echo Test %1: %3
 | 
			
		||||
%pcretest% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% >%2%bits%\%testoutput%
 | 
			
		||||
if errorlevel 1 (
 | 
			
		||||
  echo.          failed executing command-line:
 | 
			
		||||
  echo.            %pcretest% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput%
 | 
			
		||||
  set failed="yes"
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
set type=
 | 
			
		||||
if [%1]==[11] (
 | 
			
		||||
  set type=-%bits%
 | 
			
		||||
)
 | 
			
		||||
if [%1]==[18] (
 | 
			
		||||
  set type=-%bits%
 | 
			
		||||
)
 | 
			
		||||
if [%1]==[21] (
 | 
			
		||||
  set type=-%bits%
 | 
			
		||||
)
 | 
			
		||||
if [%1]==[22] (
 | 
			
		||||
  set type=-%bits%
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL
 | 
			
		||||
 | 
			
		||||
if errorlevel 1 (
 | 
			
		||||
  @rem Include the type suffix so the message names the reference file that fc compared.
  echo.          failed comparison: fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput%
 | 
			
		||||
  if [%1]==[2] (
 | 
			
		||||
    echo.
 | 
			
		||||
    echo ** Test 2 requires a lot of stack. PCRE can be configured to
 | 
			
		||||
    echo ** use heap for recursion. Otherwise, to pass Test 2
 | 
			
		||||
    echo ** you generally need to allocate 8 mb stack to PCRE.
 | 
			
		||||
    echo ** See the 'pcrestack' page for a discussion of PCRE's
 | 
			
		||||
    echo ** stack usage.
 | 
			
		||||
    echo.
 | 
			
		||||
)
 | 
			
		||||
  if [%1]==[3] (
 | 
			
		||||
    echo.
 | 
			
		||||
    echo ** Test 3 failure usually means french locale is not
 | 
			
		||||
    echo ** available on the system, rather than a bug or problem with PCRE.
 | 
			
		||||
    echo.
 | 
			
		||||
    goto :eof
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
  set failed="yes"
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
echo.          Passed.
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do1
 | 
			
		||||
call :runsub 1 testout "Main functionality (Compatible with Perl >= 5.10)" -q
 | 
			
		||||
call :runsub 1 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do2
 | 
			
		||||
  call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
 | 
			
		||||
  call :runsub 2 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do3
 | 
			
		||||
  call :runsub 3 testout "Locale-specific features" -q
 | 
			
		||||
  call :runsub 3 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 3 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do4
 | 
			
		||||
if %utf% EQU 0 (
 | 
			
		||||
  echo Test 4 Skipped due to absence of UTF-%bits% support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 4 testout "UTF-%bits% support - (Compatible with Perl >= 5.10)" -q
 | 
			
		||||
  call :runsub 4 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 4 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do5
 | 
			
		||||
if %utf% EQU 0 (
 | 
			
		||||
  echo Test 5 Skipped due to absence of UTF-%bits% support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 5 testout "API, internals, and non-Perl stuff for UTF-%bits%" -q
 | 
			
		||||
  call :runsub 5 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 5 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do6
 | 
			
		||||
if %ucp% EQU 0 (
 | 
			
		||||
  echo Test 6 Skipped due to absence of Unicode property support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 6 testout "Unicode property support (Compatible with Perl >= 5.10)" -q
 | 
			
		||||
  call :runsub 6 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 6 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do7
 | 
			
		||||
if %ucp% EQU 0 (
 | 
			
		||||
  echo Test 7 Skipped due to absence of Unicode property support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 7 testout "API, internals, and non-Perl stuff for Unicode property support" -q
 | 
			
		||||
  call :runsub 7 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 7 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do8
 | 
			
		||||
  call :runsub 8 testout "DFA matching main functionality" -q -dfa
 | 
			
		||||
  call :runsub 8 testoutstudy "Test with Study Override" -q -dfa -s
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do9
 | 
			
		||||
if %utf% EQU 0 (
 | 
			
		||||
  echo Test 9 Skipped due to absence of UTF-%bits% support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 9 testout "DFA matching with UTF-%bits%" -q -dfa
 | 
			
		||||
  call :runsub 9 testoutstudy "Test with Study Override" -q -dfa -s
 | 
			
		||||
  goto :eof
 | 
			
		||||
 | 
			
		||||
:do10
 | 
			
		||||
if %ucp% EQU 0 (
 | 
			
		||||
  echo Test 10 Skipped due to absence of Unicode property support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 10 testout "DFA matching with Unicode properties" -q -dfa
 | 
			
		||||
  call :runsub 10 testoutstudy "Test with Study Override" -q -dfa -s
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do11
 | 
			
		||||
if NOT %link_size% EQU 2 (
 | 
			
		||||
  echo Test 11 Skipped because link size is not 2.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if %ucp% EQU 0 (
 | 
			
		||||
  echo Test 11 Skipped due to absence of Unicode property support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 11 testout "Internal offsets and code size tests" -q
 | 
			
		||||
  call :runsub 11 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do12
 | 
			
		||||
if %jit% EQU 0 (
 | 
			
		||||
  echo Test 12 Skipped due to absence of JIT support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 12 testout "JIT-specific features (JIT available)" -q
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do13
 | 
			
		||||
if %jit% EQU 1 (
 | 
			
		||||
  echo Test 13 Skipped due to presence of JIT support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 13 testout "JIT-specific features (JIT not available)" -q
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do14
 | 
			
		||||
if NOT %bits% EQU 8 (
 | 
			
		||||
  echo Test 14 Skipped when running 16/32-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  copy /Y %srcdir%\testdata\saved16 testsaved16
 | 
			
		||||
  copy /Y %srcdir%\testdata\saved32 testsaved32
 | 
			
		||||
  call :runsub 14 testout "Specials for the basic 8-bit library" -q
 | 
			
		||||
  call :runsub 14 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 14 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do15
 | 
			
		||||
if NOT %bits% EQU 8 (
 | 
			
		||||
  echo Test 15 Skipped when running 16/32-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if %utf% EQU 0 (
 | 
			
		||||
  echo Test 15 Skipped due to absence of UTF-%bits% support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 15 testout "Specials for the 8-bit library with UTF-%bits% support" -q
 | 
			
		||||
  call :runsub 15 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 15 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do16
 | 
			
		||||
if NOT %bits% EQU 8 (
 | 
			
		||||
  echo Test 16 Skipped when running 16/32-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if %ucp% EQU 0 (
 | 
			
		||||
  echo Test 16 Skipped due to absence of Unicode property support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 16 testout "Specials for the 8-bit library with Unicode propery support" -q
 | 
			
		||||
  call :runsub 16 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 16 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do17
 | 
			
		||||
if %bits% EQU 8 (
 | 
			
		||||
  echo Test 17 Skipped when running 8-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 17 testout "Specials for the basic 16/32-bit library" -q
 | 
			
		||||
  call :runsub 17 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 17 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do18
 | 
			
		||||
if %bits% EQU 8 (
 | 
			
		||||
  echo Test 18 Skipped when running 8-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if %utf% EQU 0 (
 | 
			
		||||
  echo Test 18 Skipped due to absence of UTF-%bits% support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 18 testout "Specials for the 16/32-bit library with UTF-%bits% support" -q
 | 
			
		||||
  call :runsub 18 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 18 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do19
 | 
			
		||||
if %bits% EQU 8 (
 | 
			
		||||
  echo Test 19 Skipped when running 8-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if %ucp% EQU 0 (
 | 
			
		||||
  echo Test 19 Skipped due to absence of Unicode property support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 19 testout "Specials for the 16/32-bit library with Unicode property support" -q
 | 
			
		||||
  call :runsub 19 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
  if %jit% EQU 1 call :runsub 19 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do20
 | 
			
		||||
if %bits% EQU 8 (
 | 
			
		||||
  echo Test 20 Skipped when running 8-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
  call :runsub 20 testout "DFA specials for the basic 16/32-bit library" -q -dfa
 | 
			
		||||
  call :runsub 20 testoutstudy "Test with Study Override" -q -dfa -s
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do21
 | 
			
		||||
if %bits% EQU 8 (
 | 
			
		||||
  echo Test 21 Skipped when running 8-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if NOT %link_size% EQU 2 (
 | 
			
		||||
  echo Test 21 Skipped because link size is not 2.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
copy /Y %srcdir%\testdata\saved8 testsaved8
 | 
			
		||||
copy /Y %srcdir%\testdata\saved16LE-1 testsaved16LE-1
 | 
			
		||||
copy /Y %srcdir%\testdata\saved16BE-1 testsaved16BE-1
 | 
			
		||||
copy /Y %srcdir%\testdata\saved32LE-1 testsaved32LE-1
 | 
			
		||||
copy /Y %srcdir%\testdata\saved32BE-1 testsaved32BE-1
 | 
			
		||||
call :runsub 21 testout "Reloads for the basic 16/32-bit library" -q
 | 
			
		||||
call :runsub 21 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do22
 | 
			
		||||
if %bits% EQU 8 (
 | 
			
		||||
  echo Test 22 Skipped when running 8-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if %utf% EQU 0 (
 | 
			
		||||
  echo Test 22 Skipped due to absence of UTF-%bits% support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if NOT %link_size% EQU 2 (
 | 
			
		||||
  echo Test 22 Skipped because link size is not 2.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
copy /Y %srcdir%\testdata\saved16LE-2 testsaved16LE-2
 | 
			
		||||
copy /Y %srcdir%\testdata\saved16BE-2 testsaved16BE-2
 | 
			
		||||
copy /Y %srcdir%\testdata\saved32LE-2 testsaved32LE-2
 | 
			
		||||
copy /Y %srcdir%\testdata\saved32BE-2 testsaved32BE-2
 | 
			
		||||
call :runsub 22 testout "Reloads for the 16/32-bit library with UTF-16/32 support" -q
 | 
			
		||||
call :runsub 22 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do23
 | 
			
		||||
if NOT %bits% EQU 16 (
 | 
			
		||||
  echo Test 23 Skipped when running 8/32-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
call :runsub 23 testout "Specials for the 16-bit library" -q
 | 
			
		||||
call :runsub 23 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
if %jit% EQU 1 call :runsub 23 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do24
 | 
			
		||||
if NOT %bits% EQU 16 (
 | 
			
		||||
  echo Test 24 Skipped when running 8/32-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if %utf% EQU 0 (
 | 
			
		||||
  echo Test 24 Skipped due to absence of UTF-%bits% support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
call :runsub 24 testout "Specials for the 16-bit library with UTF-16 support" -q
 | 
			
		||||
call :runsub 24 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
if %jit% EQU 1 call :runsub 24 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do25
 | 
			
		||||
if NOT %bits% EQU 32 (
 | 
			
		||||
  echo Test 25 Skipped when running 8/16-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
call :runsub 25 testout "Specials for the 32-bit library" -q
 | 
			
		||||
call :runsub 25 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
if %jit% EQU 1 call :runsub 25 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:do26
 | 
			
		||||
if NOT %bits% EQU 32 (
 | 
			
		||||
  echo Test 26 Skipped when running 8/16-bit tests.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
if %utf% EQU 0 (
 | 
			
		||||
  echo Test 26 Skipped due to absence of UTF-%bits% support.
 | 
			
		||||
  goto :eof
 | 
			
		||||
)
 | 
			
		||||
call :runsub 26 testout "Specials for the 32-bit library with UTF-32 support" -q
 | 
			
		||||
call :runsub 26 testoutstudy "Test with Study Override" -q -s
 | 
			
		||||
if %jit% EQU 1 call :runsub 26 testoutjit "Test with JIT Override" -q -s+
 | 
			
		||||
goto :eof
 | 
			
		||||
 | 
			
		||||
:conferror
@rem Reached when the test setup is unusable. Prints troubleshooting hints
@rem for cmake-based and manual builds, then returns to the caller.
@echo.
@echo Either your build is incomplete or you have a configuration error.
@echo.
@echo If configured with cmake and executed via "make test" or the MSVC "RUN_TESTS"
@echo project, pcre_test.bat defines variables and automatically calls RunTest.bat.
@echo For manual testing of all available features, after configuring with cmake
@echo and building, you can run the built pcre_test.bat. For best results with
@echo cmake builds and tests avoid directories with full path names that include
@echo spaces for source or build.
@echo.
@echo Otherwise, if the build dir is in a subdir of the source dir, testdata needed
@echo for input and verification should be found automatically when (from the
@echo location of the built exes) you call RunTest.bat. By default RunTest.bat
@echo runs all tests compatible with the linked pcre library but it can be given
@echo a test number as an argument.
@echo.
@echo If the build dir is not under the source dir you can either copy your exes
@echo to the source folder or copy RunTest.bat and the testdata folder to the
@echo location of your built exes and then run RunTest.bat.
@echo.
goto :eof
 | 
			
		||||
							
								
								
									
										1431
									
								
								tools/pcre/aclocal.m4
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1431
									
								
								tools/pcre/aclocal.m4
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										270
									
								
								tools/pcre/ar-lib
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										270
									
								
								tools/pcre/ar-lib
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,270 @@
 | 
			
		||||
#! /bin/sh
 | 
			
		||||
# Wrapper for Microsoft lib.exe
 | 
			
		||||
 | 
			
		||||
me=ar-lib
 | 
			
		||||
scriptversion=2012-03-01.08; # UTC
 | 
			
		||||
 | 
			
		||||
# Copyright (C) 2010-2013 Free Software Foundation, Inc.
 | 
			
		||||
# Written by Peter Rosin <peda@lysator.liu.se>.
 | 
			
		||||
#
 | 
			
		||||
# This program is free software; you can redistribute it and/or modify
 | 
			
		||||
# it under the terms of the GNU General Public License as published by
 | 
			
		||||
# the Free Software Foundation; either version 2, or (at your option)
 | 
			
		||||
# any later version.
 | 
			
		||||
#
 | 
			
		||||
# This program is distributed in the hope that it will be useful,
 | 
			
		||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
# GNU General Public License for more details.
 | 
			
		||||
#
 | 
			
		||||
# You should have received a copy of the GNU General Public License
 | 
			
		||||
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
# As a special exception to the GNU General Public License, if you
 | 
			
		||||
# distribute this file as part of a program that contains a
 | 
			
		||||
# configuration script generated by Autoconf, you may include it under
 | 
			
		||||
# the same distribution terms that you use for the rest of that program.
 | 
			
		||||
 | 
			
		||||
# This file is maintained in Automake, please report
 | 
			
		||||
# bugs to <bug-automake@gnu.org> or send patches to
 | 
			
		||||
# <automake-patches@gnu.org>.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# func_error message
# Write "<script name>: MESSAGE" to standard error and terminate the
# script with exit status 1.  The script name is taken from $me.
func_error ()
{
  echo "$me: $1" >&2
  exit 1
}
 | 
			
		||||
 | 
			
		||||
file_conv=
 | 
			
		||||
 | 
			
		||||
# func_file_conv build_file
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts.
#
# Only absolute names that are not UNC names (i.e. "/" or "/x...", but
# not "//...") are converted; everything else is copied to $file as-is.
# The conversion flavour is derived from `uname -s` on first use and
# remembered in $file_conv for later calls.
func_file_conv ()
{
  file=$1

  # Nothing to do unless this is an absolute, non-UNC file name.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # lazily determine how to convert abs files
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw  ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine   ;;
    esac
  fi

  case $file_conv in
    mingw)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin)
      # Fall back to the unconverted name if cygpath fails.
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine)
      # Fall back to the unconverted name if winepath fails.
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
 | 
			
		||||
 | 
			
		||||
# func_at_file at_file operation archive
# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
# for each of them.
# When interpreting the content of the @FILE, do NOT use func_file_conv,
# since the user would need to supply preconverted file names to
# binutils ar, at least for MinGW.
#
# OPERATION is passed with its leading dash (for example -REMOVE) and is
# joined to each member name with a colon.  The first failing $AR call
# terminates the script with that call's exit status.
func_at_file ()
{
  operation=$2
  archive=$3
  at_file_contents=`cat "$1"`

  # Re-parse the file content as shell words so that quoted member names
  # survive; the dummy "x" keeps `set' from treating a leading dash in
  # the content as an option and is dropped again right away.
  eval set x "$at_file_contents"
  shift

  for member in "$@"
  do
    $AR -NOLOGO $operation:"$member" "$archive" || exit $?
  done
}
 | 
			
		||||
 | 
			
		||||
# Deal with the stand-alone invocations first: --version, --help and an
# empty command line.  Anything else falls through to the argument
# parser below.
case $1 in
  -v | --v*)
    echo "$me, version $scriptversion"
    exit $?
    ;;
  -h | --h*)
    cat <<EOF
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]

Members may be specified in a file named with @FILE.
EOF
    exit $?
    ;;
  '')
    func_error "no command.  Try '$0 --help' for more information."
    ;;
esac

# PROGRAM, ACTION and ARCHIVE are all mandatory.
test $# -ge 3 ||
  func_error "you must specify a program, an action and an archive"
 | 
			
		||||
 | 
			
		||||
# The first word is the librarian program itself.
AR=$1
shift

# Any lib.exe options that directly follow the program name are glued
# onto $AR; the first word that is not such an option is the ACTION.
# At least an ACTION and an ARCHIVE must remain at every step.
while true
do
  test $# -ge 2 ||
    func_error "you must specify a program, an action and an archive"
  case $1 in
    -lib | -LIB \
    | -ltcg | -LTCG \
    | -machine* | -MACHINE* \
    | -subsystem* | -SUBSYSTEM* \
    | -verbose | -VERBOSE \
    | -wx* | -WX* )
      AR="$AR $1"
      ;;
    *)
      break
      ;;
  esac
  shift
done

# $1 is now the ACTION and $2 the ARCHIVE; what is left afterwards are
# the members.
action=$1
orig_archive=$2
shift
shift

# Keep the name as given (used for the existence checks) next to the
# converted name that is handed to $AR.
func_file_conv "$orig_archive"
archive=$file
 | 
			
		||||
 | 
			
		||||
# strip leading dash in $action
action=${action#-}

# One flag per ar action/modifier letter; empty means "not requested".
delete= extract= list= quick= replace= index= create=

# Consume $action one letter at a time.
until test -z "$action"
do
  case $action in
    d*) delete=yes  ;;
    x*) extract=yes ;;
    t*) list=yes    ;;
    q*) quick=yes   ;;
    r*) replace=yes ;;
    s*) index=yes   ;;
    S*)             ;; # the index is always updated implicitly
    c*) create=yes  ;;
    u*)             ;; # TODO: don't ignore the update modifier
    v*)             ;; # TODO: don't ignore the verbose modifier
    *)
      func_error "unknown action specified"
      ;;
  esac
  action=${action#?}
done

# Exactly one of d/x/t/q/r must have been given, or "s" on its own.
# The concatenated flags read "yes" for one action, "yesyes..." for
# several, and are empty for none.
case $delete$extract$list$quick$replace,$index in
  yes,* | ,yes)
    ;;
  yesyes*)
    func_error "more than one action specified"
    ;;
  *)
    func_error "no action specified"
    ;;
esac
 | 
			
		||||
 | 
			
		||||
# Carry out the requested action by translating it into lib.exe calls.
# The remaining positional parameters are the archive members; a member
# of the form @FILE names a file that lists members.
if test -n "$delete"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  # Use the loop variable, not $1: nothing shifts the positional
  # parameters in this loop, so $1 would name the first member on every
  # iteration and the remaining members would never be removed.
  for member
  do
    case $member in
      @*)
        func_at_file "${member#@}" -REMOVE "$archive"
        ;;
      *)
        func_file_conv "$member"
        $AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
        ;;
    esac
  done

elif test -n "$extract"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  if test $# -gt 0; then
    # Same as for delete: iterate via $member, since $1 never advances.
    for member
    do
      case $member in
        @*)
          func_at_file "${member#@}" -EXTRACT "$archive"
          ;;
        *)
          func_file_conv "$member"
          $AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
          ;;
      esac
    done
  else
    # No members given: extract everything the archive lists.  The sed
    # doubles backslashes so that `read' hands them back unchanged.
    $AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
    do
      $AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
    done
  fi

elif test -n "$quick$replace"; then
  if test ! -f "$orig_archive"; then
    if test -z "$create"; then
      echo "$me: creating $orig_archive"
    fi
    # No existing archive to feed back into the new one.
    orig_archive=
  else
    orig_archive=$archive
  fi

  # Rotate the argument list: each pass appends the converted form of $1
  # at the end and drops the original from the front, so after the loop
  # "$@" holds exactly the converted members in their original order.
  for member
  do
    case $1 in
    @*)
      func_file_conv "${1#@}"
      set x "$@" "@$file"
      ;;
    *)
      func_file_conv "$1"
      set x "$@" "$file"
      ;;
    esac
    shift
    shift
  done

  if test -n "$orig_archive"; then
    $AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
  else
    $AR -NOLOGO -OUT:"$archive" "$@" || exit $?
  fi

elif test -n "$list"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  $AR -NOLOGO -LIST "$archive" || exit $?
fi
 | 
			
		||||
							
								
								
									
										22
									
								
								tools/pcre/cmake/COPYING-CMAKE-SCRIPTS
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								tools/pcre/cmake/COPYING-CMAKE-SCRIPTS
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,22 @@
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions
 | 
			
		||||
are met:
 | 
			
		||||
 | 
			
		||||
1. Redistributions of source code must retain the copyright
 | 
			
		||||
   notice, this list of conditions and the following disclaimer.
 | 
			
		||||
2. Redistributions in binary form must reproduce the copyright
 | 
			
		||||
   notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
   documentation and/or other materials provided with the distribution.
 | 
			
		||||
3. The name of the author may not be used to endorse or promote products 
 | 
			
		||||
   derived from this software without specific prior written permission.
 | 
			
		||||
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 | 
			
		||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
			
		||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
			
		||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
			
		||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
			
		||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
			
		||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
			
		||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
			
		||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
							
								
								
									
										17
									
								
								tools/pcre/cmake/FindEditline.cmake
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								tools/pcre/cmake/FindEditline.cmake
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,17 @@
 | 
			
		||||
# Modified from FindReadline.cmake (PH Feb 2012)
#
# Editline library finder.  Sets EDITLINE_INCLUDE_DIR, EDITLINE_LIBRARY
# and EDITLINE_FOUND.  NCURSES_LIBRARY is only consulted for the
# short-circuit below; this module does not search for it.

if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
  # Everything is already known: skip the search.
  set(EDITLINE_FOUND TRUE)
else()
  find_path(EDITLINE_INCLUDE_DIR readline.h
    /usr/include/editline
    /usr/include/edit/readline
    /usr/include/readline
  )

  find_library(EDITLINE_LIBRARY NAMES edit)
  include(FindPackageHandleStandardArgs)
  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
endif()
 | 
			
		||||
							
								
								
									
										58
									
								
								tools/pcre/cmake/FindPackageHandleStandardArgs.cmake
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								tools/pcre/cmake/FindPackageHandleStandardArgs.cmake
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,58 @@
 | 
			
		||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
#    This macro is intended to be used in FindXXX.cmake modules files.
#    It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
#    it also sets the <UPPERCASED_NAME>_FOUND variable.
#    The package is found if all variables listed are TRUE.
#    Example:
#
#    FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
#
#    LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
#    LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
#    If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
#    independent whether QUIET was used or not.
#    If it is found, the location is reported using the VAR1 argument, so
#    here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
#    If the second argument is DEFAULT_MSG, the message in the failure case
#    will be "Could not find REQUIRED package LibXml2" or "Could not find
#    OPTIONAL package LibXml2", depending on whether REQUIRED was used. If
#    you don't like this message you can specify your own custom failure
#    message there.

macro(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1)

  # Choose the text used when the package turns out to be missing.
  if("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
    if(${_NAME}_FIND_REQUIRED)
      set(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
    else()
      set(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
    endif()
  else()
    set(_FAIL_MESSAGE "${_FAIL_MSG}")
  endif()

  string(TOUPPER ${_NAME} _NAME_UPPER)

  # Start out optimistic; any listed variable (VAR1 first, then the
  # remaining arguments) that evaluates to false marks the package as
  # not found.
  set(${_NAME_UPPER}_FOUND TRUE)
  foreach(_CURRENT_VAR ${_VAR1} ${ARGN})
    if(NOT ${_CURRENT_VAR})
      set(${_NAME_UPPER}_FOUND FALSE)
    endif()
  endforeach()

  # Report the outcome: a status line unless QUIET was requested, or a
  # fatal error when a REQUIRED package is missing.
  if(${_NAME_UPPER}_FOUND)
    if(NOT ${_NAME}_FIND_QUIETLY)
      message(STATUS "Found ${_NAME}: ${${_VAR1}}")
    endif()
  elseif(${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${_FAIL_MESSAGE}")
  elseif(NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${_FAIL_MESSAGE}")
  endif()
endmacro()
 | 
			
		||||
							
								
								
									
										29
									
								
								tools/pcre/cmake/FindReadline.cmake
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								tools/pcre/cmake/FindReadline.cmake
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,29 @@
 | 
			
		||||
# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake
# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS
# --> BSD licensed
#
# GNU Readline library finder.  Sets READLINE_INCLUDE_DIR,
# READLINE_LIBRARY and READLINE_FOUND.
if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY)
  # Everything is already known: skip the search.
  set(READLINE_FOUND TRUE)
else()
  find_path(READLINE_INCLUDE_DIR readline/readline.h
    /usr/include/readline
  )

  # 2008-04-22 The next clause used to read like this:
  #
  #  FIND_LIBRARY(READLINE_LIBRARY NAMES readline)
  #        FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses )
  #        include(FindPackageHandleStandardArgs)
  #        FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY )
  #
  # I was advised to modify it such that it will find an ncurses library if
  # required, but not if one was explicitly given, that is, it allows the
  # default to be overridden. PH

  find_library(READLINE_LIBRARY NAMES readline)
  include(FindPackageHandleStandardArgs)
  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY)

  mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY)
endif()
 | 
			
		||||
							
								
								
									
										347
									
								
								tools/pcre/compile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										347
									
								
								tools/pcre/compile
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,347 @@
 | 
			
		||||
#! /bin/sh
 | 
			
		||||
# Wrapper for compilers which do not understand '-c -o'.
 | 
			
		||||
 | 
			
		||||
scriptversion=2012-10-14.11; # UTC
 | 
			
		||||
 | 
			
		||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
 | 
			
		||||
# Written by Tom Tromey <tromey@cygnus.com>.
 | 
			
		||||
#
 | 
			
		||||
# This program is free software; you can redistribute it and/or modify
 | 
			
		||||
# it under the terms of the GNU General Public License as published by
 | 
			
		||||
# the Free Software Foundation; either version 2, or (at your option)
 | 
			
		||||
# any later version.
 | 
			
		||||
#
 | 
			
		||||
# This program is distributed in the hope that it will be useful,
 | 
			
		||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
# GNU General Public License for more details.
 | 
			
		||||
#
 | 
			
		||||
# You should have received a copy of the GNU General Public License
 | 
			
		||||
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
# As a special exception to the GNU General Public License, if you
 | 
			
		||||
# distribute this file as part of a program that contains a
 | 
			
		||||
# configuration script generated by Autoconf, you may include it under
 | 
			
		||||
# the same distribution terms that you use for the rest of that program.
 | 
			
		||||
 | 
			
		||||
# This file is maintained in Automake, please report
 | 
			
		||||
# bugs to <bug-automake@gnu.org> or send patches to
 | 
			
		||||
# <automake-patches@gnu.org>.
 | 
			
		||||
 | 
			
		||||
nl='
 | 
			
		||||
'
 | 
			
		||||
 | 
			
		||||
# We need space, tab and new line, in precisely that order.  Quoting is
 | 
			
		||||
# there to prevent tools from complaining about whitespace usage.
 | 
			
		||||
IFS=" ""	$nl"
 | 
			
		||||
 | 
			
		||||
file_conv=
 | 
			
		||||
 | 
			
		||||
# func_file_conv build_file lazy
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts. If the determined conversion
# type is listed in (the comma separated) LAZY, no conversion will
# take place.
#
# Only absolute names that are not UNC names are candidates for
# conversion; any other name is copied to $file unchanged.  The
# conversion type is derived from `uname -s` on first use and cached in
# $file_conv.
func_file_conv ()
{
  file=$1

  # Nothing to do unless this is an absolute, non-UNC file name.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # lazily determine how to convert abs files
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw  ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine   ;;
    esac
  fi

  # The subject has the form "<type>/,<lazy list>,": the first pattern
  # catches a conversion type that the caller listed as lazy.
  case $file_conv/,$2, in
    *,$file_conv,*)
      ;;
    mingw/*)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin/*)
      # Fall back to the unconverted name if cygpath fails.
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine/*)
      # Fall back to the unconverted name if winepath fails.
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
 | 
			
		||||
 | 
			
		||||
# func_cl_dashL linkdir
# Make cl look for libraries in LINKDIR
#
# The converted directory is appended to the ';'-separated $lib_path
# (searched later by func_cl_dashl) and also recorded in $linker_opts as
# a -LIBPATH: option.
func_cl_dashL ()
{
  func_file_conv "$1"
  # Only put a ';' in front when there already is an entry.
  lib_path="${lib_path:+$lib_path;}$file"
  linker_opts="$linker_opts -LIBPATH:$file"
}
 | 
			
		||||
 | 
			
		||||
# func_cl_dashl library
# Do a library search-path lookup for cl
#
# Walks the ';'-separated directories in $lib_path and $LIB and leaves
# the first match in $lib.  Per directory the candidates are, in order:
# LIBRARY.dll.lib (only while $shared is true), LIBRARY.lib and
# libLIBRARY.a.  When nothing matches, $lib becomes plain LIBRARY.lib.
func_cl_dashl ()
{
  lib=$1
  found=no
  save_IFS=$IFS
  IFS=';'
  for dir in $lib_path $LIB
  do
    # The special IFS is only wanted for splitting the list above.
    IFS=$save_IFS
    if $shared && test -f "$dir/$lib.dll.lib"; then
      found=yes
      lib=$dir/$lib.dll.lib
    elif test -f "$dir/$lib.lib"; then
      found=yes
      lib=$dir/$lib.lib
    elif test -f "$dir/lib$lib.a"; then
      found=yes
      lib=$dir/lib$lib.a
    fi
    if test "$found" = yes; then
      break
    fi
  done
  IFS=$save_IFS

  test "$found" = yes || lib=$lib.lib
}
 | 
			
		||||
 | 
			
		||||
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
#
# Rewrites the argument list in place: every pass of the loop looks at
# $1, appends its translation (if any) to the end of "$@" and then drops
# the original from the front.  Options that take a separate value set
# $eat so that the value is skipped on the next pass.  Linker options
# are collected in $linker_opts and passed after a final -link.
# Does not return: the rewritten command replaces the shell via exec.
func_cl_wrapper ()
{
  # Assume a capable shell
  lib_path=
  shared=:
  linker_opts=
  for arg
  do
    if test -n "$eat"; then
      eat=
    else
      case $1 in
        -o)
          # configure might choose to run compile as 'compile cc -o foo foo.c'.
          eat=1
          func_file_conv "$2"
          # Object files are named with -Fo, any other output with -Fe.
          case $2 in
            *.o | *.[oO][bB][jJ])
              set x "$@" -Fo"$file"
              ;;
            *)
              set x "$@" -Fe"$file"
              ;;
          esac
          shift
          ;;
        -I)
          eat=1
          func_file_conv "$2" mingw
          set x "$@" -I"$file"
          shift
          ;;
        -I*)
          func_file_conv "${1#-I}" mingw
          set x "$@" -I"$file"
          shift
          ;;
        -l)
          eat=1
          func_cl_dashl "$2"
          set x "$@" "$lib"
          shift
          ;;
        -l*)
          func_cl_dashl "${1#-l}"
          set x "$@" "$lib"
          shift
          ;;
        -L)
          eat=1
          func_cl_dashL "$2"
          ;;
        -L*)
          func_cl_dashL "${1#-L}"
          ;;
        -static)
          shared=false
          ;;
        -Wl,*)
          # Split the comma separated list into individual linker flags.
          arg=${1#-Wl,}
          save_ifs="$IFS"; IFS=','
          for flag in $arg; do
            IFS="$save_ifs"
            linker_opts="$linker_opts $flag"
          done
          IFS="$save_ifs"
          ;;
        -Xlinker)
          eat=1
          linker_opts="$linker_opts $2"
          ;;
        -*)
          set x "$@" "$1"
          shift
          ;;
        *.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
          func_file_conv "$1"
          set x "$@" -Tp"$file"
          shift
          ;;
        *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
          func_file_conv "$1" mingw
          set x "$@" "$file"
          shift
          ;;
        *)
          set x "$@" "$1"
          shift
          ;;
      esac
    fi
    shift
  done

  if test -n "$linker_opts"; then
    linker_opts="-link$linker_opts"
  fi
  exec "$@" $linker_opts
  exit 1
}
 | 
			
		||||
 | 
			
		||||
# Set while the next argument is the value of an option that has
# already been handled and therefore has to be skipped.
eat=

# Stand-alone invocations (empty command line, --help, --version) and
# the hand-off to the cl wrapper when the compiler is Microsoft's cl.
case $1 in
  '')
    echo "$0: No command.  Try '$0 --help' for more information." >&2
    exit 1
    ;;
  -h | --h*)
    cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]

Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.

If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.

Report bugs to <bug-automake@gnu.org>.
EOF
    exit $?
    ;;
  -v | --v*)
    echo "compile $scriptversion"
    exit $?
    ;;
  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
    func_cl_wrapper "$@"      # Doesn't return...
    ;;
esac
 | 
			
		||||
 | 
			
		||||
# Object file requested with -o, and the .c source file being compiled.
ofile=
cfile=

# Rebuild the argument list without '-o OBJECT': every pass appends the
# arguments to keep at the end of "$@" and drops the original $1 from
# the front.
for arg
do
  if test -n "$eat"; then
    eat=
  else
    case $1 in
      -o)
        # configure might choose to run compile as 'compile cc -o foo foo.c'.
        # So we strip '-o arg' only if arg is an object.
        eat=1
        case $2 in
          *.o | *.obj)
            ofile=$2
            ;;
          *)
            set x "$@" -o "$2"
            shift
            ;;
        esac
        ;;
      *)
        # Remember the C source on the way through; it is kept as-is.
        case $1 in
          *.c) cfile=$1 ;;
        esac
        set x "$@" "$1"
        shift
        ;;
    esac
  fi
  shift
done

if test -z "$ofile" || test -z "$cfile"; then
  # If no '-o' option was seen then we might have been invoked from a
  # pattern rule where we don't need one.  That is ok -- this is a
  # normal compilation that the losing compiler can handle.  If no
  # '.c' file was seen then we are probably linking.  That is also
  # ok.
  exec "$@"
fi

# Name of file we expect compiler to create.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`

# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file.  Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
# Retry once a second until the directory can be created.
until mkdir "$lockdir" >/dev/null 2>&1
do
  sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
trap "rmdir '$lockdir'; exit 1" 1 2 15

# Run the compile.
"$@"
ret=$?

# Move whatever the compiler produced (NAME.o, or NAME.obj as a second
# choice) to the requested output name.
for produced in "$cofile" "${cofile}bj"
do
  if test -f "$produced"; then
    test "$produced" = "$ofile" || mv "$produced" "$ofile"
    break
  fi
done

rmdir "$lockdir"
exit $ret
 | 
			
		||||
 | 
			
		||||
# Local Variables:
 | 
			
		||||
# mode: shell-script
 | 
			
		||||
# sh-indentation: 2
 | 
			
		||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
 | 
			
		||||
# time-stamp-start: "scriptversion="
 | 
			
		||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
 | 
			
		||||
# time-stamp-time-zone: "UTC"
 | 
			
		||||
# time-stamp-end: "; # UTC"
 | 
			
		||||
# End:
 | 
			
		||||
							
								
								
									
										57
									
								
								tools/pcre/config-cmake.h.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								tools/pcre/config-cmake.h.in
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,57 @@
 | 
			
		||||
/* config.h for CMake builds */
 | 
			
		||||
 | 
			
		||||
#cmakedefine HAVE_DIRENT_H 1
 | 
			
		||||
#cmakedefine HAVE_SYS_STAT_H 1
 | 
			
		||||
#cmakedefine HAVE_SYS_TYPES_H 1
 | 
			
		||||
#cmakedefine HAVE_UNISTD_H 1
 | 
			
		||||
#cmakedefine HAVE_WINDOWS_H 1
 | 
			
		||||
#cmakedefine HAVE_STDINT_H 1                                                   
 | 
			
		||||
#cmakedefine HAVE_INTTYPES_H 1    
 | 
			
		||||
 | 
			
		||||
#cmakedefine HAVE_TYPE_TRAITS_H 1
 | 
			
		||||
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
 | 
			
		||||
 | 
			
		||||
#cmakedefine HAVE_BCOPY 1
 | 
			
		||||
#cmakedefine HAVE_MEMMOVE 1
 | 
			
		||||
#cmakedefine HAVE_STRERROR 1
 | 
			
		||||
#cmakedefine HAVE_STRTOLL 1
 | 
			
		||||
#cmakedefine HAVE_STRTOQ 1
 | 
			
		||||
#cmakedefine HAVE__STRTOI64 1
 | 
			
		||||
 | 
			
		||||
#cmakedefine PCRE_STATIC 1
 | 
			
		||||
 | 
			
		||||
#cmakedefine SUPPORT_PCRE8 1
 | 
			
		||||
#cmakedefine SUPPORT_PCRE16 1
 | 
			
		||||
#cmakedefine SUPPORT_PCRE32 1
 | 
			
		||||
#cmakedefine SUPPORT_JIT 1
 | 
			
		||||
#cmakedefine SUPPORT_PCREGREP_JIT 1
 | 
			
		||||
#cmakedefine SUPPORT_UTF 1
 | 
			
		||||
#cmakedefine SUPPORT_UCP 1
 | 
			
		||||
#cmakedefine EBCDIC 1
 | 
			
		||||
#cmakedefine EBCDIC_NL25 1
 | 
			
		||||
#cmakedefine BSR_ANYCRLF 1
 | 
			
		||||
#cmakedefine NO_RECURSE 1
 | 
			
		||||
 | 
			
		||||
#cmakedefine HAVE_LONG_LONG 1
 | 
			
		||||
#cmakedefine HAVE_UNSIGNED_LONG_LONG 1
 | 
			
		||||
 | 
			
		||||
#cmakedefine SUPPORT_LIBBZ2 1
 | 
			
		||||
#cmakedefine SUPPORT_LIBZ 1
 | 
			
		||||
#cmakedefine SUPPORT_LIBEDIT 1
 | 
			
		||||
#cmakedefine SUPPORT_LIBREADLINE 1
 | 
			
		||||
 | 
			
		||||
#cmakedefine SUPPORT_VALGRIND 1
 | 
			
		||||
#cmakedefine SUPPORT_GCOV 1
 | 
			
		||||
 | 
			
		||||
#define NEWLINE			@NEWLINE@
 | 
			
		||||
#define POSIX_MALLOC_THRESHOLD	@PCRE_POSIX_MALLOC_THRESHOLD@
 | 
			
		||||
#define LINK_SIZE		@PCRE_LINK_SIZE@
 | 
			
		||||
#define PARENS_NEST_LIMIT       @PCRE_PARENS_NEST_LIMIT@
 | 
			
		||||
#define MATCH_LIMIT		@PCRE_MATCH_LIMIT@
 | 
			
		||||
#define MATCH_LIMIT_RECURSION	@PCRE_MATCH_LIMIT_RECURSION@
 | 
			
		||||
#define PCREGREP_BUFSIZE        @PCREGREP_BUFSIZE@
 | 
			
		||||
 | 
			
		||||
#define MAX_NAME_SIZE	32
 | 
			
		||||
#define MAX_NAME_COUNT	10000
 | 
			
		||||
 | 
			
		||||
/* end config.h for CMake builds */
 | 
			
		||||
							
								
								
									
										1568
									
								
								tools/pcre/config.guess
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1568
									
								
								tools/pcre/config.guess
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										350
									
								
								tools/pcre/config.h.generic
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										350
									
								
								tools/pcre/config.h.generic
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,350 @@
 | 
			
		||||
/* config.h.  Generated from config.h.in by configure.  */
 | 
			
		||||
/* config.h.in.  Generated from configure.ac by autoheader.  */
 | 
			
		||||
 | 
			
		||||
/* PCRE is written in Standard C, but there are a few non-standard things it
 | 
			
		||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
 | 
			
		||||
systems.
 | 
			
		||||
 | 
			
		||||
In environments that support the GNU autotools, config.h.in is converted into
 | 
			
		||||
config.h by the "configure" script. In environments that use CMake,
 | 
			
		||||
config-cmake.h.in is converted into config.h. If you are going to build PCRE "by
 | 
			
		||||
hand" without using "configure" or CMake, you should copy the distributed
 | 
			
		||||
config.h.generic to config.h, and edit the macro definitions to be the way you
 | 
			
		||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
 | 
			
		||||
so that config.h is included at the start of every source.
 | 
			
		||||
 | 
			
		||||
Alternatively, you can avoid editing by using -D on the compiler command line
 | 
			
		||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
 | 
			
		||||
but if you do, default values will be taken from config.h for non-boolean
 | 
			
		||||
macros that are not defined on the command line.
 | 
			
		||||
 | 
			
		||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
 | 
			
		||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
 | 
			
		||||
macros are listed as a commented #undef in config.h.generic. Macros such as
 | 
			
		||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
 | 
			
		||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
 | 
			
		||||
 | 
			
		||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
 | 
			
		||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
 | 
			
		||||
sure both macros are undefined; an emulation function will then be used. */
 | 
			
		||||
 | 
			
		||||
/* By default, the \R escape sequence matches any Unicode line ending
 | 
			
		||||
   character or sequence of characters. If BSR_ANYCRLF is defined (to any
 | 
			
		||||
   value), this is changed so that backslash-R matches only CR, LF, or CRLF.
 | 
			
		||||
   The build-time default can be overridden by the user of PCRE at runtime. */
 | 
			
		||||
/* #undef BSR_ANYCRLF */
 | 
			
		||||
 | 
			
		||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
 | 
			
		||||
   character codes, define this macro to any value. You must also edit the
 | 
			
		||||
   NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
 | 
			
		||||
   On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
 | 
			
		||||
   automatically adjusted. When EBCDIC is set, PCRE assumes that all input
 | 
			
		||||
   strings are in EBCDIC. If you do not define this macro, PCRE will assume
 | 
			
		||||
   input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
 | 
			
		||||
   a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
 | 
			
		||||
/* #undef EBCDIC */
 | 
			
		||||
 | 
			
		||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
 | 
			
		||||
   NL character to be 0x25 instead of the default 0x15. NL plays the role that
 | 
			
		||||
   LF does in an ASCII/Unicode environment. The value must also be set in the
 | 
			
		||||
   NEWLINE macro below. On systems that can use "configure" or CMake to set
 | 
			
		||||
   EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
 | 
			
		||||
/* #undef EBCDIC_NL25 */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `bcopy' function. */
 | 
			
		||||
/* #undef HAVE_BCOPY */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
 | 
			
		||||
/* #undef HAVE_BITS_TYPE_TRAITS_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <bzlib.h> header file. */
 | 
			
		||||
/* #undef HAVE_BZLIB_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <dirent.h> header file. */
 | 
			
		||||
/* #undef HAVE_DIRENT_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <dlfcn.h> header file. */
 | 
			
		||||
/* #undef HAVE_DLFCN_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <editline/readline.h> header file. */
 | 
			
		||||
/* #undef HAVE_EDITLINE_READLINE_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
 | 
			
		||||
/* #undef HAVE_EDIT_READLINE_READLINE_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <inttypes.h> header file. */
 | 
			
		||||
/* #undef HAVE_INTTYPES_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <limits.h> header file. */
 | 
			
		||||
/* #undef HAVE_LIMITS_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if the system has the type `long long'. */
 | 
			
		||||
/* #undef HAVE_LONG_LONG */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `memmove' function. */
 | 
			
		||||
/* #undef HAVE_MEMMOVE */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <memory.h> header file. */
 | 
			
		||||
/* #undef HAVE_MEMORY_H */
 | 
			
		||||
 | 
			
		||||
/* Define if you have POSIX threads libraries and header files. */
 | 
			
		||||
/* #undef HAVE_PTHREAD */
 | 
			
		||||
 | 
			
		||||
/* Have PTHREAD_PRIO_INHERIT. */
 | 
			
		||||
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <readline/history.h> header file. */
 | 
			
		||||
/* #undef HAVE_READLINE_HISTORY_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <readline/readline.h> header file. */
 | 
			
		||||
/* #undef HAVE_READLINE_READLINE_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdint.h> header file. */
 | 
			
		||||
/* #undef HAVE_STDINT_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdlib.h> header file. */
 | 
			
		||||
/* #undef HAVE_STDLIB_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `strerror' function. */
 | 
			
		||||
/* #undef HAVE_STRERROR */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <string> header file. */
 | 
			
		||||
/* #undef HAVE_STRING */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <strings.h> header file. */
 | 
			
		||||
/* #undef HAVE_STRINGS_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <string.h> header file. */
 | 
			
		||||
/* #undef HAVE_STRING_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have `strtoimax'. */
 | 
			
		||||
/* #undef HAVE_STRTOIMAX */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have `strtoll'. */
 | 
			
		||||
/* #undef HAVE_STRTOLL */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have `strtoq'. */
 | 
			
		||||
/* #undef HAVE_STRTOQ */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/stat.h> header file. */
 | 
			
		||||
/* #undef HAVE_SYS_STAT_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/types.h> header file. */
 | 
			
		||||
/* #undef HAVE_SYS_TYPES_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <type_traits.h> header file. */
 | 
			
		||||
/* #undef HAVE_TYPE_TRAITS_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <unistd.h> header file. */
 | 
			
		||||
/* #undef HAVE_UNISTD_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if the system has the type `unsigned long long'. */
 | 
			
		||||
/* #undef HAVE_UNSIGNED_LONG_LONG */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if the compiler supports simple visibility declarations. */
 | 
			
		||||
/* #undef HAVE_VISIBILITY */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <windows.h> header file. */
 | 
			
		||||
/* #undef HAVE_WINDOWS_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <zlib.h> header file. */
 | 
			
		||||
/* #undef HAVE_ZLIB_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have `_strtoi64'. */
 | 
			
		||||
/* #undef HAVE__STRTOI64 */
 | 
			
		||||
 | 
			
		||||
/* The value of LINK_SIZE determines the number of bytes used to store links
 | 
			
		||||
   as offsets within the compiled regex. The default is 2, which allows for
 | 
			
		||||
   compiled patterns up to 64K long. This covers the vast majority of cases.
 | 
			
		||||
   However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
 | 
			
		||||
   for longer patterns in extreme cases. */
 | 
			
		||||
#ifndef LINK_SIZE
 | 
			
		||||
#define LINK_SIZE 2
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
 | 
			
		||||
   */
 | 
			
		||||
/* This is ignored unless you are using libtool. */
 | 
			
		||||
#ifndef LT_OBJDIR
 | 
			
		||||
#define LT_OBJDIR ".libs/"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* The value of MATCH_LIMIT determines the default number of times the
 | 
			
		||||
   internal match() function can be called during a single execution of
 | 
			
		||||
   pcre_exec(). There is a runtime interface for setting a different limit.
 | 
			
		||||
   The limit exists in order to catch runaway regular expressions that take
 | 
			
		||||
   for ever to determine that they do not match. The default is set very large
 | 
			
		||||
   so that it does not accidentally catch legitimate cases. */
 | 
			
		||||
#ifndef MATCH_LIMIT
 | 
			
		||||
#define MATCH_LIMIT 10000000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* The above limit applies to all calls of match(), whether or not they
 | 
			
		||||
   increase the recursion depth. In some environments it is desirable to limit
 | 
			
		||||
   the depth of recursive calls of match() more strictly, in order to restrict
 | 
			
		||||
   the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
 | 
			
		||||
   used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
 | 
			
		||||
   match(). To have any useful effect, it must be less than the value of
 | 
			
		||||
   MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
 | 
			
		||||
   a runtime method for setting a different limit. */
 | 
			
		||||
#ifndef MATCH_LIMIT_RECURSION
 | 
			
		||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* This limit is parameterized just in case anybody ever wants to change it.
 | 
			
		||||
   Care must be taken if it is increased, because it guards against integer
 | 
			
		||||
   overflow caused by enormously large patterns. */
 | 
			
		||||
#ifndef MAX_NAME_COUNT
 | 
			
		||||
#define MAX_NAME_COUNT 10000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* This limit is parameterized just in case anybody ever wants to change it.
 | 
			
		||||
   Care must be taken if it is increased, because it guards against integer
 | 
			
		||||
   overflow caused by enormously large patterns. */
 | 
			
		||||
#ifndef MAX_NAME_SIZE
 | 
			
		||||
#define MAX_NAME_SIZE 32
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* The value of NEWLINE determines the default newline character sequence.
 | 
			
		||||
   PCRE client programs can override this by selecting other values at run
 | 
			
		||||
   time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
 | 
			
		||||
   (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
 | 
			
		||||
   3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
 | 
			
		||||
   0x25) that are used as the NL line terminator that is equivalent to ASCII
 | 
			
		||||
   LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
 | 
			
		||||
   or -2 (ANYCRLF). */
 | 
			
		||||
#ifndef NEWLINE
 | 
			
		||||
#define NEWLINE 10
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* PCRE uses recursive function calls to handle backtracking while matching.
 | 
			
		||||
   This can sometimes be a problem on systems that have stacks of limited
 | 
			
		||||
   size. Define NO_RECURSE to any value to get a version that doesn't use
 | 
			
		||||
   recursion in the match() function; instead it creates its own stack by
 | 
			
		||||
   steam using pcre_recurse_malloc() to obtain memory from the heap. For more
 | 
			
		||||
   detail, see the comments and other stuff just above the match() function.
 | 
			
		||||
   */
 | 
			
		||||
/* #undef NO_RECURSE */
 | 
			
		||||
 | 
			
		||||
/* Name of package */
 | 
			
		||||
#define PACKAGE "pcre"
 | 
			
		||||
 | 
			
		||||
/* Define to the address where bug reports for this package should be sent. */
 | 
			
		||||
#define PACKAGE_BUGREPORT ""
 | 
			
		||||
 | 
			
		||||
/* Define to the full name of this package. */
 | 
			
		||||
#define PACKAGE_NAME "PCRE"
 | 
			
		||||
 | 
			
		||||
/* Define to the full name and version of this package. */
 | 
			
		||||
#define PACKAGE_STRING "PCRE 8.35"
 | 
			
		||||
 | 
			
		||||
/* Define to the one symbol short name of this package. */
 | 
			
		||||
#define PACKAGE_TARNAME "pcre"
 | 
			
		||||
 | 
			
		||||
/* Define to the home page for this package. */
 | 
			
		||||
#define PACKAGE_URL ""
 | 
			
		||||
 | 
			
		||||
/* Define to the version of this package. */
 | 
			
		||||
#define PACKAGE_VERSION "8.35"
 | 
			
		||||
 | 
			
		||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
 | 
			
		||||
   parentheses (of any kind) in a pattern. This limits the amount of system
 | 
			
		||||
   stack that is used while compiling a pattern. */
 | 
			
		||||
#ifndef PARENS_NEST_LIMIT
 | 
			
		||||
#define PARENS_NEST_LIMIT 250
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
 | 
			
		||||
   pcregrep to hold parts of the file it is searching. This is also the
 | 
			
		||||
   minimum value. The actual amount of memory used by pcregrep is three times
 | 
			
		||||
   this number, because it allows for the buffering of "before" and "after"
 | 
			
		||||
   lines. */
 | 
			
		||||
#ifndef PCREGREP_BUFSIZE
 | 
			
		||||
#define PCREGREP_BUFSIZE 20480
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* If you are compiling for a system other than a Unix-like system or
 | 
			
		||||
   Win32, and it needs some magic to be inserted before the definition
 | 
			
		||||
   of a function that is exported by the library, define this macro to
 | 
			
		||||
   contain the relevant magic. If you do not define this macro, a suitable
 | 
			
		||||
    __declspec value is used for Windows systems; in other environments
 | 
			
		||||
   "extern" is used for a C compiler and "extern C" for a C++ compiler.
 | 
			
		||||
   This macro appears at the start of every exported function that is part
 | 
			
		||||
   of the external API. It does not appear on functions that are "external"
 | 
			
		||||
   in the C sense, but which are internal to the library. */
 | 
			
		||||
/* #undef PCRE_EXP_DEFN */
 | 
			
		||||
 | 
			
		||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
 | 
			
		||||
/* #undef PCRE_STATIC */
 | 
			
		||||
 | 
			
		||||
/* When calling PCRE via the POSIX interface, additional working storage is
 | 
			
		||||
   required for holding the pointers to capturing substrings because PCRE
 | 
			
		||||
   requires three integers per substring, whereas the POSIX interface provides
 | 
			
		||||
   only two. If the number of expected substrings is small, the wrapper
 | 
			
		||||
   function uses space on the stack, because this is faster than using
 | 
			
		||||
   malloc() for each call. The threshold above which the stack is no longer
 | 
			
		||||
   used is defined by POSIX_MALLOC_THRESHOLD. */
 | 
			
		||||
#ifndef POSIX_MALLOC_THRESHOLD
 | 
			
		||||
#define POSIX_MALLOC_THRESHOLD 10
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Define to necessary symbol if this constant uses a non-standard name on
 | 
			
		||||
   your system. */
 | 
			
		||||
/* #undef PTHREAD_CREATE_JOINABLE */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the ANSI C header files. */
 | 
			
		||||
/* #undef STDC_HEADERS */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable support for Just-In-Time compiling. */
 | 
			
		||||
/* #undef SUPPORT_JIT */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
 | 
			
		||||
   is able to handle .bz2 files. */
 | 
			
		||||
/* #undef SUPPORT_LIBBZ2 */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to allow pcretest to be linked with libedit. */
 | 
			
		||||
/* #undef SUPPORT_LIBEDIT */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to allow pcretest to be linked with libreadline. */
 | 
			
		||||
/* #undef SUPPORT_LIBREADLINE */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to allow pcregrep to be linked with libz, so that it is
 | 
			
		||||
   able to handle .gz files. */
 | 
			
		||||
/* #undef SUPPORT_LIBZ */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable the 16 bit PCRE library. */
 | 
			
		||||
/* #undef SUPPORT_PCRE16 */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable the 32 bit PCRE library. */
 | 
			
		||||
/* #undef SUPPORT_PCRE32 */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable the 8 bit PCRE library. */
 | 
			
		||||
/* #undef SUPPORT_PCRE8 */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable JIT support in pcregrep. */
 | 
			
		||||
/* #undef SUPPORT_PCREGREP_JIT */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable support for Unicode properties. */
 | 
			
		||||
/* #undef SUPPORT_UCP */
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
 | 
			
		||||
   This will work even in an EBCDIC environment, but it is incompatible with
 | 
			
		||||
   the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
 | 
			
		||||
   ASCII/UTF-8/16/32, but not both at once. */
 | 
			
		||||
/* #undef SUPPORT_UTF */
 | 
			
		||||
 | 
			
		||||
/* Define to any value for valgrind support to find invalid memory reads. */
 | 
			
		||||
/* #undef SUPPORT_VALGRIND */
 | 
			
		||||
 | 
			
		||||
/* Version number of package */
 | 
			
		||||
#define VERSION "8.35"
 | 
			
		||||
 | 
			
		||||
/* Define to empty if `const' does not conform to ANSI C. */
 | 
			
		||||
/* #undef const */
 | 
			
		||||
 | 
			
		||||
/* Define to the type of a signed integer type of width exactly 64 bits if
 | 
			
		||||
   such a type exists and the standard includes do not define it. */
 | 
			
		||||
/* #undef int64_t */
 | 
			
		||||
 | 
			
		||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
 | 
			
		||||
/* #undef size_t */
 | 
			
		||||
							
								
								
									
										348
									
								
								tools/pcre/config.h.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										348
									
								
								tools/pcre/config.h.in
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,348 @@
 | 
			
		||||
/* config.h.in.  Generated from configure.ac by autoheader.  */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* PCRE is written in Standard C, but there are a few non-standard things it
 | 
			
		||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
 | 
			
		||||
systems.
 | 
			
		||||
 | 
			
		||||
In environments that support the GNU autotools, config.h.in is converted into
 | 
			
		||||
config.h by the "configure" script. In environments that use CMake,
 | 
			
		||||
config-cmake.h.in is converted into config.h. If you are going to build PCRE "by
 | 
			
		||||
hand" without using "configure" or CMake, you should copy the distributed
 | 
			
		||||
config.h.generic to config.h, and edit the macro definitions to be the way you
 | 
			
		||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
 | 
			
		||||
so that config.h is included at the start of every source.
 | 
			
		||||
 | 
			
		||||
Alternatively, you can avoid editing by using -D on the compiler command line
 | 
			
		||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
 | 
			
		||||
but if you do, default values will be taken from config.h for non-boolean
 | 
			
		||||
macros that are not defined on the command line.
 | 
			
		||||
 | 
			
		||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
 | 
			
		||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
 | 
			
		||||
macros are listed as a commented #undef in config.h.generic. Macros such as
 | 
			
		||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
 | 
			
		||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
 | 
			
		||||
 | 
			
		||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
 | 
			
		||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
 | 
			
		||||
sure both macros are undefined; an emulation function will then be used. */
 | 
			
		||||
 | 
			
		||||
/* By default, the \R escape sequence matches any Unicode line ending
 | 
			
		||||
   character or sequence of characters. If BSR_ANYCRLF is defined (to any
 | 
			
		||||
   value), this is changed so that backslash-R matches only CR, LF, or CRLF.
 | 
			
		||||
   The build-time default can be overridden by the user of PCRE at runtime. */
 | 
			
		||||
#undef BSR_ANYCRLF
 | 
			
		||||
 | 
			
		||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
 | 
			
		||||
   character codes, define this macro to any value. You must also edit the
 | 
			
		||||
   NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
 | 
			
		||||
   On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
 | 
			
		||||
   automatically adjusted. When EBCDIC is set, PCRE assumes that all input
 | 
			
		||||
   strings are in EBCDIC. If you do not define this macro, PCRE will assume
 | 
			
		||||
   input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
 | 
			
		||||
   a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
 | 
			
		||||
#undef EBCDIC
 | 
			
		||||
 | 
			
		||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
 | 
			
		||||
   NL character to be 0x25 instead of the default 0x15. NL plays the role that
 | 
			
		||||
   LF does in an ASCII/Unicode environment. The value must also be set in the
 | 
			
		||||
   NEWLINE macro below. On systems that can use "configure" or CMake to set
 | 
			
		||||
   EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
 | 
			
		||||
#undef EBCDIC_NL25
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `bcopy' function. */
 | 
			
		||||
#undef HAVE_BCOPY
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
 | 
			
		||||
#undef HAVE_BITS_TYPE_TRAITS_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <bzlib.h> header file. */
 | 
			
		||||
#undef HAVE_BZLIB_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <dirent.h> header file. */
 | 
			
		||||
#undef HAVE_DIRENT_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <dlfcn.h> header file. */
 | 
			
		||||
#undef HAVE_DLFCN_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <editline/readline.h> header file. */
 | 
			
		||||
#undef HAVE_EDITLINE_READLINE_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
 | 
			
		||||
#undef HAVE_EDIT_READLINE_READLINE_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <inttypes.h> header file. */
 | 
			
		||||
#undef HAVE_INTTYPES_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <limits.h> header file. */
 | 
			
		||||
#undef HAVE_LIMITS_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if the system has the type `long long'. */
 | 
			
		||||
#undef HAVE_LONG_LONG
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `memmove' function. */
 | 
			
		||||
#undef HAVE_MEMMOVE
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <memory.h> header file. */
 | 
			
		||||
#undef HAVE_MEMORY_H
 | 
			
		||||
 | 
			
		||||
/* Define if you have POSIX threads libraries and header files. */
 | 
			
		||||
#undef HAVE_PTHREAD
 | 
			
		||||
 | 
			
		||||
/* Have PTHREAD_PRIO_INHERIT. */
 | 
			
		||||
#undef HAVE_PTHREAD_PRIO_INHERIT
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <readline/history.h> header file. */
 | 
			
		||||
#undef HAVE_READLINE_HISTORY_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <readline/readline.h> header file. */
 | 
			
		||||
#undef HAVE_READLINE_READLINE_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdint.h> header file. */
 | 
			
		||||
#undef HAVE_STDINT_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdlib.h> header file. */
 | 
			
		||||
#undef HAVE_STDLIB_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `strerror' function. */
 | 
			
		||||
#undef HAVE_STRERROR
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <string> header file. */
 | 
			
		||||
#undef HAVE_STRING
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <strings.h> header file. */
 | 
			
		||||
#undef HAVE_STRINGS_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <string.h> header file. */
 | 
			
		||||
#undef HAVE_STRING_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have `strtoimax'. */
 | 
			
		||||
#undef HAVE_STRTOIMAX
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have `strtoll'. */
 | 
			
		||||
#undef HAVE_STRTOLL
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have `strtoq'. */
 | 
			
		||||
#undef HAVE_STRTOQ
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/stat.h> header file. */
 | 
			
		||||
#undef HAVE_SYS_STAT_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/types.h> header file. */
 | 
			
		||||
#undef HAVE_SYS_TYPES_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <type_traits.h> header file. */
 | 
			
		||||
#undef HAVE_TYPE_TRAITS_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <unistd.h> header file. */
 | 
			
		||||
#undef HAVE_UNISTD_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if the system has the type `unsigned long long'. */
 | 
			
		||||
#undef HAVE_UNSIGNED_LONG_LONG
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if the compiler supports simple visibility declarations. */
 | 
			
		||||
#undef HAVE_VISIBILITY
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <windows.h> header file. */
 | 
			
		||||
#undef HAVE_WINDOWS_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <zlib.h> header file. */
 | 
			
		||||
#undef HAVE_ZLIB_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have `_strtoi64'. */
 | 
			
		||||
#undef HAVE__STRTOI64
 | 
			
		||||
 | 
			
		||||
/* The value of LINK_SIZE determines the number of bytes used to store links
 | 
			
		||||
   as offsets within the compiled regex. The default is 2, which allows for
 | 
			
		||||
   compiled patterns up to 64K long. This covers the vast majority of cases.
 | 
			
		||||
   However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
 | 
			
		||||
   for longer patterns in extreme cases. */
 | 
			
		||||
#undef LINK_SIZE
 | 
			
		||||
 | 
			
		||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
 | 
			
		||||
   */
 | 
			
		||||
#undef LT_OBJDIR
 | 
			
		||||
 | 
			
		||||
/* The value of MATCH_LIMIT determines the default number of times the
 | 
			
		||||
   internal match() function can be called during a single execution of
 | 
			
		||||
   pcre_exec(). There is a runtime interface for setting a different limit.
 | 
			
		||||
   The limit exists in order to catch runaway regular expressions that take
 | 
			
		||||
   for ever to determine that they do not match. The default is set very large
 | 
			
		||||
   so that it does not accidentally catch legitimate cases. */
 | 
			
		||||
#undef MATCH_LIMIT
 | 
			
		||||
 | 
			
		||||
/* The above limit applies to all calls of match(), whether or not they
 | 
			
		||||
   increase the recursion depth. In some environments it is desirable to limit
 | 
			
		||||
   the depth of recursive calls of match() more strictly, in order to restrict
 | 
			
		||||
   the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
 | 
			
		||||
   used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
 | 
			
		||||
   match(). To have any useful effect, it must be less than the value of
 | 
			
		||||
   MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
 | 
			
		||||
   a runtime method for setting a different limit. */
 | 
			
		||||
#undef MATCH_LIMIT_RECURSION
 | 
			
		||||
 | 
			
		||||
/* This limit is parameterized just in case anybody ever wants to change it.
 | 
			
		||||
   Care must be taken if it is increased, because it guards against integer
 | 
			
		||||
   overflow caused by enormously large patterns. */
 | 
			
		||||
#undef MAX_NAME_COUNT
 | 
			
		||||
 | 
			
		||||
/* This limit is parameterized just in case anybody ever wants to change it.
 | 
			
		||||
   Care must be taken if it is increased, because it guards against integer
 | 
			
		||||
   overflow caused by enormously large patterns. */
 | 
			
		||||
#undef MAX_NAME_SIZE
 | 
			
		||||
 | 
			
		||||
/* The value of NEWLINE determines the default newline character sequence.
 | 
			
		||||
   PCRE client programs can override this by selecting other values at run
 | 
			
		||||
   time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
 | 
			
		||||
   (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
 | 
			
		||||
   3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
 | 
			
		||||
   0x25) that are used as the NL line terminator that is equivalent to ASCII
 | 
			
		||||
   LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
 | 
			
		||||
   or -2 (ANYCRLF). */
 | 
			
		||||
#undef NEWLINE
 | 
			
		||||
 | 
			
		||||
/* PCRE uses recursive function calls to handle backtracking while matching.
 | 
			
		||||
   This can sometimes be a problem on systems that have stacks of limited
 | 
			
		||||
   size. Define NO_RECURSE to any value to get a version that doesn't use
 | 
			
		||||
   recursion in the match() function; instead it creates its own stack by
 | 
			
		||||
   hand using pcre_recurse_malloc() to obtain memory from the heap. For more
 | 
			
		||||
   detail, see the comments and other stuff just above the match() function.
 | 
			
		||||
   */
 | 
			
		||||
#undef NO_RECURSE
 | 
			
		||||
 | 
			
		||||
/* Name of package */
 | 
			
		||||
#undef PACKAGE
 | 
			
		||||
 | 
			
		||||
/* Define to the address where bug reports for this package should be sent. */
 | 
			
		||||
#undef PACKAGE_BUGREPORT
 | 
			
		||||
 | 
			
		||||
/* Define to the full name of this package. */
 | 
			
		||||
#undef PACKAGE_NAME
 | 
			
		||||
 | 
			
		||||
/* Define to the full name and version of this package. */
 | 
			
		||||
#undef PACKAGE_STRING
 | 
			
		||||
 | 
			
		||||
/* Define to the one symbol short name of this package. */
 | 
			
		||||
#undef PACKAGE_TARNAME
 | 
			
		||||
 | 
			
		||||
/* Define to the home page for this package. */
 | 
			
		||||
#undef PACKAGE_URL
 | 
			
		||||
 | 
			
		||||
/* Define to the version of this package. */
 | 
			
		||||
#undef PACKAGE_VERSION
 | 
			
		||||
 | 
			
		||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
 | 
			
		||||
   parentheses (of any kind) in a pattern. This limits the amount of system
 | 
			
		||||
   stack that is used while compiling a pattern. */
 | 
			
		||||
#undef PARENS_NEST_LIMIT
 | 
			
		||||
 | 
			
		||||
/* to make a symbol visible */
 | 
			
		||||
#undef PCRECPP_EXP_DECL
 | 
			
		||||
 | 
			
		||||
/* to make a symbol visible */
 | 
			
		||||
#undef PCRECPP_EXP_DEFN
 | 
			
		||||
 | 
			
		||||
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
 | 
			
		||||
   pcregrep to hold parts of the file it is searching. This is also the
 | 
			
		||||
   minimum value. The actual amount of memory used by pcregrep is three times
 | 
			
		||||
   this number, because it allows for the buffering of "before" and "after"
 | 
			
		||||
   lines. */
 | 
			
		||||
#undef PCREGREP_BUFSIZE
 | 
			
		||||
 | 
			
		||||
/* to make a symbol visible */
 | 
			
		||||
#undef PCREPOSIX_EXP_DECL
 | 
			
		||||
 | 
			
		||||
/* to make a symbol visible */
 | 
			
		||||
#undef PCREPOSIX_EXP_DEFN
 | 
			
		||||
 | 
			
		||||
/* to make a symbol visible */
 | 
			
		||||
#undef PCRE_EXP_DATA_DEFN
 | 
			
		||||
 | 
			
		||||
/* to make a symbol visible */
 | 
			
		||||
#undef PCRE_EXP_DECL
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* If you are compiling for a system other than a Unix-like system or
 | 
			
		||||
   Win32, and it needs some magic to be inserted before the definition
 | 
			
		||||
   of a function that is exported by the library, define this macro to
 | 
			
		||||
   contain the relevant magic. If you do not define this macro, a suitable
 | 
			
		||||
    __declspec value is used for Windows systems; in other environments
 | 
			
		||||
   "extern" is used for a C compiler and "extern C" for a C++ compiler.
 | 
			
		||||
   This macro appears at the start of every exported function that is part
 | 
			
		||||
   of the external API. It does not appear on functions that are "external"
 | 
			
		||||
   in the C sense, but which are internal to the library. */
 | 
			
		||||
#undef PCRE_EXP_DEFN
 | 
			
		||||
 | 
			
		||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
 | 
			
		||||
#undef PCRE_STATIC
 | 
			
		||||
 | 
			
		||||
/* When calling PCRE via the POSIX interface, additional working storage is
 | 
			
		||||
   required for holding the pointers to capturing substrings because PCRE
 | 
			
		||||
   requires three integers per substring, whereas the POSIX interface provides
 | 
			
		||||
   only two. If the number of expected substrings is small, the wrapper
 | 
			
		||||
   function uses space on the stack, because this is faster than using
 | 
			
		||||
   malloc() for each call. The threshold above which the stack is no longer
 | 
			
		||||
   used is defined by POSIX_MALLOC_THRESHOLD. */
 | 
			
		||||
#undef POSIX_MALLOC_THRESHOLD
 | 
			
		||||
 | 
			
		||||
/* Define to necessary symbol if this constant uses a non-standard name on
 | 
			
		||||
   your system. */
 | 
			
		||||
#undef PTHREAD_CREATE_JOINABLE
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the ANSI C header files. */
 | 
			
		||||
#undef STDC_HEADERS
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable support for Just-In-Time compiling. */
 | 
			
		||||
#undef SUPPORT_JIT
 | 
			
		||||
 | 
			
		||||
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
 | 
			
		||||
   is able to handle .bz2 files. */
 | 
			
		||||
#undef SUPPORT_LIBBZ2
 | 
			
		||||
 | 
			
		||||
/* Define to any value to allow pcretest to be linked with libedit. */
 | 
			
		||||
#undef SUPPORT_LIBEDIT
 | 
			
		||||
 | 
			
		||||
/* Define to any value to allow pcretest to be linked with libreadline. */
 | 
			
		||||
#undef SUPPORT_LIBREADLINE
 | 
			
		||||
 | 
			
		||||
/* Define to any value to allow pcregrep to be linked with libz, so that it is
 | 
			
		||||
   able to handle .gz files. */
 | 
			
		||||
#undef SUPPORT_LIBZ
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable the 16 bit PCRE library. */
 | 
			
		||||
#undef SUPPORT_PCRE16
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable the 32 bit PCRE library. */
 | 
			
		||||
#undef SUPPORT_PCRE32
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable the 8 bit PCRE library. */
 | 
			
		||||
#undef SUPPORT_PCRE8
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable JIT support in pcregrep. */
 | 
			
		||||
#undef SUPPORT_PCREGREP_JIT
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable support for Unicode properties. */
 | 
			
		||||
#undef SUPPORT_UCP
 | 
			
		||||
 | 
			
		||||
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
 | 
			
		||||
   This will work even in an EBCDIC environment, but it is incompatible with
 | 
			
		||||
   the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
 | 
			
		||||
   ASCII/UTF-8/16/32, but not both at once. */
 | 
			
		||||
#undef SUPPORT_UTF
 | 
			
		||||
 | 
			
		||||
/* Define to any value for valgrind support to find invalid memory reads. */
 | 
			
		||||
#undef SUPPORT_VALGRIND
 | 
			
		||||
 | 
			
		||||
/* Version number of package */
 | 
			
		||||
#undef VERSION
 | 
			
		||||
 | 
			
		||||
/* Define to empty if `const' does not conform to ANSI C. */
 | 
			
		||||
#undef const
 | 
			
		||||
 | 
			
		||||
/* Define to the type of a signed integer type of width exactly 64 bits if
 | 
			
		||||
   such a type exists and the standard includes do not define it. */
 | 
			
		||||
#undef int64_t
 | 
			
		||||
 | 
			
		||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
 | 
			
		||||
#undef size_t
 | 
			
		||||
							
								
								
									
										1793
									
								
								tools/pcre/config.sub
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1793
									
								
								tools/pcre/config.sub
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										22282
									
								
								tools/pcre/configure
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										22282
									
								
								tools/pcre/configure
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										1111
									
								
								tools/pcre/configure.ac
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1111
									
								
								tools/pcre/configure.ac
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										791
									
								
								tools/pcre/depcomp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										791
									
								
								tools/pcre/depcomp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,791 @@
 | 
			
		||||
#! /bin/sh
 | 
			
		||||
# depcomp - compile a program generating dependencies as side-effects
 | 
			
		||||
 | 
			
		||||
scriptversion=2013-05-30.07; # UTC
 | 
			
		||||
 | 
			
		||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
 | 
			
		||||
 | 
			
		||||
# This program is free software; you can redistribute it and/or modify
 | 
			
		||||
# it under the terms of the GNU General Public License as published by
 | 
			
		||||
# the Free Software Foundation; either version 2, or (at your option)
 | 
			
		||||
# any later version.
 | 
			
		||||
 | 
			
		||||
# This program is distributed in the hope that it will be useful,
 | 
			
		||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
# GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
# You should have received a copy of the GNU General Public License
 | 
			
		||||
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
# As a special exception to the GNU General Public License, if you
 | 
			
		||||
# distribute this file as part of a program that contains a
 | 
			
		||||
# configuration script generated by Autoconf, you may include it under
 | 
			
		||||
# the same distribution terms that you use for the rest of that program.
 | 
			
		||||
 | 
			
		||||
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
 | 
			
		||||
 | 
			
		||||
case $1 in
 | 
			
		||||
  '')
 | 
			
		||||
    echo "$0: No command.  Try '$0 --help' for more information." 1>&2
 | 
			
		||||
    exit 1;
 | 
			
		||||
    ;;
 | 
			
		||||
  -h | --h*)
 | 
			
		||||
    cat <<\EOF
 | 
			
		||||
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
 | 
			
		||||
 | 
			
		||||
Run PROGRAMS ARGS to compile a file, generating dependencies
 | 
			
		||||
as side-effects.
 | 
			
		||||
 | 
			
		||||
Environment variables:
 | 
			
		||||
  depmode     Dependency tracking mode.
 | 
			
		||||
  source      Source file read by 'PROGRAMS ARGS'.
 | 
			
		||||
  object      Object file output by 'PROGRAMS ARGS'.
 | 
			
		||||
  DEPDIR      directory where to store dependencies.
 | 
			
		||||
  depfile     Dependency file to output.
 | 
			
		||||
  tmpdepfile  Temporary file to use when outputting dependencies.
 | 
			
		||||
  libtool     Whether libtool is used (yes/no).
 | 
			
		||||
 | 
			
		||||
Report bugs to <bug-automake@gnu.org>.
 | 
			
		||||
EOF
 | 
			
		||||
    exit $?
 | 
			
		||||
    ;;
 | 
			
		||||
  -v | --v*)
 | 
			
		||||
    echo "depcomp $scriptversion"
 | 
			
		||||
    exit $?
 | 
			
		||||
    ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
# Get the directory component of the given path, and save it in the
 | 
			
		||||
# global variable '$dir'.  Note that this directory component will
 | 
			
		||||
# be either empty or ending with a '/' character.  This is deliberate.
 | 
			
		||||
set_dir_from ()
 | 
			
		||||
{
 | 
			
		||||
  case $1 in
 | 
			
		||||
    */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
 | 
			
		||||
      *) dir=;;
 | 
			
		||||
  esac
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
# Get the suffix-stripped basename of the given path, and save it in the
 | 
			
		||||
# global variable '$base'.
 | 
			
		||||
set_base_from ()
 | 
			
		||||
{
 | 
			
		||||
  base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
# If no dependency file was actually created by the compiler invocation,
 | 
			
		||||
# we still have to create a dummy depfile, to avoid errors with the
 | 
			
		||||
# Makefile "include basename.Plo" scheme.
 | 
			
		||||
make_dummy_depfile ()
 | 
			
		||||
{
 | 
			
		||||
  echo "#dummy" > "$depfile"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
# Factor out some common post-processing of the generated depfile.
 | 
			
		||||
# Requires the auxiliary global variable '$tmpdepfile' to be set.
 | 
			
		||||
aix_post_process_depfile ()
 | 
			
		||||
{
 | 
			
		||||
  # If the compiler actually managed to produce a dependency file,
 | 
			
		||||
  # post-process it.
 | 
			
		||||
  if test -f "$tmpdepfile"; then
 | 
			
		||||
    # Each line is of the form 'foo.o: dependency.h'.
 | 
			
		||||
    # Do two passes, one to just change these to
 | 
			
		||||
    #   $object: dependency.h
 | 
			
		||||
    # and one to simply output
 | 
			
		||||
    #   dependency.h:
 | 
			
		||||
    # which is needed to avoid the deleted-header problem.
 | 
			
		||||
    { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
 | 
			
		||||
      sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
 | 
			
		||||
    } > "$depfile"
 | 
			
		||||
    rm -f "$tmpdepfile"
 | 
			
		||||
  else
 | 
			
		||||
    make_dummy_depfile
 | 
			
		||||
  fi
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
# A tabulation character.
 | 
			
		||||
tab='	'
 | 
			
		||||
# A newline character.
 | 
			
		||||
nl='
 | 
			
		||||
'
 | 
			
		||||
# Character ranges might be problematic outside the C locale.
 | 
			
		||||
# These definitions help.
 | 
			
		||||
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
 | 
			
		||||
lower=abcdefghijklmnopqrstuvwxyz
 | 
			
		||||
digits=0123456789
 | 
			
		||||
alpha=${upper}${lower}
 | 
			
		||||
 | 
			
		||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
 | 
			
		||||
  echo "depcomp: Variables source, object and depmode must be set" 1>&2
 | 
			
		||||
  exit 1
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
 | 
			
		||||
depfile=${depfile-`echo "$object" |
 | 
			
		||||
  sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
 | 
			
		||||
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
 | 
			
		||||
 | 
			
		||||
rm -f "$tmpdepfile"
 | 
			
		||||
 | 
			
		||||
# Avoid interferences from the environment.
 | 
			
		||||
gccflag= dashmflag=
 | 
			
		||||
 | 
			
		||||
# Some modes work just like other modes, but use different flags.  We
 | 
			
		||||
# parameterize here, but still list the modes in the big case below,
 | 
			
		||||
# to make depend.m4 easier to write.  Note that we *cannot* use a case
 | 
			
		||||
# here, because this file can only contain one case statement.
 | 
			
		||||
if test "$depmode" = hp; then
 | 
			
		||||
  # HP compiler uses -M and no extra arg.
 | 
			
		||||
  gccflag=-M
 | 
			
		||||
  depmode=gcc
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
if test "$depmode" = dashXmstdout; then
 | 
			
		||||
  # This is just like dashmstdout with a different argument.
 | 
			
		||||
  dashmflag=-xM
 | 
			
		||||
  depmode=dashmstdout
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
cygpath_u="cygpath -u -f -"
 | 
			
		||||
if test "$depmode" = msvcmsys; then
 | 
			
		||||
  # This is just like msvisualcpp but w/o cygpath translation.
 | 
			
		||||
  # Just convert the backslash-escaped backslashes to single forward
 | 
			
		||||
  # slashes to satisfy depend.m4
 | 
			
		||||
  cygpath_u='sed s,\\\\,/,g'
 | 
			
		||||
  depmode=msvisualcpp
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
if test "$depmode" = msvc7msys; then
 | 
			
		||||
  # This is just like msvc7 but w/o cygpath translation.
 | 
			
		||||
  # Just convert the backslash-escaped backslashes to single forward
 | 
			
		||||
  # slashes to satisfy depend.m4
 | 
			
		||||
  cygpath_u='sed s,\\\\,/,g'
 | 
			
		||||
  depmode=msvc7
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
if test "$depmode" = xlc; then
 | 
			
		||||
  # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
 | 
			
		||||
  gccflag=-qmakedep=gcc,-MF
 | 
			
		||||
  depmode=gcc
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
case "$depmode" in
 | 
			
		||||
gcc3)
 | 
			
		||||
## gcc 3 implements dependency tracking that does exactly what
 | 
			
		||||
## we want.  Yay!  Note: for some reason libtool 1.4 doesn't like
 | 
			
		||||
## it if -MD -MP comes after the -MF stuff.  Hmm.
 | 
			
		||||
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
 | 
			
		||||
## the command line argument order; so add the flags where they
 | 
			
		||||
## appear in depend2.am.  Note that the slowdown incurred here
 | 
			
		||||
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
 | 
			
		||||
  for arg
 | 
			
		||||
  do
 | 
			
		||||
    case $arg in
 | 
			
		||||
    -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
 | 
			
		||||
    *)  set fnord "$@" "$arg" ;;
 | 
			
		||||
    esac
 | 
			
		||||
    shift # fnord
 | 
			
		||||
    shift # $arg
 | 
			
		||||
  done
 | 
			
		||||
  "$@"
 | 
			
		||||
  stat=$?
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
    rm -f "$tmpdepfile"
 | 
			
		||||
    exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
  mv "$tmpdepfile" "$depfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
gcc)
 | 
			
		||||
## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
 | 
			
		||||
## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
 | 
			
		||||
## (see the conditional assignment to $gccflag above).
 | 
			
		||||
## There are various ways to get dependency output from gcc.  Here's
 | 
			
		||||
## why we pick this rather obscure method:
 | 
			
		||||
## - Don't want to use -MD because we'd like the dependencies to end
 | 
			
		||||
##   up in a subdir.  Having to rename by hand is ugly.
 | 
			
		||||
##   (We might end up doing this anyway to support other compilers.)
 | 
			
		||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
 | 
			
		||||
##   -MM, not -M (despite what the docs say).  Also, it might not be
 | 
			
		||||
##   supported by the other compilers which use the 'gcc' depmode.
 | 
			
		||||
## - Using -M directly means running the compiler twice (even worse
 | 
			
		||||
##   than renaming).
 | 
			
		||||
  if test -z "$gccflag"; then
 | 
			
		||||
    gccflag=-MD,
 | 
			
		||||
  fi
 | 
			
		||||
  "$@" -Wp,"$gccflag$tmpdepfile"
 | 
			
		||||
  stat=$?
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
    rm -f "$tmpdepfile"
 | 
			
		||||
    exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
  echo "$object : \\" > "$depfile"
 | 
			
		||||
  # The second -e expression handles DOS-style file names with drive
 | 
			
		||||
  # letters.
 | 
			
		||||
  sed -e 's/^[^:]*: / /' \
 | 
			
		||||
      -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
 | 
			
		||||
## This next piece of magic avoids the "deleted header file" problem.
 | 
			
		||||
## The problem is that when a header file which appears in a .P file
 | 
			
		||||
## is deleted, the dependency causes make to die (because there is
 | 
			
		||||
## typically no way to rebuild the header).  We avoid this by adding
 | 
			
		||||
## dummy dependencies for each header file.  Too bad gcc doesn't do
 | 
			
		||||
## this for us directly.
 | 
			
		||||
## Some versions of gcc put a space before the ':'.  On the theory
 | 
			
		||||
## that the space means something, we add a space to the output as
 | 
			
		||||
## well.  hp depmode also adds that space, but also prefixes the VPATH
 | 
			
		||||
## to the object.  Take care to not repeat it in the output.
 | 
			
		||||
## Some versions of the HPUX 10.20 sed can't process this invocation
 | 
			
		||||
## correctly.  Breaking it into two sed invocations is a workaround.
 | 
			
		||||
  tr ' ' "$nl" < "$tmpdepfile" \
 | 
			
		||||
    | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
 | 
			
		||||
    | sed -e 's/$/ :/' >> "$depfile"
 | 
			
		||||
  rm -f "$tmpdepfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
hp)
 | 
			
		||||
  # This case exists only to let depend.m4 do its work.  It works by
 | 
			
		||||
  # looking at the text of this script.  This case will never be run,
 | 
			
		||||
  # since it is checked for above.
 | 
			
		||||
  exit 1
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
sgi)
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    "$@" "-Wp,-MDupdate,$tmpdepfile"
 | 
			
		||||
  else
 | 
			
		||||
    "$@" -MDupdate "$tmpdepfile"
 | 
			
		||||
  fi
 | 
			
		||||
  stat=$?
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
    rm -f "$tmpdepfile"
 | 
			
		||||
    exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
 | 
			
		||||
  if test -f "$tmpdepfile"; then  # yes, the sourcefile depend on other files
 | 
			
		||||
    echo "$object : \\" > "$depfile"
 | 
			
		||||
    # Clip off the initial element (the dependent).  Don't try to be
 | 
			
		||||
    # clever and replace this with sed code, as IRIX sed won't handle
 | 
			
		||||
    # lines with more than a fixed number of characters (4096 in
 | 
			
		||||
    # IRIX 6.2 sed, 8192 in IRIX 6.5).  We also remove comment lines;
 | 
			
		||||
    # the IRIX cc adds comments like '#:fec' to the end of the
 | 
			
		||||
    # dependency line.
 | 
			
		||||
    tr ' ' "$nl" < "$tmpdepfile" \
 | 
			
		||||
      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
 | 
			
		||||
      | tr "$nl" ' ' >> "$depfile"
 | 
			
		||||
    echo >> "$depfile"
 | 
			
		||||
    # The second pass generates a dummy entry for each header file.
 | 
			
		||||
    tr ' ' "$nl" < "$tmpdepfile" \
 | 
			
		||||
      | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
 | 
			
		||||
      >> "$depfile"
 | 
			
		||||
  else
 | 
			
		||||
    make_dummy_depfile
 | 
			
		||||
  fi
 | 
			
		||||
  rm -f "$tmpdepfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
xlc)
 | 
			
		||||
  # This case exists only to let depend.m4 do its work.  It works by
 | 
			
		||||
  # looking at the text of this script.  This case will never be run,
 | 
			
		||||
  # since it is checked for above.
 | 
			
		||||
  exit 1
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
aix)
 | 
			
		||||
  # The C for AIX Compiler uses -M and outputs the dependencies
 | 
			
		||||
  # in a .u file.  In older versions, this file always lives in the
 | 
			
		||||
  # current directory.  Also, the AIX compiler puts '$object:' at the
 | 
			
		||||
  # start of each line; $object doesn't have directory information.
 | 
			
		||||
  # Version 6 uses the directory in both cases.
 | 
			
		||||
  set_dir_from "$object"
 | 
			
		||||
  set_base_from "$object"
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    tmpdepfile1=$dir$base.u
 | 
			
		||||
    tmpdepfile2=$base.u
 | 
			
		||||
    tmpdepfile3=$dir.libs/$base.u
 | 
			
		||||
    "$@" -Wc,-M
 | 
			
		||||
  else
 | 
			
		||||
    tmpdepfile1=$dir$base.u
 | 
			
		||||
    tmpdepfile2=$dir$base.u
 | 
			
		||||
    tmpdepfile3=$dir$base.u
 | 
			
		||||
    "$@" -M
 | 
			
		||||
  fi
 | 
			
		||||
  stat=$?
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
 | 
			
		||||
    exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
 | 
			
		||||
  do
 | 
			
		||||
    test -f "$tmpdepfile" && break
 | 
			
		||||
  done
 | 
			
		||||
  aix_post_process_depfile
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
tcc)
 | 
			
		||||
  # tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
 | 
			
		||||
  # FIXME: That version is still under development at the moment of writing.
 | 
			
		||||
  #        Make sure that this statement remains true also for stable, released
 | 
			
		||||
  #        versions.
 | 
			
		||||
  # It will wrap lines (doesn't matter whether long or short) with a
 | 
			
		||||
  # trailing '\', as in:
 | 
			
		||||
  #
 | 
			
		||||
  #   foo.o : \
 | 
			
		||||
  #    foo.c \
 | 
			
		||||
  #    foo.h \
 | 
			
		||||
  #
 | 
			
		||||
  # It will put a trailing '\' even on the last line, and will use leading
 | 
			
		||||
  # spaces rather than leading tabs (at least since its commit 0394caf7
 | 
			
		||||
  # "Emit spaces for -MD").
 | 
			
		||||
  "$@" -MD -MF "$tmpdepfile"
 | 
			
		||||
  stat=$?
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
    rm -f "$tmpdepfile"
 | 
			
		||||
    exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
  # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
 | 
			
		||||
  # We have to change lines of the first kind to '$object: \'.
 | 
			
		||||
  sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
 | 
			
		||||
  # And for each line of the second kind, we have to emit a 'dep.h:'
 | 
			
		||||
  # dummy dependency, to avoid the deleted-header problem.
 | 
			
		||||
  sed -n -e 's|^  *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
 | 
			
		||||
  rm -f "$tmpdepfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
## The order of this option in the case statement is important, since the
 | 
			
		||||
## shell code in configure will try each of these formats in the order
 | 
			
		||||
## listed in this file.  A plain '-MD' option would be understood by many
 | 
			
		||||
## compilers, so we must ensure this comes after the gcc and icc options.
 | 
			
		||||
pgcc)
 | 
			
		||||
  # Portland's C compiler understands '-MD'.
 | 
			
		||||
  # Will always output deps to 'file.d' where file is the root name of the
 | 
			
		||||
  # source file under compilation, even if file resides in a subdirectory.
 | 
			
		||||
  # The object file name does not affect the name of the '.d' file.
 | 
			
		||||
  # pgcc 10.2 will output
 | 
			
		||||
  #    foo.o: sub/foo.c sub/foo.h
 | 
			
		||||
  # and will wrap long lines using '\' :
 | 
			
		||||
  #    foo.o: sub/foo.c ... \
 | 
			
		||||
  #     sub/foo.h ... \
 | 
			
		||||
  #     ...
 | 
			
		||||
  set_dir_from "$object"
 | 
			
		||||
  # Use the source, not the object, to determine the base name, since
 | 
			
		||||
  # that's sadly what pgcc will do too.
 | 
			
		||||
  set_base_from "$source"
 | 
			
		||||
  tmpdepfile=$base.d
 | 
			
		||||
 | 
			
		||||
  # For projects that build the same source file twice into different object
 | 
			
		||||
  # files, the pgcc approach of using the *source* file root name can cause
 | 
			
		||||
  # problems in parallel builds.  Use a locking strategy to avoid stomping on
 | 
			
		||||
  # the same $tmpdepfile.
 | 
			
		||||
  lockdir=$base.d-lock
 | 
			
		||||
  trap "
 | 
			
		||||
    echo '$0: caught signal, cleaning up...' >&2
 | 
			
		||||
    rmdir '$lockdir'
 | 
			
		||||
    exit 1
 | 
			
		||||
  " 1 2 13 15
 | 
			
		||||
  numtries=100
 | 
			
		||||
  i=$numtries
 | 
			
		||||
  while test $i -gt 0; do
 | 
			
		||||
    # mkdir is a portable test-and-set.
 | 
			
		||||
    if mkdir "$lockdir" 2>/dev/null; then
 | 
			
		||||
      # This process acquired the lock.
 | 
			
		||||
      "$@" -MD
 | 
			
		||||
      stat=$?
 | 
			
		||||
      # Release the lock.
 | 
			
		||||
      rmdir "$lockdir"
 | 
			
		||||
      break
 | 
			
		||||
    else
 | 
			
		||||
      # If the lock is being held by a different process, wait
 | 
			
		||||
      # until the winning process is done or we timeout.
 | 
			
		||||
      while test -d "$lockdir" && test $i -gt 0; do
 | 
			
		||||
        sleep 1
 | 
			
		||||
        i=`expr $i - 1`
 | 
			
		||||
      done
 | 
			
		||||
    fi
 | 
			
		||||
    i=`expr $i - 1`
 | 
			
		||||
  done
 | 
			
		||||
  trap - 1 2 13 15
 | 
			
		||||
  if test $i -le 0; then
 | 
			
		||||
    echo "$0: failed to acquire lock after $numtries attempts" >&2
 | 
			
		||||
    echo "$0: check lockdir '$lockdir'" >&2
 | 
			
		||||
    exit 1
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
    rm -f "$tmpdepfile"
 | 
			
		||||
    exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
  # Each line is of the form `foo.o: dependent.h',
 | 
			
		||||
  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
 | 
			
		||||
  # Do two passes, one to just change these to
 | 
			
		||||
  # `$object: dependent.h' and one to simply `dependent.h:'.
 | 
			
		||||
  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
 | 
			
		||||
  # Some versions of the HPUX 10.20 sed can't process this invocation
 | 
			
		||||
  # correctly.  Breaking it into two sed invocations is a workaround.
 | 
			
		||||
  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
 | 
			
		||||
    | sed -e 's/$/ :/' >> "$depfile"
 | 
			
		||||
  rm -f "$tmpdepfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
hp2)
 | 
			
		||||
  # The "hp" stanza above does not work with aCC (C++) and HP's ia64
 | 
			
		||||
  # compilers, which have integrated preprocessors.  The correct option
 | 
			
		||||
  # to use with these is +Maked; it writes dependencies to a file named
 | 
			
		||||
  # 'foo.d', which lands next to the object file, wherever that
 | 
			
		||||
  # happens to be.
 | 
			
		||||
  # Much of this is similar to the tru64 case; see comments there.
 | 
			
		||||
  set_dir_from  "$object"
 | 
			
		||||
  set_base_from "$object"
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    tmpdepfile1=$dir$base.d
 | 
			
		||||
    tmpdepfile2=$dir.libs/$base.d
 | 
			
		||||
    "$@" -Wc,+Maked
 | 
			
		||||
  else
 | 
			
		||||
    tmpdepfile1=$dir$base.d
 | 
			
		||||
    tmpdepfile2=$dir$base.d
 | 
			
		||||
    "$@" +Maked
 | 
			
		||||
  fi
 | 
			
		||||
  stat=$?
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
     rm -f "$tmpdepfile1" "$tmpdepfile2"
 | 
			
		||||
     exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
 | 
			
		||||
  do
 | 
			
		||||
    test -f "$tmpdepfile" && break
 | 
			
		||||
  done
 | 
			
		||||
  if test -f "$tmpdepfile"; then
 | 
			
		||||
    sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
 | 
			
		||||
    # Add 'dependent.h:' lines.
 | 
			
		||||
    sed -ne '2,${
 | 
			
		||||
               s/^ *//
 | 
			
		||||
               s/ \\*$//
 | 
			
		||||
               s/$/:/
 | 
			
		||||
               p
 | 
			
		||||
             }' "$tmpdepfile" >> "$depfile"
 | 
			
		||||
  else
 | 
			
		||||
    make_dummy_depfile
 | 
			
		||||
  fi
 | 
			
		||||
  rm -f "$tmpdepfile" "$tmpdepfile2"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
tru64)
 | 
			
		||||
  # The Tru64 compiler uses -MD to generate dependencies as a side
 | 
			
		||||
  # effect.  'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
 | 
			
		||||
  # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
 | 
			
		||||
  # dependencies in 'foo.d' instead, so we check for that too.
 | 
			
		||||
  # Subdirectories are respected.
 | 
			
		||||
  set_dir_from  "$object"
 | 
			
		||||
  set_base_from "$object"
 | 
			
		||||
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    # Libtool generates 2 separate objects for the 2 libraries.  These
 | 
			
		||||
    # two compilations output dependencies in $dir.libs/$base.o.d and
 | 
			
		||||
    # in $dir$base.o.d.  We have to check for both files, because
 | 
			
		||||
    # one of the two compilations can be disabled.  We should prefer
 | 
			
		||||
    # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
 | 
			
		||||
    # automatically cleaned when .libs/ is deleted, while ignoring
 | 
			
		||||
    # the former would cause a distcleancheck panic.
 | 
			
		||||
    tmpdepfile1=$dir$base.o.d          # libtool 1.5
 | 
			
		||||
    tmpdepfile2=$dir.libs/$base.o.d    # Likewise.
 | 
			
		||||
    tmpdepfile3=$dir.libs/$base.d      # Compaq CCC V6.2-504
 | 
			
		||||
    "$@" -Wc,-MD
 | 
			
		||||
  else
 | 
			
		||||
    tmpdepfile1=$dir$base.d
 | 
			
		||||
    tmpdepfile2=$dir$base.d
 | 
			
		||||
    tmpdepfile3=$dir$base.d
 | 
			
		||||
    "$@" -MD
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  stat=$?
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
 | 
			
		||||
    exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
 | 
			
		||||
  do
 | 
			
		||||
    test -f "$tmpdepfile" && break
 | 
			
		||||
  done
 | 
			
		||||
  # Same post-processing that is required for AIX mode.
 | 
			
		||||
  aix_post_process_depfile
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
msvc7)
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    showIncludes=-Wc,-showIncludes
 | 
			
		||||
  else
 | 
			
		||||
    showIncludes=-showIncludes
 | 
			
		||||
  fi
 | 
			
		||||
  "$@" $showIncludes > "$tmpdepfile"
 | 
			
		||||
  stat=$?
 | 
			
		||||
  grep -v '^Note: including file: ' "$tmpdepfile"
 | 
			
		||||
  if test $stat -ne 0; then
 | 
			
		||||
    rm -f "$tmpdepfile"
 | 
			
		||||
    exit $stat
 | 
			
		||||
  fi
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
  echo "$object : \\" > "$depfile"
 | 
			
		||||
  # The first sed program below extracts the file names and escapes
 | 
			
		||||
  # backslashes for cygpath.  The second sed program outputs the file
 | 
			
		||||
  # name when reading, but also accumulates all include files in the
 | 
			
		||||
  # hold buffer in order to output them again at the end.  This only
 | 
			
		||||
  # works with sed implementations that can handle large buffers.
 | 
			
		||||
  sed < "$tmpdepfile" -n '
 | 
			
		||||
/^Note: including file:  *\(.*\)/ {
 | 
			
		||||
  s//\1/
 | 
			
		||||
  s/\\/\\\\/g
 | 
			
		||||
  p
 | 
			
		||||
}' | $cygpath_u | sort -u | sed -n '
 | 
			
		||||
s/ /\\ /g
 | 
			
		||||
s/\(.*\)/'"$tab"'\1 \\/p
 | 
			
		||||
s/.\(.*\) \\/\1:/
 | 
			
		||||
H
 | 
			
		||||
$ {
 | 
			
		||||
  s/.*/'"$tab"'/
 | 
			
		||||
  G
 | 
			
		||||
  p
 | 
			
		||||
}' >> "$depfile"
 | 
			
		||||
  echo >> "$depfile" # make sure the fragment doesn't end with a backslash
 | 
			
		||||
  rm -f "$tmpdepfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
msvc7msys)
 | 
			
		||||
  # This case exists only to let depend.m4 do its work.  It works by
 | 
			
		||||
  # looking at the text of this script.  This case will never be run,
 | 
			
		||||
  # since it is checked for above.
 | 
			
		||||
  exit 1
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
#nosideeffect)
 | 
			
		||||
  # This comment above is used by automake to tell side-effect
 | 
			
		||||
  # dependency tracking mechanisms from slower ones.
 | 
			
		||||
 | 
			
		||||
dashmstdout)
 | 
			
		||||
  # Important note: in order to support this mode, a compiler *must*
 | 
			
		||||
  # always write the preprocessed file to stdout, regardless of -o.
 | 
			
		||||
  "$@" || exit $?
 | 
			
		||||
 | 
			
		||||
  # Remove the call to Libtool.
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    while test "X$1" != 'X--mode=compile'; do
 | 
			
		||||
      shift
 | 
			
		||||
    done
 | 
			
		||||
    shift
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  # Remove '-o $object'.
 | 
			
		||||
  IFS=" "
 | 
			
		||||
  for arg
 | 
			
		||||
  do
 | 
			
		||||
    case $arg in
 | 
			
		||||
    -o)
 | 
			
		||||
      shift
 | 
			
		||||
      ;;
 | 
			
		||||
    $object)
 | 
			
		||||
      shift
 | 
			
		||||
      ;;
 | 
			
		||||
    *)
 | 
			
		||||
      set fnord "$@" "$arg"
 | 
			
		||||
      shift # fnord
 | 
			
		||||
      shift # $arg
 | 
			
		||||
      ;;
 | 
			
		||||
    esac
 | 
			
		||||
  done
 | 
			
		||||
 | 
			
		||||
  test -z "$dashmflag" && dashmflag=-M
 | 
			
		||||
  # Require at least two characters before searching for ':'
 | 
			
		||||
  # in the target name.  This is to cope with DOS-style filenames:
 | 
			
		||||
  # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
 | 
			
		||||
  "$@" $dashmflag |
 | 
			
		||||
    sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
  cat < "$tmpdepfile" > "$depfile"
 | 
			
		||||
  # Some versions of the HPUX 10.20 sed can't process this sed invocation
 | 
			
		||||
  # correctly.  Breaking it into two sed invocations is a workaround.
 | 
			
		||||
  tr ' ' "$nl" < "$tmpdepfile" \
 | 
			
		||||
    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
 | 
			
		||||
    | sed -e 's/$/ :/' >> "$depfile"
 | 
			
		||||
  rm -f "$tmpdepfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
dashXmstdout)
 | 
			
		||||
  # This case only exists to satisfy depend.m4.  It is never actually
 | 
			
		||||
  # run, as this mode is specially recognized in the preamble.
 | 
			
		||||
  exit 1
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
makedepend)
 | 
			
		||||
  "$@" || exit $?
 | 
			
		||||
  # Remove any Libtool call
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    while test "X$1" != 'X--mode=compile'; do
 | 
			
		||||
      shift
 | 
			
		||||
    done
 | 
			
		||||
    shift
 | 
			
		||||
  fi
 | 
			
		||||
  # X makedepend
 | 
			
		||||
  shift
 | 
			
		||||
  cleared=no eat=no
 | 
			
		||||
  for arg
 | 
			
		||||
  do
 | 
			
		||||
    case $cleared in
 | 
			
		||||
    no)
 | 
			
		||||
      set ""; shift
 | 
			
		||||
      cleared=yes ;;
 | 
			
		||||
    esac
 | 
			
		||||
    if test $eat = yes; then
 | 
			
		||||
      eat=no
 | 
			
		||||
      continue
 | 
			
		||||
    fi
 | 
			
		||||
    case "$arg" in
 | 
			
		||||
    -D*|-I*)
 | 
			
		||||
      set fnord "$@" "$arg"; shift ;;
 | 
			
		||||
    # Strip any option that makedepend may not understand.  Remove
 | 
			
		||||
    # the object too, otherwise makedepend will parse it as a source file.
 | 
			
		||||
    -arch)
 | 
			
		||||
      eat=yes ;;
 | 
			
		||||
    -*|$object)
 | 
			
		||||
      ;;
 | 
			
		||||
    *)
 | 
			
		||||
      set fnord "$@" "$arg"; shift ;;
 | 
			
		||||
    esac
 | 
			
		||||
  done
 | 
			
		||||
  obj_suffix=`echo "$object" | sed 's/^.*\././'`
 | 
			
		||||
  touch "$tmpdepfile"
 | 
			
		||||
  ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
  # makedepend may prepend the VPATH from the source file name to the object.
 | 
			
		||||
  # No need to regex-escape $object, excess matching of '.' is harmless.
 | 
			
		||||
  sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
 | 
			
		||||
  # Some versions of the HPUX 10.20 sed can't process the last invocation
 | 
			
		||||
  # correctly.  Breaking it into two sed invocations is a workaround.
 | 
			
		||||
  sed '1,2d' "$tmpdepfile" \
 | 
			
		||||
    | tr ' ' "$nl" \
 | 
			
		||||
    | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
 | 
			
		||||
    | sed -e 's/$/ :/' >> "$depfile"
 | 
			
		||||
  rm -f "$tmpdepfile" "$tmpdepfile".bak
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
cpp)
 | 
			
		||||
  # Important note: in order to support this mode, a compiler *must*
 | 
			
		||||
  # always write the preprocessed file to stdout.
 | 
			
		||||
  "$@" || exit $?
 | 
			
		||||
 | 
			
		||||
  # Remove the call to Libtool.
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    while test "X$1" != 'X--mode=compile'; do
 | 
			
		||||
      shift
 | 
			
		||||
    done
 | 
			
		||||
    shift
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  # Remove '-o $object'.
 | 
			
		||||
  IFS=" "
 | 
			
		||||
  for arg
 | 
			
		||||
  do
 | 
			
		||||
    case $arg in
 | 
			
		||||
    -o)
 | 
			
		||||
      shift
 | 
			
		||||
      ;;
 | 
			
		||||
    $object)
 | 
			
		||||
      shift
 | 
			
		||||
      ;;
 | 
			
		||||
    *)
 | 
			
		||||
      set fnord "$@" "$arg"
 | 
			
		||||
      shift # fnord
 | 
			
		||||
      shift # $arg
 | 
			
		||||
      ;;
 | 
			
		||||
    esac
 | 
			
		||||
  done
 | 
			
		||||
 | 
			
		||||
  "$@" -E \
 | 
			
		||||
    | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
 | 
			
		||||
             -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
 | 
			
		||||
    | sed '$ s: \\$::' > "$tmpdepfile"
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
  echo "$object : \\" > "$depfile"
 | 
			
		||||
  cat < "$tmpdepfile" >> "$depfile"
 | 
			
		||||
  sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
 | 
			
		||||
  rm -f "$tmpdepfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
msvisualcpp)
 | 
			
		||||
  # Important note: in order to support this mode, a compiler *must*
 | 
			
		||||
  # always write the preprocessed file to stdout.
 | 
			
		||||
  "$@" || exit $?
 | 
			
		||||
 | 
			
		||||
  # Remove the call to Libtool.
 | 
			
		||||
  if test "$libtool" = yes; then
 | 
			
		||||
    while test "X$1" != 'X--mode=compile'; do
 | 
			
		||||
      shift
 | 
			
		||||
    done
 | 
			
		||||
    shift
 | 
			
		||||
  fi
 | 
			
		||||
 | 
			
		||||
  IFS=" "
 | 
			
		||||
  for arg
 | 
			
		||||
  do
 | 
			
		||||
    case "$arg" in
 | 
			
		||||
    -o)
 | 
			
		||||
      shift
 | 
			
		||||
      ;;
 | 
			
		||||
    $object)
 | 
			
		||||
      shift
 | 
			
		||||
      ;;
 | 
			
		||||
    "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
 | 
			
		||||
        set fnord "$@"
 | 
			
		||||
        shift
 | 
			
		||||
        shift
 | 
			
		||||
        ;;
 | 
			
		||||
    *)
 | 
			
		||||
        set fnord "$@" "$arg"
 | 
			
		||||
        shift
 | 
			
		||||
        shift
 | 
			
		||||
        ;;
 | 
			
		||||
    esac
 | 
			
		||||
  done
 | 
			
		||||
  "$@" -E 2>/dev/null |
 | 
			
		||||
  sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
 | 
			
		||||
  rm -f "$depfile"
 | 
			
		||||
  echo "$object : \\" > "$depfile"
 | 
			
		||||
  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
 | 
			
		||||
  echo "$tab" >> "$depfile"
 | 
			
		||||
  sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
 | 
			
		||||
  rm -f "$tmpdepfile"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
msvcmsys)
 | 
			
		||||
  # This case exists only to let depend.m4 do its work.  It works by
 | 
			
		||||
  # looking at the text of this script.  This case will never be run,
 | 
			
		||||
  # since it is checked for above.
 | 
			
		||||
  exit 1
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
none)
 | 
			
		||||
  exec "$@"
 | 
			
		||||
  ;;
 | 
			
		||||
 | 
			
		||||
*)
 | 
			
		||||
  echo "Unknown depmode $depmode" 1>&2
 | 
			
		||||
  exit 1
 | 
			
		||||
  ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
exit 0
 | 
			
		||||
 | 
			
		||||
# Local Variables:
 | 
			
		||||
# mode: shell-script
 | 
			
		||||
# sh-indentation: 2
 | 
			
		||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
 | 
			
		||||
# time-stamp-start: "scriptversion="
 | 
			
		||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
 | 
			
		||||
# time-stamp-time-zone: "UTC"
 | 
			
		||||
# time-stamp-end: "; # UTC"
 | 
			
		||||
# End:
 | 
			
		||||
							
								
								
									
										212
									
								
								tools/pcre/dftables.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										212
									
								
								tools/pcre/dftables.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,212 @@
 | 
			
		||||
/*************************************************
 | 
			
		||||
*      Perl-Compatible Regular Expressions       *
 | 
			
		||||
*************************************************/
 | 
			
		||||
 | 
			
		||||
/* PCRE is a library of functions to support regular expressions whose syntax
 | 
			
		||||
and semantics are as close as possible to those of the Perl 5 language.
 | 
			
		||||
 | 
			
		||||
                       Written by Philip Hazel
 | 
			
		||||
           Copyright (c) 1997-2012 University of Cambridge
 | 
			
		||||
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
Redistribution and use in source and binary forms, with or without
 | 
			
		||||
modification, are permitted provided that the following conditions are met:
 | 
			
		||||
 | 
			
		||||
    * Redistributions of source code must retain the above copyright notice,
 | 
			
		||||
      this list of conditions and the following disclaimer.
 | 
			
		||||
 | 
			
		||||
    * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
      notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
      documentation and/or other materials provided with the distribution.
 | 
			
		||||
 | 
			
		||||
    * Neither the name of the University of Cambridge nor the names of its
 | 
			
		||||
      contributors may be used to endorse or promote products derived from
 | 
			
		||||
      this software without specific prior written permission.
 | 
			
		||||
 | 
			
		||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | 
			
		||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
			
		||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 | 
			
		||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | 
			
		||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 | 
			
		||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | 
			
		||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 | 
			
		||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 | 
			
		||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 | 
			
		||||
POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
-----------------------------------------------------------------------------
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* This is a freestanding support program to generate a file containing
 | 
			
		||||
character tables for PCRE. The tables are built according to the current
 | 
			
		||||
locale. Now that pcre_maketables is a function visible to the outside world, we
 | 
			
		||||
make use of its code from here in order to be consistent. */
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_CONFIG_H
 | 
			
		||||
#include "config.h"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <locale.h>
 | 
			
		||||
 | 
			
		||||
#include "pcre_internal.h"
 | 
			
		||||
 | 
			
		||||
#define DFTABLES          /* pcre_maketables.c notices this */
 | 
			
		||||
#include "pcre_maketables.c"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main(int argc, char **argv)
 | 
			
		||||
{
 | 
			
		||||
FILE *f;
 | 
			
		||||
int i = 1;
 | 
			
		||||
const unsigned char *tables;
 | 
			
		||||
const unsigned char *base_of_tables;
 | 
			
		||||
 | 
			
		||||
/* By default, the default C locale is used rather than what the building user
 | 
			
		||||
happens to have set. However, if the -L option is given, set the locale from
 | 
			
		||||
the LC_xxx environment variables. */
 | 
			
		||||
 | 
			
		||||
if (argc > 1 && strcmp(argv[1], "-L") == 0)
 | 
			
		||||
  {
 | 
			
		||||
  setlocale(LC_ALL, "");        /* Set from environment variables */
 | 
			
		||||
  i++;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
if (argc < i + 1)
 | 
			
		||||
  {
 | 
			
		||||
  fprintf(stderr, "dftables: one filename argument is required\n");
 | 
			
		||||
  return 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
tables = pcre_maketables();
 | 
			
		||||
base_of_tables = tables;
 | 
			
		||||
 | 
			
		||||
f = fopen(argv[i], "wb");
 | 
			
		||||
if (f == NULL)
 | 
			
		||||
  {
 | 
			
		||||
  fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
 | 
			
		||||
  return 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
/* There are several fprintf() calls here, because gcc in pedantic mode
 | 
			
		||||
complains about the very long string otherwise. */
 | 
			
		||||
 | 
			
		||||
fprintf(f,
 | 
			
		||||
  "/*************************************************\n"
 | 
			
		||||
  "*      Perl-Compatible Regular Expressions       *\n"
 | 
			
		||||
  "*************************************************/\n\n"
 | 
			
		||||
  "/* This file was automatically written by the dftables auxiliary\n"
 | 
			
		||||
  "program. It contains character tables that are used when no external\n"
 | 
			
		||||
  "tables are passed to PCRE by the application that calls it. The tables\n"
 | 
			
		||||
  "are used only for characters whose code values are less than 256.\n\n");
 | 
			
		||||
fprintf(f,
 | 
			
		||||
  "The following #includes are present because without them gcc 4.x may remove\n"
 | 
			
		||||
  "the array definition from the final binary if PCRE is built into a static\n"
 | 
			
		||||
  "library and dead code stripping is activated. This leads to link errors.\n"
 | 
			
		||||
  "Pulling in the header ensures that the array gets flagged as \"someone\n"
 | 
			
		||||
  "outside this compilation unit might reference this\" and so it will always\n"
 | 
			
		||||
  "be supplied to the linker. */\n\n");
 | 
			
		||||
 | 
			
		||||
/* Force config.h in z/OS */
 | 
			
		||||
 | 
			
		||||
#if defined NATIVE_ZOS
 | 
			
		||||
fprintf(f,
 | 
			
		||||
  "/* For z/OS, config.h is forced */\n"
 | 
			
		||||
  "#ifndef HAVE_CONFIG_H\n"
 | 
			
		||||
  "#define HAVE_CONFIG_H 1\n"
 | 
			
		||||
  "#endif\n\n");
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
fprintf(f,
 | 
			
		||||
  "#ifdef HAVE_CONFIG_H\n"
 | 
			
		||||
  "#include \"config.h\"\n"
 | 
			
		||||
  "#endif\n\n"
 | 
			
		||||
  "#include \"pcre_internal.h\"\n\n");
 | 
			
		||||
 | 
			
		||||
fprintf(f,
 | 
			
		||||
  "const pcre_uint8 PRIV(default_tables)[] = {\n\n"
 | 
			
		||||
  "/* This table is a lower casing table. */\n\n");
 | 
			
		||||
 | 
			
		||||
fprintf(f, "  ");
 | 
			
		||||
for (i = 0; i < 256; i++)
 | 
			
		||||
  {
 | 
			
		||||
  if ((i & 7) == 0 && i != 0) fprintf(f, "\n  ");
 | 
			
		||||
  fprintf(f, "%3d", *tables++);
 | 
			
		||||
  if (i != 255) fprintf(f, ",");
 | 
			
		||||
  }
 | 
			
		||||
fprintf(f, ",\n\n");
 | 
			
		||||
 | 
			
		||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
 | 
			
		||||
 | 
			
		||||
fprintf(f, "  ");
 | 
			
		||||
for (i = 0; i < 256; i++)
 | 
			
		||||
  {
 | 
			
		||||
  if ((i & 7) == 0 && i != 0) fprintf(f, "\n  ");
 | 
			
		||||
  fprintf(f, "%3d", *tables++);
 | 
			
		||||
  if (i != 255) fprintf(f, ",");
 | 
			
		||||
  }
 | 
			
		||||
fprintf(f, ",\n\n");
 | 
			
		||||
 | 
			
		||||
fprintf(f,
 | 
			
		||||
  "/* This table contains bit maps for various character classes.\n"
 | 
			
		||||
  "Each map is 32 bytes long and the bits run from the least\n"
 | 
			
		||||
  "significant end of each byte. The classes that have their own\n"
 | 
			
		||||
  "maps are: space, xdigit, digit, upper, lower, word, graph\n"
 | 
			
		||||
  "print, punct, and cntrl. Other classes are built from combinations. */\n\n");
 | 
			
		||||
 | 
			
		||||
fprintf(f, "  ");
 | 
			
		||||
for (i = 0; i < cbit_length; i++)
 | 
			
		||||
  {
 | 
			
		||||
  if ((i & 7) == 0 && i != 0)
 | 
			
		||||
    {
 | 
			
		||||
    if ((i & 31) == 0) fprintf(f, "\n");
 | 
			
		||||
    fprintf(f, "\n  ");
 | 
			
		||||
    }
 | 
			
		||||
  fprintf(f, "0x%02x", *tables++);
 | 
			
		||||
  if (i != cbit_length - 1) fprintf(f, ",");
 | 
			
		||||
  }
 | 
			
		||||
fprintf(f, ",\n\n");
 | 
			
		||||
 | 
			
		||||
fprintf(f,
 | 
			
		||||
  "/* This table identifies various classes of character by individual bits:\n"
 | 
			
		||||
  "  0x%02x   white space character\n"
 | 
			
		||||
  "  0x%02x   letter\n"
 | 
			
		||||
  "  0x%02x   decimal digit\n"
 | 
			
		||||
  "  0x%02x   hexadecimal digit\n"
 | 
			
		||||
  "  0x%02x   alphanumeric or '_'\n"
 | 
			
		||||
  "  0x%02x   regular expression metacharacter or binary zero\n*/\n\n",
 | 
			
		||||
  ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
 | 
			
		||||
  ctype_meta);
 | 
			
		||||
 | 
			
		||||
fprintf(f, "  ");
 | 
			
		||||
for (i = 0; i < 256; i++)
 | 
			
		||||
  {
 | 
			
		||||
  if ((i & 7) == 0 && i != 0)
 | 
			
		||||
    {
 | 
			
		||||
    fprintf(f, " /* ");
 | 
			
		||||
    if (isprint(i-8)) fprintf(f, " %c -", i-8);
 | 
			
		||||
      else fprintf(f, "%3d-", i-8);
 | 
			
		||||
    if (isprint(i-1)) fprintf(f, " %c ", i-1);
 | 
			
		||||
      else fprintf(f, "%3d", i-1);
 | 
			
		||||
    fprintf(f, " */\n  ");
 | 
			
		||||
    }
 | 
			
		||||
  fprintf(f, "0x%02x", *tables++);
 | 
			
		||||
  if (i != 255) fprintf(f, ",");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
fprintf(f, "};/* ");
 | 
			
		||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
 | 
			
		||||
  else fprintf(f, "%3d-", i-8);
 | 
			
		||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
 | 
			
		||||
  else fprintf(f, "%3d", i-1);
 | 
			
		||||
fprintf(f, " */\n\n/* End of pcre_chartables.c */\n");
 | 
			
		||||
 | 
			
		||||
fclose(f);
 | 
			
		||||
free((void *)base_of_tables);
 | 
			
		||||
return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* End of dftables.c */
 | 
			
		||||
							
								
								
									
										764
									
								
								tools/pcre/doc/html/NON-AUTOTOOLS-BUILD.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										764
									
								
								tools/pcre/doc/html/NON-AUTOTOOLS-BUILD.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,764 @@
 | 
			
		||||
Building PCRE without using autotools
 | 
			
		||||
-------------------------------------
 | 
			
		||||
 | 
			
		||||
This document contains the following sections:
 | 
			
		||||
 | 
			
		||||
  General
 | 
			
		||||
  Generic instructions for the PCRE C library
 | 
			
		||||
  The C++ wrapper functions
 | 
			
		||||
  Building for virtual Pascal
 | 
			
		||||
  Stack size in Windows environments
 | 
			
		||||
  Linking programs in Windows environments
 | 
			
		||||
  Calling conventions in Windows environments
 | 
			
		||||
  Comments about Win32 builds
 | 
			
		||||
  Building PCRE on Windows with CMake
 | 
			
		||||
  Use of relative paths with CMake on Windows
 | 
			
		||||
  Testing with RunTest.bat
 | 
			
		||||
  Building under Windows CE with Visual Studio 200x
 | 
			
		||||
  Building under Windows with BCC5.5
 | 
			
		||||
  Building using Borland C++ Builder 2007 (CB2007) and higher
 | 
			
		||||
  Building PCRE on OpenVMS
 | 
			
		||||
  Building PCRE on Stratus OpenVOS
 | 
			
		||||
  Building PCRE on native z/OS and z/VM
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
GENERAL
 | 
			
		||||
 | 
			
		||||
I (Philip Hazel) have no experience of Windows or VMS systems and how their
 | 
			
		||||
libraries work. The items in the PCRE distribution and Makefile that relate to
 | 
			
		||||
anything other than Linux systems are untested by me.
 | 
			
		||||
 | 
			
		||||
There are some other comments and files (including some documentation in CHM
 | 
			
		||||
format) in the Contrib directory on the FTP site:
 | 
			
		||||
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
 | 
			
		||||
 | 
			
		||||
The basic PCRE library consists entirely of code written in Standard C, and so
 | 
			
		||||
should compile successfully on any system that has a Standard C compiler and
 | 
			
		||||
library. The C++ wrapper functions are a separate issue (see below).
 | 
			
		||||
 | 
			
		||||
The PCRE distribution includes a "configure" file for use by the configure/make
 | 
			
		||||
(autotools) build system, as found in many Unix-like environments. The README
 | 
			
		||||
file contains information about the options for "configure".
 | 
			
		||||
 | 
			
		||||
There is also support for CMake, which some users prefer, especially in Windows
 | 
			
		||||
environments, though it can also be run in Unix-like environments. See the
 | 
			
		||||
section entitled "Building PCRE on Windows with CMake" below.
 | 
			
		||||
 | 
			
		||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
 | 
			
		||||
names config.h.generic and pcre.h.generic. These are provided for those who
 | 
			
		||||
build PCRE without using "configure" or CMake. If you use "configure" or CMake,
 | 
			
		||||
the .generic versions are not used.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
 | 
			
		||||
 | 
			
		||||
The following are generic instructions for building the PCRE C library "by
 | 
			
		||||
hand". If you are going to use CMake, this section does not apply to you; you
 | 
			
		||||
can skip ahead to the CMake section.
 | 
			
		||||
 | 
			
		||||
 (1) Copy or rename the file config.h.generic as config.h, and edit the macro
 | 
			
		||||
     settings that it contains to whatever is appropriate for your environment.
 | 
			
		||||
 | 
			
		||||
     In particular, you can alter the definition of the NEWLINE macro to
 | 
			
		||||
     specify what character(s) you want to be interpreted as line terminators.
 | 
			
		||||
     In an EBCDIC environment, you MUST change NEWLINE, because its default
 | 
			
		||||
     value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15,
 | 
			
		||||
     NL), though in some cases it may be 37 (0x25).
 | 
			
		||||
 | 
			
		||||
     When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H
 | 
			
		||||
     to your compiler so that config.h is included in the sources.
 | 
			
		||||
 | 
			
		||||
     An alternative approach is not to edit config.h, but to use -D on the
 | 
			
		||||
     compiler command line to make any changes that you need to the
 | 
			
		||||
     configuration options. In this case -DHAVE_CONFIG_H must not be set.
 | 
			
		||||
 | 
			
		||||
     NOTE: There have been occasions when the way in which certain parameters
 | 
			
		||||
     in config.h are used has changed between releases. (In the configure/make
 | 
			
		||||
     world, this is handled automatically.) When upgrading to a new release,
 | 
			
		||||
     you are strongly advised to review config.h.generic before re-using what
 | 
			
		||||
     you had previously.
 | 
			
		||||
 | 
			
		||||
 (2) Copy or rename the file pcre.h.generic as pcre.h.
 | 
			
		||||
 | 
			
		||||
 (3) EITHER:
 | 
			
		||||
       Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
 | 
			
		||||
 | 
			
		||||
     OR:
 | 
			
		||||
       Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
 | 
			
		||||
       you have set up config.h), and then run it with the single argument
 | 
			
		||||
       "pcre_chartables.c". This generates a set of standard character tables
 | 
			
		||||
       and writes them to that file. The tables are generated using the default
 | 
			
		||||
       C locale for your system. If you want to use a locale that is specified
 | 
			
		||||
       by LC_xxx environment variables, add the -L option to the dftables
 | 
			
		||||
       command. You must use this method if you are building on a system that
 | 
			
		||||
       uses EBCDIC code.
 | 
			
		||||
 | 
			
		||||
     The tables in pcre_chartables.c are defaults. The caller of PCRE can
 | 
			
		||||
     specify alternative tables at run time.
 | 
			
		||||
 | 
			
		||||
 (4) Ensure that you have the following header files:
 | 
			
		||||
 | 
			
		||||
       pcre_internal.h
 | 
			
		||||
       ucp.h
 | 
			
		||||
 | 
			
		||||
 (5) For an 8-bit library, compile the following source files, setting
 | 
			
		||||
     -DHAVE_CONFIG_H as a compiler option if you have set up config.h with your
 | 
			
		||||
     configuration, or else use other -D settings to change the configuration
 | 
			
		||||
     as required.
 | 
			
		||||
 | 
			
		||||
       pcre_byte_order.c
 | 
			
		||||
       pcre_chartables.c
 | 
			
		||||
       pcre_compile.c
 | 
			
		||||
       pcre_config.c
 | 
			
		||||
       pcre_dfa_exec.c
 | 
			
		||||
       pcre_exec.c
 | 
			
		||||
       pcre_fullinfo.c
 | 
			
		||||
       pcre_get.c
 | 
			
		||||
       pcre_globals.c
 | 
			
		||||
       pcre_jit_compile.c
 | 
			
		||||
       pcre_maketables.c
 | 
			
		||||
       pcre_newline.c
 | 
			
		||||
       pcre_ord2utf8.c
 | 
			
		||||
       pcre_refcount.c
 | 
			
		||||
       pcre_string_utils.c
 | 
			
		||||
       pcre_study.c
 | 
			
		||||
       pcre_tables.c
 | 
			
		||||
       pcre_ucd.c
 | 
			
		||||
       pcre_valid_utf8.c
 | 
			
		||||
       pcre_version.c
 | 
			
		||||
       pcre_xclass.c
 | 
			
		||||
 | 
			
		||||
     Make sure that you include -I. in the compiler command (or equivalent for
 | 
			
		||||
     an unusual compiler) so that all included PCRE header files are first
 | 
			
		||||
     sought in the current directory. Otherwise you run the risk of picking up
 | 
			
		||||
     a previously-installed file from somewhere else.
 | 
			
		||||
 | 
			
		||||
     Note that you must still compile pcre_jit_compile.c, even if you have not
 | 
			
		||||
     defined SUPPORT_JIT in config.h, because when JIT support is not
 | 
			
		||||
     configured, dummy functions are compiled. When JIT support IS configured,
 | 
			
		||||
     pcre_jit_compile.c #includes sources from the sljit subdirectory, where
 | 
			
		||||
     there should be 16 files, all of whose names begin with "sljit".
 | 
			
		||||
 | 
			
		||||
 (6) Now link all the compiled code into an object library in whichever form
 | 
			
		||||
     your system keeps such libraries. This is the basic PCRE C 8-bit library.
 | 
			
		||||
     If your system has static and shared libraries, you may have to do this
 | 
			
		||||
     once for each type.
 | 
			
		||||
 | 
			
		||||
 (7) If you want to build a 16-bit library (as well as, or instead of the 8-bit
 | 
			
		||||
     or 32-bit libraries) repeat steps 5-6 with the following files:
 | 
			
		||||
 | 
			
		||||
       pcre16_byte_order.c
 | 
			
		||||
       pcre16_chartables.c
 | 
			
		||||
       pcre16_compile.c
 | 
			
		||||
       pcre16_config.c
 | 
			
		||||
       pcre16_dfa_exec.c
 | 
			
		||||
       pcre16_exec.c
 | 
			
		||||
       pcre16_fullinfo.c
 | 
			
		||||
       pcre16_get.c
 | 
			
		||||
       pcre16_globals.c
 | 
			
		||||
       pcre16_jit_compile.c
 | 
			
		||||
       pcre16_maketables.c
 | 
			
		||||
       pcre16_newline.c
 | 
			
		||||
       pcre16_ord2utf16.c
 | 
			
		||||
       pcre16_refcount.c
 | 
			
		||||
       pcre16_string_utils.c
 | 
			
		||||
       pcre16_study.c
 | 
			
		||||
       pcre16_tables.c
 | 
			
		||||
       pcre16_ucd.c
 | 
			
		||||
       pcre16_utf16_utils.c
 | 
			
		||||
       pcre16_valid_utf16.c
 | 
			
		||||
       pcre16_version.c
 | 
			
		||||
       pcre16_xclass.c
 | 
			
		||||
 | 
			
		||||
 (8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
 | 
			
		||||
     or 16-bit libraries) repeat steps 5-6 with the following files:
 | 
			
		||||
 | 
			
		||||
       pcre32_byte_order.c
 | 
			
		||||
       pcre32_chartables.c
 | 
			
		||||
       pcre32_compile.c
 | 
			
		||||
       pcre32_config.c
 | 
			
		||||
       pcre32_dfa_exec.c
 | 
			
		||||
       pcre32_exec.c
 | 
			
		||||
       pcre32_fullinfo.c
 | 
			
		||||
       pcre32_get.c
 | 
			
		||||
       pcre32_globals.c
 | 
			
		||||
       pcre32_jit_compile.c
 | 
			
		||||
       pcre32_maketables.c
 | 
			
		||||
       pcre32_newline.c
 | 
			
		||||
       pcre32_ord2utf32.c
 | 
			
		||||
       pcre32_refcount.c
 | 
			
		||||
       pcre32_string_utils.c
 | 
			
		||||
       pcre32_study.c
 | 
			
		||||
       pcre32_tables.c
 | 
			
		||||
       pcre32_ucd.c
 | 
			
		||||
       pcre32_utf32_utils.c
 | 
			
		||||
       pcre32_valid_utf32.c
 | 
			
		||||
       pcre32_version.c
 | 
			
		||||
       pcre32_xclass.c
 | 
			
		||||
 | 
			
		||||
 (9) If you want to build the POSIX wrapper functions (which apply only to the
 | 
			
		||||
     8-bit library), ensure that you have the pcreposix.h file and then compile
 | 
			
		||||
     pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
 | 
			
		||||
     (on its own) as the pcreposix library.
 | 
			
		||||
 | 
			
		||||
(10) The pcretest program can be linked with any combination of the 8-bit,
 | 
			
		||||
     16-bit and 32-bit libraries (depending on what you selected in config.h).
 | 
			
		||||
     Compile pcretest.c and pcre_printint.c (again, don't forget
 | 
			
		||||
     -DHAVE_CONFIG_H) and link them together with the appropriate library/ies.
 | 
			
		||||
     If you compiled an 8-bit library, pcretest also needs the pcreposix
 | 
			
		||||
     wrapper library unless you compiled it with -DNOPOSIX.
 | 
			
		||||
 | 
			
		||||
(11) Run pcretest on the testinput files in the testdata directory, and check
 | 
			
		||||
     that the output matches the corresponding testoutput files. There are
 | 
			
		||||
     comments about what each test does in the section entitled "Testing PCRE"
 | 
			
		||||
     in the README file. If you compiled more than one of the 8-bit, 16-bit and
 | 
			
		||||
     32-bit libraries, you need to run pcretest with the -16 option to do
 | 
			
		||||
     16-bit tests and with the -32 option to do 32-bit tests.
 | 
			
		||||
 | 
			
		||||
     Some tests are relevant only when certain build-time options are selected.
 | 
			
		||||
     For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run
 | 
			
		||||
     if you have built PCRE without it. See the comments at the start of each
 | 
			
		||||
     testinput file. If you have a suitable Unix-like shell, the RunTest script
 | 
			
		||||
     will run the appropriate tests for you. The command "RunTest list" will
 | 
			
		||||
     output a list of all the tests.
 | 
			
		||||
 | 
			
		||||
     Note that the supplied files are in Unix format, with just LF characters
 | 
			
		||||
     as line terminators. You may need to edit them to change this if your
 | 
			
		||||
     system uses a different convention. If you are using Windows, you probably
 | 
			
		||||
     should use the wintestinput3 file instead of testinput3 (and the
 | 
			
		||||
     corresponding output file). This is a locale test; wintestinput3 sets the
 | 
			
		||||
     locale to "french" rather than "fr_FR", and there are some minor output
 | 
			
		||||
     differences.
 | 
			
		||||
 | 
			
		||||
(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
 | 
			
		||||
     by the testdata files. However, you might also like to build and run
 | 
			
		||||
     the freestanding JIT test program, pcre_jit_test.c.
 | 
			
		||||
 | 
			
		||||
(13) If you want to use the pcregrep command, compile and link pcregrep.c; it
 | 
			
		||||
     uses only the basic 8-bit PCRE library (it does not need the pcreposix
 | 
			
		||||
     library).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
THE C++ WRAPPER FUNCTIONS
 | 
			
		||||
 | 
			
		||||
The PCRE distribution also contains some C++ wrapper functions and tests,
 | 
			
		||||
applicable to the 8-bit library, which were contributed by Google Inc. On a
 | 
			
		||||
system that can use "configure" and "make", the functions are automatically
 | 
			
		||||
built into a library called pcrecpp. It should be straightforward to compile
 | 
			
		||||
the .cc files manually on other systems. The files called xxx_unittest.cc are
 | 
			
		||||
test programs for each of the corresponding xxx.cc files.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING FOR VIRTUAL PASCAL
 | 
			
		||||
 | 
			
		||||
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
 | 
			
		||||
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
 | 
			
		||||
additional files. The following files in the distribution are for building PCRE
 | 
			
		||||
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
STACK SIZE IN WINDOWS ENVIRONMENTS
 | 
			
		||||
 | 
			
		||||
The default processor stack size of 1Mb in some Windows environments is too
 | 
			
		||||
small for matching patterns that need much recursion. In particular, test 2 may
 | 
			
		||||
fail because of this. Normally, running out of stack causes a crash, but there
 | 
			
		||||
have been cases where the test program has just died silently. See your linker
 | 
			
		||||
documentation for how to increase stack size if you experience problems. The
 | 
			
		||||
Linux default of 8Mb is a reasonable choice for the stack, though even that can
 | 
			
		||||
be too small for some pattern/subject combinations.
 | 
			
		||||
 | 
			
		||||
PCRE has a compile configuration option to disable the use of stack for
 | 
			
		||||
recursion so that heap is used instead. However, pattern matching is
 | 
			
		||||
significantly slower when this is done. There is more about stack usage in the
 | 
			
		||||
"pcrestack" documentation.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
 | 
			
		||||
 | 
			
		||||
If you want to statically link a program against a PCRE library in the form of
 | 
			
		||||
a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
 | 
			
		||||
pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
 | 
			
		||||
be declared __declspec(dllimport), with unwanted results.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
 | 
			
		||||
 | 
			
		||||
It is possible to compile programs to use different calling conventions using
 | 
			
		||||
MSVC. Search the web for "calling conventions" for more information. To make it
 | 
			
		||||
easier to change the calling convention for the exported functions in the
 | 
			
		||||
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
 | 
			
		||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
 | 
			
		||||
not set, it defaults to empty; the default calling convention is then used
 | 
			
		||||
(which is what is wanted most of the time).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE")
 | 
			
		||||
 | 
			
		||||
There are two ways of building PCRE using the "configure, make, make install"
 | 
			
		||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
 | 
			
		||||
the same thing; they are completely different from each other. There is also
 | 
			
		||||
support for building using CMake, which some users find a more straightforward
 | 
			
		||||
way of building PCRE under Windows.
 | 
			
		||||
 | 
			
		||||
The MinGW home page (http://www.mingw.org/) says this:
 | 
			
		||||
 | 
			
		||||
  MinGW: A collection of freely available and freely distributable Windows
 | 
			
		||||
  specific header files and import libraries combined with GNU toolsets that
 | 
			
		||||
  allow one to produce native Windows programs that do not rely on any
 | 
			
		||||
  3rd-party C runtime DLLs.
 | 
			
		||||
 | 
			
		||||
The Cygwin home page (http://www.cygwin.com/) says this:
 | 
			
		||||
 | 
			
		||||
  Cygwin is a Linux-like environment for Windows. It consists of two parts:
 | 
			
		||||
 | 
			
		||||
  . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
 | 
			
		||||
    substantial Linux API functionality
 | 
			
		||||
 | 
			
		||||
  . A collection of tools which provide Linux look and feel.
 | 
			
		||||
 | 
			
		||||
  The Cygwin DLL currently works with all recent, commercially released x86 32
 | 
			
		||||
  bit and 64 bit versions of Windows, with the exception of Windows CE.
 | 
			
		||||
 | 
			
		||||
On both MinGW and Cygwin, PCRE should build correctly using:
 | 
			
		||||
 | 
			
		||||
  ./configure && make && make install
 | 
			
		||||
 | 
			
		||||
This should create two libraries called libpcre and libpcreposix, and, if you
 | 
			
		||||
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
 | 
			
		||||
independent libraries: when you link with libpcreposix or libpcrecpp you must
 | 
			
		||||
also link with libpcre, which contains the basic functions. (Some earlier
 | 
			
		||||
releases of PCRE included the basic libpcre functions in libpcreposix. This no
 | 
			
		||||
longer happens.)
 | 
			
		||||
 | 
			
		||||
A user submitted a special-purpose patch that makes it easy to create
 | 
			
		||||
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
 | 
			
		||||
as a special target. If you use this target, no other files are built, and in
 | 
			
		||||
particular, the pcretest and pcregrep programs are not built. An example of how
 | 
			
		||||
this might be used is:
 | 
			
		||||
 | 
			
		||||
  ./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
 | 
			
		||||
 | 
			
		||||
Using Cygwin's compiler generates libraries and executables that depend on
 | 
			
		||||
cygwin1.dll. If a library that is generated this way is distributed,
 | 
			
		||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
 | 
			
		||||
licence, this forces not only PCRE to be under the GPL, but also the entire
 | 
			
		||||
application. A distributor who wants to keep their own code proprietary must
 | 
			
		||||
purchase an appropriate Cygwin licence.
 | 
			
		||||
 | 
			
		||||
MinGW has no such restrictions. The MinGW compiler generates a library or
 | 
			
		||||
executable that can run standalone on Windows without any third party dll or
 | 
			
		||||
licensing issues.
 | 
			
		||||
 | 
			
		||||
But there is more complication:
 | 
			
		||||
 | 
			
		||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
 | 
			
		||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
 | 
			
		||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
 | 
			
		||||
gcc and MinGW's gcc). So, a user can:
 | 
			
		||||
 | 
			
		||||
. Build native binaries by using MinGW or by getting Cygwin and using
 | 
			
		||||
  -mno-cygwin.
 | 
			
		||||
 | 
			
		||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
 | 
			
		||||
  compiler flags.
 | 
			
		||||
 | 
			
		||||
The test files that are supplied with PCRE are in UNIX format, with LF
 | 
			
		||||
characters as line terminators. Unless your PCRE library uses a default newline
 | 
			
		||||
option that includes LF as a valid newline, it may be necessary to change the
 | 
			
		||||
line terminators in the test files to get some of the tests to work.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING PCRE ON WINDOWS WITH CMAKE
 | 
			
		||||
 | 
			
		||||
CMake is an alternative configuration facility that can be used instead of
 | 
			
		||||
"configure". CMake creates project files (make files, solution files, etc.)
 | 
			
		||||
tailored to numerous development environments, including Visual Studio,
 | 
			
		||||
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
 | 
			
		||||
spaces in the names for your CMake installation and your PCRE source and build
 | 
			
		||||
directories.
 | 
			
		||||
 | 
			
		||||
The following instructions were contributed by a PCRE user. If they are not
 | 
			
		||||
followed exactly, errors may occur. In the event that errors do occur, it is
 | 
			
		||||
recommended that you delete the CMake cache before attempting to repeat the
 | 
			
		||||
CMake build process. In the CMake GUI, the cache can be deleted by selecting
 | 
			
		||||
"File > Delete Cache".
 | 
			
		||||
 | 
			
		||||
1.  Install the latest CMake version available from http://www.cmake.org/, and
 | 
			
		||||
    ensure that cmake\bin is on your path.
 | 
			
		||||
 | 
			
		||||
2.  Unzip (retaining folder structure) the PCRE source tree into a source
 | 
			
		||||
    directory such as C:\pcre. You should ensure your local date and time
 | 
			
		||||
    is not earlier than the file dates in your source dir if the release is
 | 
			
		||||
    very new.
 | 
			
		||||
 | 
			
		||||
3.  Create a new, empty build directory, preferably a subdirectory of the
 | 
			
		||||
    source dir. For example, C:\pcre\pcre-xx\build.
 | 
			
		||||
 | 
			
		||||
4.  Run cmake-gui from the Shell environment of your build tool, for example,
 | 
			
		||||
    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
 | 
			
		||||
    to start CMake from the Windows Start menu, as this can lead to errors.
 | 
			
		||||
 | 
			
		||||
5.  Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build
 | 
			
		||||
    directories, respectively.
 | 
			
		||||
 | 
			
		||||
6.  Hit the "Configure" button.
 | 
			
		||||
 | 
			
		||||
7.  Select the particular IDE / build tool that you are using (Visual
 | 
			
		||||
    Studio, MSYS makefiles, MinGW makefiles, etc.)
 | 
			
		||||
 | 
			
		||||
8.  The GUI will then list several configuration options. This is where
 | 
			
		||||
    you can enable UTF-8 support or other PCRE optional features.
 | 
			
		||||
 | 
			
		||||
9.  Hit "Configure" again. The adjacent "Generate" button should now be
 | 
			
		||||
    active.
 | 
			
		||||
 | 
			
		||||
10. Hit "Generate".
 | 
			
		||||
 | 
			
		||||
11. The build directory should now contain a usable build system, be it a
 | 
			
		||||
    solution file for Visual Studio, makefiles for MinGW, etc. Exit from
 | 
			
		||||
    cmake-gui and use the generated build system with your compiler or IDE.
 | 
			
		||||
    E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE
 | 
			
		||||
    solution, select the desired configuration (Debug, or Release, etc.) and
 | 
			
		||||
    build the ALL_BUILD project.
 | 
			
		||||
 | 
			
		||||
12. If during configuration with cmake-gui you've elected to build the test
 | 
			
		||||
    programs, you can execute them by building the test project. E.g., for
 | 
			
		||||
    MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
 | 
			
		||||
    most recent build configuration is targeted by the tests. A summary of
 | 
			
		||||
    test results is presented. Complete test output is subsequently
 | 
			
		||||
    available for review in Testing\Temporary under your build dir.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
 | 
			
		||||
 | 
			
		||||
A PCRE user comments as follows: I thought that others may want to know the
 | 
			
		||||
current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is:
 | 
			
		||||
 | 
			
		||||
-- AdditionalIncludeDirectories is only partially modified (only the
 | 
			
		||||
   first path - see below)
 | 
			
		||||
-- Only some of the contained file paths are modified - shown below for
 | 
			
		||||
   pcre.vcproj
 | 
			
		||||
-- It properly modifies
 | 
			
		||||
 | 
			
		||||
I am sure CMake people can fix that if they want to. Until then one will
 | 
			
		||||
need to replace existing absolute paths in project files with relative
 | 
			
		||||
paths manually (e.g. from VS) - relative to project file location. I did
 | 
			
		||||
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
 | 
			
		||||
deal.
 | 
			
		||||
 | 
			
		||||
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
 | 
			
		||||
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
 | 
			
		||||
 | 
			
		||||
RelativePath="pcre.h"
 | 
			
		||||
RelativePath="pcre_chartables.c"
 | 
			
		||||
RelativePath="pcre_chartables.c.rule"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
TESTING WITH RUNTEST.BAT
 | 
			
		||||
 | 
			
		||||
If configured with CMake, building the test project ("make test" or building
 | 
			
		||||
ALL_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending
 | 
			
		||||
on your configuration options, possibly other test programs) in the build
 | 
			
		||||
directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths.
 | 
			
		||||
 | 
			
		||||
For manual testing with RunTest.bat, provided the build dir is a subdirectory
 | 
			
		||||
of the source directory: Open command shell window. Chdir to the location
 | 
			
		||||
of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with
 | 
			
		||||
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
 | 
			
		||||
 | 
			
		||||
To run only a particular test with RunTest.Bat provide a test number argument.
 | 
			
		||||
 | 
			
		||||
Otherwise:
 | 
			
		||||
 | 
			
		||||
1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe
 | 
			
		||||
   have been created.
 | 
			
		||||
 | 
			
		||||
2. Edit RunTest.bat to identify the full or relative location of
 | 
			
		||||
   the pcre source (in which the testdata folder resides), e.g.:
 | 
			
		||||
 | 
			
		||||
   set srcdir=C:\pcre\pcre-8.20
 | 
			
		||||
 | 
			
		||||
3. In a Windows command environment, chdir to the location of your bat and
 | 
			
		||||
   exe programs.
 | 
			
		||||
 | 
			
		||||
4. Run RunTest.bat. Test outputs will automatically be compared to expected
 | 
			
		||||
   results, and discrepancies will be identified in the console output.
 | 
			
		||||
 | 
			
		||||
To independently test the just-in-time compiler, run pcre_jit_test.exe.
 | 
			
		||||
To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
 | 
			
		||||
pcre_scanner_unittest.exe.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
 | 
			
		||||
 | 
			
		||||
Vincent Richomme sent a zip archive of files to help with this process. They
 | 
			
		||||
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
 | 
			
		||||
site.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING UNDER WINDOWS WITH BCC5.5
 | 
			
		||||
 | 
			
		||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
 | 
			
		||||
 | 
			
		||||
Some of the core BCC libraries have a version of PCRE from 1998 built in, which
 | 
			
		||||
can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version
 | 
			
		||||
mismatch. I'm including an easy workaround below, if you'd like to include it
 | 
			
		||||
in the non-unix instructions:
 | 
			
		||||
 | 
			
		||||
When linking a project with BCC5.5, pcre.lib must be included before any of the
 | 
			
		||||
libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER
 | 
			
		||||
 | 
			
		||||
A PCRE user sent these comments about this environment (see also the comment
 | 
			
		||||
from another user that follows them):
 | 
			
		||||
 | 
			
		||||
The XE versions of C++ Builder come with a RegularExpressionsCore class which
 | 
			
		||||
contains a version of TPerlRegEx. However, direct use of the C PCRE library may
 | 
			
		||||
be desirable.
 | 
			
		||||
 | 
			
		||||
The default makevp.bat, however, supplied with PCRE builds a version of PCRE
 | 
			
		||||
that is not usable with any version of C++ Builder because the compiler ships
 | 
			
		||||
with an embedded version of PCRE, version 2.01 from 1998! [See also the note
 | 
			
		||||
about BCC5.5 above.] If you want to use PCRE you'll need to rename the
 | 
			
		||||
functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just
 | 
			
		||||
use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the
 | 
			
		||||
embedded version of PCRE does not have the 16 bit function names, there is no
 | 
			
		||||
conflict.
 | 
			
		||||
 | 
			
		||||
Building PCRE using a C++ Builder static library project file (recommended):
 | 
			
		||||
 | 
			
		||||
1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder
 | 
			
		||||
original include path.
 | 
			
		||||
 | 
			
		||||
2. Download PCRE from pcre.org and extract to a directory.
 | 
			
		||||
 | 
			
		||||
3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to
 | 
			
		||||
pcre.h, and config.h.generic to config.h.
 | 
			
		||||
 | 
			
		||||
4. Edit pcre.h and pcre_config.c so that they include config.h.
 | 
			
		||||
 | 
			
		||||
5. Edit config.h like so:
 | 
			
		||||
 | 
			
		||||
Comment out the following lines:
 | 
			
		||||
#define PACKAGE "pcre"
 | 
			
		||||
#define PACKAGE_BUGREPORT ""
 | 
			
		||||
#define PACKAGE_NAME "PCRE"
 | 
			
		||||
#define PACKAGE_STRING "PCRE 8.32"
 | 
			
		||||
#define PACKAGE_TARNAME "pcre"
 | 
			
		||||
#define PACKAGE_URL ""
 | 
			
		||||
#define PACKAGE_VERSION "8.32"
 | 
			
		||||
 | 
			
		||||
Add the following lines:
 | 
			
		||||
#ifndef SUPPORT_UTF
 | 
			
		||||
#define SUPPORT_UTF 100 // any value is fine
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SUPPORT_UCP
 | 
			
		||||
#define SUPPORT_UCP 101 // any value is fine
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SUPPORT_PCRE16
 | 
			
		||||
#define SUPPORT_PCRE16 102 // any value is fine
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SUPPORT_UTF8
 | 
			
		||||
#define SUPPORT_UTF8 103 // any value is fine
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
6. Build a C++ Builder project using the IDE. Go to File / New / Other and
 | 
			
		||||
choose Static Library. You can name it pcre.cbproj or whatever. Now set your
 | 
			
		||||
paths by going to Project / Options. Set the Include path. Do this from the
 | 
			
		||||
"Base" option to apply to both Release and Debug builds. Now add the following
 | 
			
		||||
files to the project:
 | 
			
		||||
 | 
			
		||||
pcre.h
 | 
			
		||||
pcre16_byte_order.c
 | 
			
		||||
pcre16_chartables.c
 | 
			
		||||
pcre16_compile.c
 | 
			
		||||
pcre16_config.c
 | 
			
		||||
pcre16_dfa_exec.c
 | 
			
		||||
pcre16_exec.c
 | 
			
		||||
pcre16_fullinfo.c
 | 
			
		||||
pcre16_get.c
 | 
			
		||||
pcre16_globals.c
 | 
			
		||||
pcre16_maketables.c
 | 
			
		||||
pcre16_newline.c
 | 
			
		||||
pcre16_ord2utf16.c
 | 
			
		||||
pcre16_printint.c
 | 
			
		||||
pcre16_refcount.c
 | 
			
		||||
pcre16_string_utils.c
 | 
			
		||||
pcre16_study.c
 | 
			
		||||
pcre16_tables.c
 | 
			
		||||
pcre16_ucd.c
 | 
			
		||||
pcre16_utf16_utils.c
 | 
			
		||||
pcre16_valid_utf16.c
 | 
			
		||||
pcre16_version.c
 | 
			
		||||
pcre16_xclass.c
 | 
			
		||||
 | 
			
		||||
//Optional
 | 
			
		||||
pcre_version.c
 | 
			
		||||
 | 
			
		||||
7. After compiling the .lib file, copy the .lib and header files to a project
 | 
			
		||||
you want to use PCRE with. Enjoy.
 | 
			
		||||
 | 
			
		||||
Optional ... Building PCRE using the makevp.bat file:
 | 
			
		||||
 | 
			
		||||
1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit
 | 
			
		||||
versions.
 | 
			
		||||
 | 
			
		||||
2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat.
 | 
			
		||||
 | 
			
		||||
Another PCRE user added this comment:
 | 
			
		||||
 | 
			
		||||
Another approach I successfully used for some years with BCB 5 and 6 was to
 | 
			
		||||
make sure that include and library paths of PCRE are configured before the
 | 
			
		||||
default paths of the IDE in the dialogs where one can manage those paths.
 | 
			
		||||
Afterwards one can open the project files using a text editor and manually add
 | 
			
		||||
the self created library for pcre itself, pcrecpp doesn't ship with the IDE, in
 | 
			
		||||
the library nodes where the IDE manages its own libraries to link against in
 | 
			
		||||
front of the IDE-own libraries. This way one can use the default PCRE function
 | 
			
		||||
names without getting access violations on runtime.
 | 
			
		||||
 | 
			
		||||
  <ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING PCRE ON OPENVMS
 | 
			
		||||
 | 
			
		||||
Stephen Hoffman sent the following, in December 2012:
 | 
			
		||||
 | 
			
		||||
"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the
 | 
			
		||||
OpenVMS port and here
 | 
			
		||||
 | 
			
		||||
<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip>
 | 
			
		||||
 | 
			
		||||
is a zip with the OpenVMS files, and with one modified testing-related PCRE
 | 
			
		||||
file." This is a port of PCRE 8.32.
 | 
			
		||||
 | 
			
		||||
Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS.
 | 
			
		||||
They relate to an older version of PCRE that used fewer source files, so the
 | 
			
		||||
exact commands will need changing. See the current list of source files above.
 | 
			
		||||
 | 
			
		||||
"It was quite easy to compile and link the library. I don't have a formal
 | 
			
		||||
make file but the attached file [reproduced below] contains the OpenVMS DCL
 | 
			
		||||
commands I used to build the library. I had to add #define
 | 
			
		||||
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
 | 
			
		||||
 | 
			
		||||
The library was built on:
 | 
			
		||||
O/S: HP OpenVMS v7.3-1
 | 
			
		||||
Compiler: Compaq C v6.5-001-48BCD
 | 
			
		||||
Linker: vA13-01
 | 
			
		||||
 | 
			
		||||
The test results did not match 100% due to the issues you mention in your
 | 
			
		||||
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
 | 
			
		||||
modified some of the character tables temporarily and was able to get the
 | 
			
		||||
results to match. Tests using the fr locale did not match since I don't have
 | 
			
		||||
that locale loaded. The study size was always reported to be 3 less than the
 | 
			
		||||
value in the standard test output files."
 | 
			
		||||
 | 
			
		||||
=========================
 | 
			
		||||
$! This DCL procedure builds PCRE on OpenVMS
 | 
			
		||||
$!
 | 
			
		||||
$! I followed the instructions in the non-unix-use file in the distribution.
 | 
			
		||||
$!
 | 
			
		||||
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES"
 | 
			
		||||
$ COMPILE DFTABLES.C
 | 
			
		||||
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
 | 
			
		||||
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
 | 
			
		||||
$ COMPILE MAKETABLES.C
 | 
			
		||||
$ COMPILE GET.C
 | 
			
		||||
$ COMPILE STUDY.C
 | 
			
		||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
 | 
			
		||||
$! did not seem to be defined anywhere.
 | 
			
		||||
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
 | 
			
		||||
$ COMPILE PCRE.C
 | 
			
		||||
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
 | 
			
		||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
 | 
			
		||||
$! did not seem to be defined anywhere.
 | 
			
		||||
$ COMPILE PCREPOSIX.C
 | 
			
		||||
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
 | 
			
		||||
$ COMPILE PCRETEST.C
 | 
			
		||||
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
 | 
			
		||||
$! C programs that want access to command line arguments must be
 | 
			
		||||
$! defined as a symbol
 | 
			
		||||
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
 | 
			
		||||
$! Arguments must be enclosed in quotes.
 | 
			
		||||
$ PCRETEST "-C"
 | 
			
		||||
$! Test results:
 | 
			
		||||
$!
 | 
			
		||||
$!   The test results did not match 100%. The functions isprint(), iscntrl(),
 | 
			
		||||
$!   isgraph() and ispunct() on OpenVMS must not produce the same results
 | 
			
		||||
$!   as the system that built the test output files provided with the
 | 
			
		||||
$!   distribution.
 | 
			
		||||
$!
 | 
			
		||||
$!   The study size did not match and was always 3 less on OpenVMS.
 | 
			
		||||
$!
 | 
			
		||||
$!   Locale could not be set to fr
 | 
			
		||||
$!
 | 
			
		||||
=========================
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING PCRE ON STRATUS OPENVOS
 | 
			
		||||
 | 
			
		||||
These notes on the port of PCRE to VOS (lightly edited) were supplied by
 | 
			
		||||
Ashutosh Warikoo, whose email address has the local part awarikoo and the
 | 
			
		||||
domain nse.co.in. The port was for version 7.9 in August 2009.
 | 
			
		||||
 | 
			
		||||
1.   Building PCRE
 | 
			
		||||
 | 
			
		||||
I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any
 | 
			
		||||
problems. I used the following packages to build PCRE:
 | 
			
		||||
 | 
			
		||||
  ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz
 | 
			
		||||
 | 
			
		||||
Please read and follow the instructions that come with these packages. To start
 | 
			
		||||
the build of pcre, from the root of the package type:
 | 
			
		||||
 | 
			
		||||
  ./build.sh
 | 
			
		||||
 | 
			
		||||
2. Installing PCRE
 | 
			
		||||
 | 
			
		||||
Once you have successfully built PCRE, login to the SysAdmin group, switch to
 | 
			
		||||
the root user, and type
 | 
			
		||||
 | 
			
		||||
  [ !create_dir (master_disk)>usr   --if needed ]
 | 
			
		||||
  [ !create_dir (master_disk)>usr>local   --if needed ]
 | 
			
		||||
    !gmake install
 | 
			
		||||
 | 
			
		||||
This installs PCRE and its man pages into /usr/local. You can add
 | 
			
		||||
(master_disk)>usr>local>bin to your command search paths, or if you are in
 | 
			
		||||
BASH, add /usr/local/bin to the PATH environment variable.
 | 
			
		||||
 | 
			
		||||
4. Restrictions
 | 
			
		||||
 | 
			
		||||
This port can optionally use the readline library. However, during the build I
 | 
			
		||||
faced some yet unexplored errors while linking with readline. As it was an
 | 
			
		||||
optional component I chose to disable it.
 | 
			
		||||
 | 
			
		||||
5. Known Problems
 | 
			
		||||
 | 
			
		||||
I ran the test suite, but you will have to be your own judge of whether this
 | 
			
		||||
command, and this port, suits your purposes. If you find any problems that
 | 
			
		||||
appear to be related to the port itself, please let me know. Please see the
 | 
			
		||||
build.log file in the root of the package also.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
BUILDING PCRE ON NATIVE Z/OS AND Z/VM
 | 
			
		||||
 | 
			
		||||
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
 | 
			
		||||
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
 | 
			
		||||
applications can be supported through UNIX System Services, and in such an
 | 
			
		||||
environment PCRE can be built in the same way as in other systems. However, in
 | 
			
		||||
native z/OS (without UNIX System Services) and in z/VM, special ports are
 | 
			
		||||
required. For details, please see this web site:
 | 
			
		||||
 | 
			
		||||
  http://www.zaconsultants.net
 | 
			
		||||
 | 
			
		||||
There is also a mirror here:
 | 
			
		||||
 | 
			
		||||
  http://www.vsoft-software.com/downloads.html
 | 
			
		||||
 | 
			
		||||
==========================
 | 
			
		||||
Last Updated: 14 May 2013
 | 
			
		||||
							
								
								
									
										991
									
								
								tools/pcre/doc/html/README.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										991
									
								
								tools/pcre/doc/html/README.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,991 @@
 | 
			
		||||
README file for PCRE (Perl-compatible regular expression library)
 | 
			
		||||
-----------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
The latest release of PCRE is always available in three alternative formats
 | 
			
		||||
from:
 | 
			
		||||
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
 | 
			
		||||
 | 
			
		||||
There is a mailing list for discussion about the development of PCRE at
 | 
			
		||||
pcre-dev@exim.org. You can access the archives and subscribe or manage your
 | 
			
		||||
subscription here:
 | 
			
		||||
 | 
			
		||||
   https://lists.exim.org/mailman/listinfo/pcre-dev
 | 
			
		||||
 | 
			
		||||
Please read the NEWS file if you are upgrading from a previous release.
 | 
			
		||||
The contents of this README file are:
 | 
			
		||||
 | 
			
		||||
  The PCRE APIs
 | 
			
		||||
  Documentation for PCRE
 | 
			
		||||
  Contributions by users of PCRE
 | 
			
		||||
  Building PCRE on non-Unix-like systems
 | 
			
		||||
  Building PCRE without using autotools
 | 
			
		||||
  Building PCRE using autotools
 | 
			
		||||
  Retrieving configuration information
 | 
			
		||||
  Shared libraries
 | 
			
		||||
  Cross-compiling using autotools
 | 
			
		||||
  Using HP's ANSI C++ compiler (aCC)
 | 
			
		||||
  Compiling in Tru64 using native compilers
 | 
			
		||||
  Using Sun's compilers for Solaris
 | 
			
		||||
  Using PCRE from MySQL
 | 
			
		||||
  Making new tarballs
 | 
			
		||||
  Testing PCRE
 | 
			
		||||
  Character tables
 | 
			
		||||
  File manifest
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
The PCRE APIs
 | 
			
		||||
-------------
 | 
			
		||||
 | 
			
		||||
PCRE is written in C, and it has its own API. There are three sets of
 | 
			
		||||
functions, one for the 8-bit library, which processes strings of bytes, one for
 | 
			
		||||
the 16-bit library, which processes strings of 16-bit values, and one for the
 | 
			
		||||
32-bit library, which processes strings of 32-bit values. The distribution also
 | 
			
		||||
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
 | 
			
		||||
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
 | 
			
		||||
C++.
 | 
			
		||||
 | 
			
		||||
In addition, there is a set of C wrapper functions (again, just for the 8-bit
 | 
			
		||||
library) that are based on the POSIX regular expression API (see the pcreposix
 | 
			
		||||
man page). These end up in the library called libpcreposix. Note that this just
 | 
			
		||||
provides a POSIX calling interface to PCRE; the regular expressions themselves
 | 
			
		||||
still follow Perl syntax and semantics. The POSIX API is restricted, and does
 | 
			
		||||
not give full access to all of PCRE's facilities.
 | 
			
		||||
 | 
			
		||||
The header file for the POSIX-style functions is called pcreposix.h. The
 | 
			
		||||
official POSIX name is regex.h, but I did not want to risk possible problems
 | 
			
		||||
with existing files of that name by distributing it that way. To use PCRE with
 | 
			
		||||
an existing program that uses the POSIX API, pcreposix.h will have to be
 | 
			
		||||
renamed or pointed at by a link.
 | 
			
		||||
 | 
			
		||||
If you are using the POSIX interface to PCRE and there is already a POSIX regex
 | 
			
		||||
library installed on your system, as well as worrying about the regex.h header
 | 
			
		||||
file (as mentioned above), you must also take care when linking programs to
 | 
			
		||||
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
 | 
			
		||||
up the POSIX functions of the same name from the other library.
 | 
			
		||||
 | 
			
		||||
One way of avoiding this confusion is to compile PCRE with the addition of
 | 
			
		||||
-Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the
 | 
			
		||||
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
 | 
			
		||||
effect of renaming the functions so that the names no longer clash. Of course,
 | 
			
		||||
you have to do the same thing for your applications, or write them using the
 | 
			
		||||
new names.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Documentation for PCRE
 | 
			
		||||
----------------------
 | 
			
		||||
 | 
			
		||||
If you install PCRE in the normal way on a Unix-like system, you will end up
 | 
			
		||||
with a set of man pages whose names all start with "pcre". The one that is just
 | 
			
		||||
called "pcre" lists all the others. In addition to these man pages, the PCRE
 | 
			
		||||
documentation is supplied in two other forms:
 | 
			
		||||
 | 
			
		||||
  1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
 | 
			
		||||
     doc/pcretest.txt in the source distribution. The first of these is a
 | 
			
		||||
     concatenation of the text forms of all the section 3 man pages except
 | 
			
		||||
     the listing of pcredemo.c and those that summarize individual functions.
 | 
			
		||||
     The other two are the text forms of the section 1 man pages for the
 | 
			
		||||
     pcregrep and pcretest commands. These text forms are provided for ease of
 | 
			
		||||
     scanning with text editors or similar tools. They are installed in
 | 
			
		||||
     <prefix>/share/doc/pcre, where <prefix> is the installation prefix
 | 
			
		||||
     (defaulting to /usr/local).
 | 
			
		||||
 | 
			
		||||
  2. A set of files containing all the documentation in HTML form, hyperlinked
 | 
			
		||||
     in various ways, and rooted in a file called index.html, is distributed in
 | 
			
		||||
     doc/html and installed in <prefix>/share/doc/pcre/html.
 | 
			
		||||
 | 
			
		||||
Users of PCRE have contributed files containing the documentation for various
 | 
			
		||||
releases in CHM format. These can be found in the Contrib directory of the FTP
 | 
			
		||||
site (see next section).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Contributions by users of PCRE
 | 
			
		||||
------------------------------
 | 
			
		||||
 | 
			
		||||
You can find contributions from PCRE users in the directory
 | 
			
		||||
 | 
			
		||||
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
 | 
			
		||||
 | 
			
		||||
There is a README file giving brief descriptions of what they are. Some are
 | 
			
		||||
complete in themselves; others are pointers to URLs containing relevant files.
 | 
			
		||||
Some of this material is likely to be well out-of-date. Several of the earlier
 | 
			
		||||
contributions provided support for compiling PCRE on various flavours of
 | 
			
		||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
 | 
			
		||||
in the standard distribution, so these contributions have been archived.
 | 
			
		||||
 | 
			
		||||
A PCRE user maintains downloadable Windows binaries of the pcregrep and
 | 
			
		||||
pcretest programs here:
 | 
			
		||||
 | 
			
		||||
  http://www.rexegg.com/pcregrep-pcretest.html
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Building PCRE on non-Unix-like systems
 | 
			
		||||
--------------------------------------
 | 
			
		||||
 | 
			
		||||
For a non-Unix-like system, please read the comments in the file
 | 
			
		||||
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
 | 
			
		||||
"make" you may be able to build PCRE using autotools in the same way as for
 | 
			
		||||
many Unix-like systems.
 | 
			
		||||
 | 
			
		||||
PCRE can also be configured using the GUI facility provided by CMake's
 | 
			
		||||
cmake-gui command. This creates Makefiles, solution files, etc. The file
 | 
			
		||||
NON-AUTOTOOLS-BUILD has information about CMake.
 | 
			
		||||
 | 
			
		||||
PCRE has been compiled on many different operating systems. It should be
 | 
			
		||||
straightforward to build PCRE on any system that has a Standard C compiler and
 | 
			
		||||
library, because it uses only Standard C functions.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Building PCRE without using autotools
 | 
			
		||||
-------------------------------------
 | 
			
		||||
 | 
			
		||||
The use of autotools (in particular, libtool) is problematic in some
 | 
			
		||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
 | 
			
		||||
file for ways of building PCRE without using autotools.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Building PCRE using autotools
 | 
			
		||||
-----------------------------
 | 
			
		||||
 | 
			
		||||
If you are using HP's ANSI C++ compiler (aCC), please see the special note
 | 
			
		||||
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
 | 
			
		||||
 | 
			
		||||
The following instructions assume the use of the widely used "configure; make;
 | 
			
		||||
make install" (autotools) process.
 | 
			
		||||
 | 
			
		||||
To build PCRE on a system that supports autotools, first run the "configure"
 | 
			
		||||
command from the PCRE distribution directory, with your current directory set
 | 
			
		||||
to the directory where you want the files to be created. This command is a
 | 
			
		||||
standard GNU "autoconf" configuration script, for which generic instructions
 | 
			
		||||
are supplied in the file INSTALL.
 | 
			
		||||
 | 
			
		||||
Most commonly, people build PCRE within its own distribution directory, and in
 | 
			
		||||
this case, on many systems, just running "./configure" is sufficient. However,
 | 
			
		||||
the usual methods of changing standard defaults are available. For example:
 | 
			
		||||
 | 
			
		||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
 | 
			
		||||
 | 
			
		||||
This command specifies that the C compiler should be run with the flags '-O2
 | 
			
		||||
-Wall' instead of the default, and that "make install" should install PCRE
 | 
			
		||||
under /opt/local instead of the default /usr/local.
 | 
			
		||||
 | 
			
		||||
If you want to build in a different directory, just run "configure" with that
 | 
			
		||||
directory as current. For example, suppose you have unpacked the PCRE source
 | 
			
		||||
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
 | 
			
		||||
 | 
			
		||||
cd /build/pcre/pcre-xxx
 | 
			
		||||
/source/pcre/pcre-xxx/configure
 | 
			
		||||
 | 
			
		||||
PCRE is written in C and is normally compiled as a C library. However, it is
 | 
			
		||||
possible to build it as a C++ library, though the provided building apparatus
 | 
			
		||||
does not have any features to support this.
 | 
			
		||||
 | 
			
		||||
There are some optional features that can be included or omitted from the PCRE
 | 
			
		||||
library. They are also documented in the pcrebuild man page.
 | 
			
		||||
 | 
			
		||||
. By default, both shared and static libraries are built. You can change this
 | 
			
		||||
  by adding one of these options to the "configure" command:
 | 
			
		||||
 | 
			
		||||
  --disable-shared
 | 
			
		||||
  --disable-static
 | 
			
		||||
 | 
			
		||||
  (See also "Shared libraries" below.)
 | 
			
		||||
 | 
			
		||||
. By default, only the 8-bit library is built. If you add --enable-pcre16 to
 | 
			
		||||
  the "configure" command, the 16-bit library is also built. If you add
 | 
			
		||||
  --enable-pcre32 to the "configure" command, the 32-bit library is also built.
 | 
			
		||||
  If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
 | 
			
		||||
  building the 8-bit library.
 | 
			
		||||
 | 
			
		||||
. If you are building the 8-bit library and want to suppress the building of
 | 
			
		||||
  the C++ wrapper library, you can add --disable-cpp to the "configure"
 | 
			
		||||
  command. Otherwise, when "configure" is run without --disable-pcre8, it will
 | 
			
		||||
  try to find a C++ compiler and C++ header files, and if it succeeds, it will
 | 
			
		||||
  try to build the C++ wrapper.
 | 
			
		||||
 | 
			
		||||
. If you want to include support for just-in-time compiling, which can give
 | 
			
		||||
  large performance improvements on certain platforms, add --enable-jit to the
 | 
			
		||||
  "configure" command. This support is available only for certain hardware
 | 
			
		||||
  architectures. If you try to enable it on an unsupported architecture, there
 | 
			
		||||
  will be a compile time error.
 | 
			
		||||
 | 
			
		||||
. When JIT support is enabled, pcregrep automatically makes use of it, unless
 | 
			
		||||
  you add --disable-pcregrep-jit to the "configure" command.
 | 
			
		||||
 | 
			
		||||
. If you want to make use of the support for UTF-8 Unicode character strings in
 | 
			
		||||
  the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
 | 
			
		||||
  or UTF-32 Unicode character strings in the 32-bit library, you must add
 | 
			
		||||
  --enable-utf to the "configure" command. Without it, the code for handling
 | 
			
		||||
  UTF-8, UTF-16 and UTF-32 is not included in the relevant library. Even
 | 
			
		||||
  when --enable-utf is included, the use of a UTF encoding still has to be
 | 
			
		||||
  enabled by an option at run time. When PCRE is compiled with this option, its
 | 
			
		||||
  input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC
 | 
			
		||||
  platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
 | 
			
		||||
  the same time.
 | 
			
		||||
 | 
			
		||||
. There are no separate options for enabling UTF-8, UTF-16 and UTF-32
 | 
			
		||||
  independently because that would allow ridiculous settings such as requesting
 | 
			
		||||
  UTF-16 support while building only the 8-bit library. However, the option
 | 
			
		||||
  --enable-utf8 is retained for backwards compatibility with earlier releases
 | 
			
		||||
  that did not support 16-bit or 32-bit character strings. It is synonymous with
 | 
			
		||||
  --enable-utf. It is not possible to configure one library with UTF support
 | 
			
		||||
  and the other without in the same configuration.
 | 
			
		||||
 | 
			
		||||
. If, in addition to support for UTF-8/16/32 character strings, you want to
 | 
			
		||||
  include support for the \P, \p, and \X sequences that recognize Unicode
 | 
			
		||||
  character properties, you must add --enable-unicode-properties to the
 | 
			
		||||
  "configure" command. This adds about 30K to the size of the library (in the
 | 
			
		||||
  form of a property table); only the basic two-letter properties such as Lu
 | 
			
		||||
  are supported.
 | 
			
		||||
 | 
			
		||||
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
 | 
			
		||||
  of the preceding, or any of the Unicode newline sequences as indicating the
 | 
			
		||||
  end of a line. Whatever you specify at build time is the default; the caller
 | 
			
		||||
  of PCRE can change the selection at run time. The default newline indicator
 | 
			
		||||
  is a single LF character (the Unix standard). You can specify the default
 | 
			
		||||
  newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
 | 
			
		||||
  or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
 | 
			
		||||
  --enable-newline-is-any to the "configure" command, respectively.
 | 
			
		||||
 | 
			
		||||
  If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
 | 
			
		||||
  the standard tests will fail, because the lines in the test files end with
 | 
			
		||||
  LF. Even if the files are edited to change the line endings, there are likely
 | 
			
		||||
  to be some failures. With --enable-newline-is-anycrlf or
 | 
			
		||||
  --enable-newline-is-any, many tests should succeed, but there may be some
 | 
			
		||||
  failures.
 | 
			
		||||
 | 
			
		||||
. By default, the sequence \R in a pattern matches any Unicode line ending
 | 
			
		||||
  sequence. This is independent of the option specifying what PCRE considers to
 | 
			
		||||
  be the end of a line (see above). However, the caller of PCRE can restrict \R
 | 
			
		||||
  to match only CR, LF, or CRLF. You can make this the default by adding
 | 
			
		||||
  --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
 | 
			
		||||
 | 
			
		||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
 | 
			
		||||
  storage for processing capturing parentheses if there are more than 10 of
 | 
			
		||||
  them in a pattern. You can increase this threshold by setting, for example,
 | 
			
		||||
 | 
			
		||||
  --with-posix-malloc-threshold=20
 | 
			
		||||
 | 
			
		||||
  on the "configure" command.
 | 
			
		||||
 | 
			
		||||
. PCRE has a counter that limits the depth of nesting of parentheses in a
 | 
			
		||||
  pattern. This limits the amount of system stack that a pattern uses when it
 | 
			
		||||
  is compiled. The default is 250, but you can change it by setting, for
 | 
			
		||||
  example,
 | 
			
		||||
 | 
			
		||||
  --with-parens-nest-limit=500
 | 
			
		||||
 | 
			
		||||
. PCRE has a counter that can be set to limit the amount of resources it uses
 | 
			
		||||
  when matching a pattern. If the limit is exceeded during a match, the match
 | 
			
		||||
  fails. The default is ten million. You can change the default by setting, for
 | 
			
		||||
  example,
 | 
			
		||||
 | 
			
		||||
  --with-match-limit=500000
 | 
			
		||||
 | 
			
		||||
  on the "configure" command. This is just the default; individual calls to
 | 
			
		||||
  pcre_exec() can supply their own value. There is more discussion on the
 | 
			
		||||
  pcreapi man page.
 | 
			
		||||
 | 
			
		||||
. There is a separate counter that limits the depth of recursive function calls
 | 
			
		||||
  during a matching process. This also has a default of ten million, which is
 | 
			
		||||
  essentially "unlimited". You can change the default by setting, for example,
 | 
			
		||||
 | 
			
		||||
  --with-match-limit-recursion=500000
 | 
			
		||||
 | 
			
		||||
  Recursive function calls use up the runtime stack; running out of stack can
 | 
			
		||||
  cause programs to crash in strange ways. There is a discussion about stack
 | 
			
		||||
  sizes in the pcrestack man page.
 | 
			
		||||
 | 
			
		||||
. The default maximum compiled pattern size is around 64K. You can increase
 | 
			
		||||
  this by adding --with-link-size=3 to the "configure" command. In the 8-bit
 | 
			
		||||
  library, PCRE then uses three bytes instead of two for offsets to different
 | 
			
		||||
  parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
 | 
			
		||||
  the same as --with-link-size=4, which (in both libraries) uses four-byte
 | 
			
		||||
  offsets. Increasing the internal link size reduces performance. In the 32-bit
 | 
			
		||||
  library, the only supported link size is 4.
 | 
			
		||||
 | 
			
		||||
. You can build PCRE so that its internal match() function that is called from
 | 
			
		||||
  pcre_exec() does not call itself recursively. Instead, it uses memory blocks
 | 
			
		||||
  obtained from the heap via the special functions pcre_stack_malloc() and
 | 
			
		||||
  pcre_stack_free() to save data that would otherwise be saved on the stack. To
 | 
			
		||||
  build PCRE like this, use
 | 
			
		||||
 | 
			
		||||
  --disable-stack-for-recursion
 | 
			
		||||
 | 
			
		||||
  on the "configure" command. PCRE runs more slowly in this mode, but it may be
 | 
			
		||||
  necessary in environments with limited stack sizes. This applies only to the
 | 
			
		||||
  normal execution of the pcre_exec() function; if JIT support is being
 | 
			
		||||
  successfully used, it is not relevant. Equally, it does not apply to
 | 
			
		||||
  pcre_dfa_exec(), which does not use deeply nested recursion. There is a
 | 
			
		||||
  discussion about stack sizes in the pcrestack man page.
 | 
			
		||||
 | 
			
		||||
. For speed, PCRE uses four tables for manipulating and identifying characters
 | 
			
		||||
  whose code point values are less than 256. By default, it uses a set of
 | 
			
		||||
  tables for ASCII encoding that is part of the distribution. If you specify
 | 
			
		||||
 | 
			
		||||
  --enable-rebuild-chartables
 | 
			
		||||
 | 
			
		||||
  a program called dftables is compiled and run in the default C locale when
 | 
			
		||||
  you obey "make". It builds a source file called pcre_chartables.c. If you do
 | 
			
		||||
  not specify this option, pcre_chartables.c is created as a copy of
 | 
			
		||||
  pcre_chartables.c.dist. See "Character tables" below for further information.
 | 
			
		||||
 | 
			
		||||
. It is possible to compile PCRE for use on systems that use EBCDIC as their
 | 
			
		||||
  character code (as opposed to ASCII/Unicode) by specifying
 | 
			
		||||
 | 
			
		||||
  --enable-ebcdic
 | 
			
		||||
 | 
			
		||||
  This automatically implies --enable-rebuild-chartables (see above). However,
 | 
			
		||||
  when PCRE is built this way, it always operates in EBCDIC. It cannot support
 | 
			
		||||
  both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
 | 
			
		||||
  which specifies that the code value for the EBCDIC NL character is 0x25
 | 
			
		||||
  instead of the default 0x15.
 | 
			
		||||
 | 
			
		||||
. In environments where valgrind is installed, if you specify
 | 
			
		||||
 | 
			
		||||
  --enable-valgrind
 | 
			
		||||
 | 
			
		||||
  PCRE will use valgrind annotations to mark certain memory regions as
 | 
			
		||||
  unaddressable. This allows it to detect invalid memory accesses, and is
 | 
			
		||||
  mostly useful for debugging PCRE itself.
 | 
			
		||||
 | 
			
		||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
 | 
			
		||||
  is installed, if you specify
 | 
			
		||||
 | 
			
		||||
  --enable-coverage
 | 
			
		||||
 | 
			
		||||
  the build process implements a code coverage report for the test suite. The
 | 
			
		||||
  report is generated by running "make coverage". If ccache is installed on
 | 
			
		||||
  your system, it must be disabled when building PCRE for coverage reporting.
 | 
			
		||||
  You can do this by setting the environment variable CCACHE_DISABLE=1 before
 | 
			
		||||
  running "make" to build PCRE. There is more information about coverage
 | 
			
		||||
  reporting in the "pcrebuild" documentation.
 | 
			
		||||
 | 
			
		||||
. The pcregrep program currently supports only 8-bit data files, and so
 | 
			
		||||
  requires the 8-bit PCRE library. It is possible to compile pcregrep to use
 | 
			
		||||
  libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
 | 
			
		||||
  specifying one or both of
 | 
			
		||||
 | 
			
		||||
  --enable-pcregrep-libz
 | 
			
		||||
  --enable-pcregrep-libbz2
 | 
			
		||||
 | 
			
		||||
  Of course, the relevant libraries must be installed on your system.
 | 
			
		||||
 | 
			
		||||
. The default size (in bytes) of the internal buffer used by pcregrep can be
 | 
			
		||||
  set by, for example:
 | 
			
		||||
 | 
			
		||||
  --with-pcregrep-bufsize=51200
 | 
			
		||||
 | 
			
		||||
  The value must be a plain integer. The default is 20480.
 | 
			
		||||
 | 
			
		||||
. It is possible to compile pcretest so that it links with the libreadline
 | 
			
		||||
  or libedit libraries, by specifying, respectively,
 | 
			
		||||
 | 
			
		||||
  --enable-pcretest-libreadline or --enable-pcretest-libedit
 | 
			
		||||
 | 
			
		||||
  If this is done, when pcretest's input is from a terminal, it reads it using
 | 
			
		||||
  the readline() function. This provides line-editing and history facilities.
 | 
			
		||||
  Note that libreadline is GPL-licenced, so if you distribute a binary of
 | 
			
		||||
  pcretest linked in this way, there may be licensing issues. These can be
 | 
			
		||||
  avoided by linking with libedit (which has a BSD licence) instead.
 | 
			
		||||
 | 
			
		||||
  Enabling libreadline causes the -lreadline option to be added to the pcretest
 | 
			
		||||
  build. In many operating environments with a system-installed readline
 | 
			
		||||
  library this is sufficient. However, in some environments (e.g. if an
 | 
			
		||||
  unmodified distribution version of readline is in use), it may be necessary
 | 
			
		||||
  to specify something like LIBS="-lncurses" as well. This is because, to quote
 | 
			
		||||
  the readline INSTALL, "Readline uses the termcap functions, but does not link
 | 
			
		||||
  with the termcap or curses library itself, allowing applications which link
 | 
			
		||||
  with readline the to choose an appropriate library." If you get error
 | 
			
		||||
  messages about missing functions tgetstr, tgetent, tputs, tgetflag, or tgoto,
 | 
			
		||||
  this is the problem, and linking with the ncurses library should fix it.
 | 
			
		||||
 | 
			
		||||
The "configure" script builds the following files for the basic C library:
 | 
			
		||||
 | 
			
		||||
. Makefile             the makefile that builds the library
 | 
			
		||||
. config.h             build-time configuration options for the library
 | 
			
		||||
. pcre.h               the public PCRE header file
 | 
			
		||||
. pcre-config          script that shows the building settings such as CFLAGS
 | 
			
		||||
                         that were set for "configure"
 | 
			
		||||
. libpcre.pc         ) data for the pkg-config command
 | 
			
		||||
. libpcre16.pc       )
 | 
			
		||||
. libpcre32.pc       )
 | 
			
		||||
. libpcreposix.pc    )
 | 
			
		||||
. libtool              script that builds shared and/or static libraries
 | 
			
		||||
 | 
			
		||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
 | 
			
		||||
names config.h.generic and pcre.h.generic. These are provided for those who
 | 
			
		||||
have to build PCRE without using "configure" or CMake. If you use "configure"
 | 
			
		||||
or CMake, the .generic versions are not used.
 | 
			
		||||
 | 
			
		||||
When building the 8-bit library, if a C++ compiler is found, the following
 | 
			
		||||
files are also built:
 | 
			
		||||
 | 
			
		||||
. libpcrecpp.pc        data for the pkg-config command
 | 
			
		||||
. pcrecpparg.h         header file for calling PCRE via the C++ wrapper
 | 
			
		||||
. pcre_stringpiece.h   header for the C++ "stringpiece" functions
 | 
			
		||||
 | 
			
		||||
The "configure" script also creates config.status, which is an executable
 | 
			
		||||
script that can be run to recreate the configuration, and config.log, which
 | 
			
		||||
contains compiler output from tests that "configure" runs.
 | 
			
		||||
 | 
			
		||||
Once "configure" has run, you can run "make". This builds the libraries
 | 
			
		||||
libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you
 | 
			
		||||
enabled JIT support with --enable-jit, a test program called pcre_jit_test is
 | 
			
		||||
built as well.
 | 
			
		||||
 | 
			
		||||
If the 8-bit library is built, libpcreposix and the pcregrep command are also
 | 
			
		||||
built, and if a C++ compiler was found on your system, and you did not disable
 | 
			
		||||
it with --disable-cpp, "make" builds the C++ wrapper library, which is called
 | 
			
		||||
libpcrecpp, as well as some test programs called pcrecpp_unittest,
 | 
			
		||||
pcre_scanner_unittest, and pcre_stringpiece_unittest.
 | 
			
		||||
 | 
			
		||||
The command "make check" runs all the appropriate tests. Details of the PCRE
 | 
			
		||||
tests are given below in a separate section of this document.
 | 
			
		||||
 | 
			
		||||
You can use "make install" to install PCRE into live directories on your
 | 
			
		||||
system. The following are installed (file names are all relative to the
 | 
			
		||||
<prefix> that is set when "configure" is run):
 | 
			
		||||
 | 
			
		||||
  Commands (bin):
 | 
			
		||||
    pcretest
 | 
			
		||||
    pcregrep (if 8-bit support is enabled)
 | 
			
		||||
    pcre-config
 | 
			
		||||
 | 
			
		||||
  Libraries (lib):
 | 
			
		||||
    libpcre16     (if 16-bit support is enabled)
 | 
			
		||||
    libpcre32     (if 32-bit support is enabled)
 | 
			
		||||
    libpcre       (if 8-bit support is enabled)
 | 
			
		||||
    libpcreposix  (if 8-bit support is enabled)
 | 
			
		||||
    libpcrecpp    (if 8-bit and C++ support is enabled)
 | 
			
		||||
 | 
			
		||||
  Configuration information (lib/pkgconfig):
 | 
			
		||||
    libpcre16.pc
 | 
			
		||||
    libpcre32.pc
 | 
			
		||||
    libpcre.pc
 | 
			
		||||
    libpcreposix.pc
 | 
			
		||||
    libpcrecpp.pc (if C++ support is enabled)
 | 
			
		||||
 | 
			
		||||
  Header files (include):
 | 
			
		||||
    pcre.h
 | 
			
		||||
    pcreposix.h
 | 
			
		||||
    pcre_scanner.h      )
 | 
			
		||||
    pcre_stringpiece.h  ) if C++ support is enabled
 | 
			
		||||
    pcrecpp.h           )
 | 
			
		||||
    pcrecpparg.h        )
 | 
			
		||||
 | 
			
		||||
  Man pages (share/man/man{1,3}):
 | 
			
		||||
    pcregrep.1
 | 
			
		||||
    pcretest.1
 | 
			
		||||
    pcre-config.1
 | 
			
		||||
    pcre.3
 | 
			
		||||
    pcre*.3 (lots more pages, all starting "pcre")
 | 
			
		||||
 | 
			
		||||
  HTML documentation (share/doc/pcre/html):
 | 
			
		||||
    index.html
 | 
			
		||||
    *.html (lots more pages, hyperlinked from index.html)
 | 
			
		||||
 | 
			
		||||
  Text file documentation (share/doc/pcre):
 | 
			
		||||
    AUTHORS
 | 
			
		||||
    COPYING
 | 
			
		||||
    ChangeLog
 | 
			
		||||
    LICENCE
 | 
			
		||||
    NEWS
 | 
			
		||||
    README
 | 
			
		||||
    pcre.txt         (a concatenation of the man(3) pages)
 | 
			
		||||
    pcretest.txt     the pcretest man page
 | 
			
		||||
    pcregrep.txt     the pcregrep man page
 | 
			
		||||
    pcre-config.txt  the pcre-config man page
 | 
			
		||||
 | 
			
		||||
If you want to remove PCRE from your system, you can run "make uninstall".
 | 
			
		||||
This removes all the files that "make install" installed. However, it does not
 | 
			
		||||
remove any directories, because these are often shared with other programs.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Retrieving configuration information
 | 
			
		||||
------------------------------------
 | 
			
		||||
 | 
			
		||||
Running "make install" installs the command pcre-config, which can be used to
 | 
			
		||||
recall information about the PCRE configuration and installation. For example:
 | 
			
		||||
 | 
			
		||||
  pcre-config --version
 | 
			
		||||
 | 
			
		||||
prints the version number, and
 | 
			
		||||
 | 
			
		||||
  pcre-config --libs
 | 
			
		||||
 | 
			
		||||
outputs information about where the library is installed. This command can be
 | 
			
		||||
included in makefiles for programs that use PCRE, saving the programmer from
 | 
			
		||||
having to remember too many details.
 | 
			
		||||
 | 
			
		||||
The pkg-config command is another system for saving and retrieving information
 | 
			
		||||
about installed libraries. Instead of separate commands for each library, a
 | 
			
		||||
single command is used. For example:
 | 
			
		||||
 | 
			
		||||
  pkg-config --cflags pcre
 | 
			
		||||
 | 
			
		||||
The data is held in *.pc files that are installed in a directory called
 | 
			
		||||
<prefix>/lib/pkgconfig.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Shared libraries
 | 
			
		||||
----------------
 | 
			
		||||
 | 
			
		||||
The default distribution builds PCRE as shared libraries and static libraries,
 | 
			
		||||
as long as the operating system supports shared libraries. Shared library
 | 
			
		||||
support relies on the "libtool" script which is built as part of the
 | 
			
		||||
"configure" process.
 | 
			
		||||
 | 
			
		||||
The libtool script is used to compile and link both shared and static
 | 
			
		||||
libraries. They are placed in a subdirectory called .libs when they are newly
 | 
			
		||||
built. The programs pcretest and pcregrep are built to use these uninstalled
 | 
			
		||||
libraries (by means of wrapper scripts in the case of shared libraries). When
 | 
			
		||||
you use "make install" to install shared libraries, pcregrep and pcretest are
 | 
			
		||||
automatically re-built to use the newly installed shared libraries before being
 | 
			
		||||
installed themselves. However, the versions left in the build directory still
 | 
			
		||||
use the uninstalled libraries.
 | 
			
		||||
 | 
			
		||||
To build PCRE using static libraries only you must use --disable-shared when
 | 
			
		||||
configuring it. For example:
 | 
			
		||||
 | 
			
		||||
./configure --prefix=/usr/gnu --disable-shared
 | 
			
		||||
 | 
			
		||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
 | 
			
		||||
build only shared libraries.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Cross-compiling using autotools
 | 
			
		||||
-------------------------------
 | 
			
		||||
 | 
			
		||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
 | 
			
		||||
order to cross-compile PCRE for some other host. However, you should NOT
 | 
			
		||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
 | 
			
		||||
file is compiled and run on the local host, in order to generate the inbuilt
 | 
			
		||||
character tables (the pcre_chartables.c file). This will probably not work,
 | 
			
		||||
because dftables.c needs to be compiled with the local compiler, not the cross
 | 
			
		||||
compiler.
 | 
			
		||||
 | 
			
		||||
When --enable-rebuild-chartables is not specified, pcre_chartables.c is created
 | 
			
		||||
by making a copy of pcre_chartables.c.dist, which is a default set of tables
 | 
			
		||||
that assumes ASCII code. Cross-compiling with the default tables should not be
 | 
			
		||||
a problem.
 | 
			
		||||
 | 
			
		||||
If you need to modify the character tables when cross-compiling, you should
 | 
			
		||||
move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and
 | 
			
		||||
run it on the local host to make a new version of pcre_chartables.c.dist.
 | 
			
		||||
Then when you cross-compile PCRE this new version of the tables will be used.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Using HP's ANSI C++ compiler (aCC)
 | 
			
		||||
----------------------------------
 | 
			
		||||
 | 
			
		||||
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
 | 
			
		||||
"configure" script, you must include the "-AA" option in the CXXFLAGS
 | 
			
		||||
environment variable in order for the C++ components to compile correctly.
 | 
			
		||||
 | 
			
		||||
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
 | 
			
		||||
needed libraries fail to get included when specifying the "-AA" compiler
 | 
			
		||||
option. If you experience unresolved symbols when linking the C++ programs,
 | 
			
		||||
use the workaround of specifying the following environment variable prior to
 | 
			
		||||
running the "configure" script:
 | 
			
		||||
 | 
			
		||||
  CXXLDFLAGS="-lstd_v2 -lCsup_v2"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Compiling in Tru64 using native compilers
 | 
			
		||||
-----------------------------------------
 | 
			
		||||
 | 
			
		||||
The following error may occur when compiling with native compilers in the Tru64
 | 
			
		||||
operating system:
 | 
			
		||||
 | 
			
		||||
  CXX    libpcrecpp_la-pcrecpp.lo
 | 
			
		||||
cxx: Error: /usr/lib/cmplrs/cxx/V7.1-006/include/cxx/iosfwd, line 58: #error
 | 
			
		||||
          directive: "cannot include iosfwd -- define __USE_STD_IOSTREAM to
 | 
			
		||||
          override default - see section 7.1.2 of the C++ Using Guide"
 | 
			
		||||
#error "cannot include iosfwd -- define __USE_STD_IOSTREAM to override default
 | 
			
		||||
- see section 7.1.2 of the C++ Using Guide"
 | 
			
		||||
 | 
			
		||||
This may be followed by other errors, complaining that 'namespace "std" has no
 | 
			
		||||
member'. The solution to this is to add the line
 | 
			
		||||
 | 
			
		||||
#define __USE_STD_IOSTREAM 1
 | 
			
		||||
 | 
			
		||||
to the config.h file.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Using Sun's compilers for Solaris
 | 
			
		||||
---------------------------------
 | 
			
		||||
 | 
			
		||||
A user reports that the following configurations work on Solaris 9 sparcv9 and
 | 
			
		||||
Solaris 9 x86 (32-bit):
 | 
			
		||||
 | 
			
		||||
  Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g"
 | 
			
		||||
  Solaris 9 x86:     ./configure --disable-cpp CC=/bin/cc CFLAGS="-g"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Using PCRE from MySQL
 | 
			
		||||
---------------------
 | 
			
		||||
 | 
			
		||||
On systems where both PCRE and MySQL are installed, it is possible to make use
 | 
			
		||||
of PCRE from within MySQL, as an alternative to the built-in pattern matching.
 | 
			
		||||
There is a web page that tells you how to do this:
 | 
			
		||||
 | 
			
		||||
  http://www.mysqludf.org/lib_mysqludf_preg/index.php
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Making new tarballs
 | 
			
		||||
-------------------
 | 
			
		||||
 | 
			
		||||
The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and
 | 
			
		||||
zip formats. The command "make distcheck" does the same, but then does a trial
 | 
			
		||||
build of the new distribution to ensure that it works.
 | 
			
		||||
 | 
			
		||||
If you have modified any of the man page sources in the doc directory, you
 | 
			
		||||
should first run the PrepareRelease script before making a distribution. This
 | 
			
		||||
script creates the .txt and HTML forms of the documentation from the man pages.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Testing PCRE
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
To test the basic PCRE library on a Unix-like system, run the RunTest script.
 | 
			
		||||
There is another script called RunGrepTest that tests the options of the
 | 
			
		||||
pcregrep command. If the C++ wrapper library is built, three test programs
 | 
			
		||||
called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest
 | 
			
		||||
are also built. When JIT support is enabled, another test program called
 | 
			
		||||
pcre_jit_test is built.
 | 
			
		||||
 | 
			
		||||
Both the scripts and all the program tests are run if you obey "make check" or
 | 
			
		||||
"make test". For other environments, see the instructions in
 | 
			
		||||
NON-AUTOTOOLS-BUILD.
 | 
			
		||||
 | 
			
		||||
The RunTest script runs the pcretest test program (which is documented in its
 | 
			
		||||
own man page) on each of the relevant testinput files in the testdata
 | 
			
		||||
directory, and compares the output with the contents of the corresponding
 | 
			
		||||
testoutput files. RunTest uses a file called testtry to hold the main output
 | 
			
		||||
from pcretest. Other files whose names begin with "test" are used as working
 | 
			
		||||
files in some tests.
 | 
			
		||||
 | 
			
		||||
Some tests are relevant only when certain build-time options were selected. For
 | 
			
		||||
example, the tests for UTF-8/16/32 support are run only if --enable-utf was
 | 
			
		||||
used. RunTest outputs a comment when it skips a test.
 | 
			
		||||
 | 
			
		||||
Many of the tests that are not skipped are run up to three times. The second
 | 
			
		||||
run forces pcre_study() to be called for all patterns except for a few in some
 | 
			
		||||
tests that are marked "never study" (see the pcretest program for how this is
 | 
			
		||||
done). If JIT support is available, the non-DFA tests are run a third time,
 | 
			
		||||
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
 | 
			
		||||
This testing can be suppressed by putting "nojit" on the RunTest command line.
 | 
			
		||||
 | 
			
		||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 | 
			
		||||
libraries that are enabled. If you want to run just one set of tests, call
 | 
			
		||||
RunTest with either the -8, -16 or -32 option.
 | 
			
		||||
 | 
			
		||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
 | 
			
		||||
on the RunTest command line. To run pcretest on just one or more specific test
 | 
			
		||||
files, give their numbers as arguments to RunTest, for example:
 | 
			
		||||
 | 
			
		||||
  RunTest 2 7 11
 | 
			
		||||
 | 
			
		||||
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
 | 
			
		||||
end), or a number preceded by ~ to exclude a test. For example:
 | 
			
		||||
 | 
			
		||||
  RunTest 3-15 ~10
 | 
			
		||||
 | 
			
		||||
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
 | 
			
		||||
except test 13. Whatever order the arguments are in, the tests are always run
 | 
			
		||||
in numerical order.
 | 
			
		||||
 | 
			
		||||
You can also call RunTest with the single argument "list" to cause it to output
 | 
			
		||||
a list of tests.
 | 
			
		||||
 | 
			
		||||
The first test file can be fed directly into the perltest.pl script to check
 | 
			
		||||
that Perl gives the same results. The only difference you should see is in the
 | 
			
		||||
first few lines, where the Perl version is given instead of the PCRE version.
 | 
			
		||||
 | 
			
		||||
The second set of tests checks pcre_fullinfo(), pcre_study(),
 | 
			
		||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
 | 
			
		||||
detection, and run-time flags that are specific to PCRE, as well as the POSIX
 | 
			
		||||
wrapper API. It also uses the debugging flags to check some of the internals of
 | 
			
		||||
pcre_compile().
 | 
			
		||||
 | 
			
		||||
If you build PCRE with a locale setting that is not the standard C locale, the
 | 
			
		||||
character tables may be different (see next paragraph). In some cases, this may
 | 
			
		||||
cause failures in the second set of tests. For example, in a locale where the
 | 
			
		||||
isprint() function yields TRUE for characters in the range 128-255, the use of
 | 
			
		||||
[:isascii:] inside a character class defines a different set of characters, and
 | 
			
		||||
this shows up in this test as a difference in the compiled code, which is being
 | 
			
		||||
listed for checking. Where the comparison test output contains [\x00-\x7f] the
 | 
			
		||||
test will contain [\x00-\xff], and similarly in some other cases. This is not a
 | 
			
		||||
bug in PCRE.
 | 
			
		||||
 | 
			
		||||
The third set of tests checks pcre_maketables(), the facility for building a
 | 
			
		||||
set of character tables for a specific locale and using them instead of the
 | 
			
		||||
default tables. The tests make use of the "fr_FR" (French) locale. Before
 | 
			
		||||
running the test, the script checks for the presence of this locale by running
 | 
			
		||||
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
 | 
			
		||||
in the list of available locales, the third test cannot be run, and a comment
 | 
			
		||||
is output to say why. If running this test produces instances of the error
 | 
			
		||||
 | 
			
		||||
  ** Failed to set locale "fr_FR"
 | 
			
		||||
 | 
			
		||||
in the comparison output, it means that locale is not available on your system,
 | 
			
		||||
despite being listed by "locale". This does not mean that PCRE is broken.
 | 
			
		||||
 | 
			
		||||
[If you are trying to run this test on Windows, you may be able to get it to
 | 
			
		||||
work by changing "fr_FR" to "french" everywhere it occurs. Alternatively, use
 | 
			
		||||
RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
 | 
			
		||||
Windows versions of test 2. More info on using RunTest.bat is included in the
 | 
			
		||||
document entitled NON-UNIX-USE.]
 | 
			
		||||
 | 
			
		||||
The fourth and fifth tests check the UTF-8/16/32 support and error handling and
 | 
			
		||||
internal UTF features of PCRE that are not relevant to Perl, respectively. The
 | 
			
		||||
sixth and seventh tests do the same for Unicode character properties support.
 | 
			
		||||
 | 
			
		||||
The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
 | 
			
		||||
matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32
 | 
			
		||||
mode with Unicode property support, respectively.
 | 
			
		||||
 | 
			
		||||
The eleventh test checks some internal offsets and code size features; it is
 | 
			
		||||
run only when the default "link size" of 2 is set (in other cases the sizes
 | 
			
		||||
change) and when Unicode property support is enabled.
 | 
			
		||||
 | 
			
		||||
The twelfth test is run only when JIT support is available, and the thirteenth
 | 
			
		||||
test is run only when JIT support is not available. They test some JIT-specific
 | 
			
		||||
features such as information output from pcretest about JIT compilation.
 | 
			
		||||
 | 
			
		||||
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
 | 
			
		||||
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit
 | 
			
		||||
mode. These are tests that generate different output in the two modes. They are
 | 
			
		||||
for general cases, UTF-8/16/32 support, and Unicode property support,
 | 
			
		||||
respectively.
 | 
			
		||||
 | 
			
		||||
The twentieth test is run only in 16/32-bit mode. It tests some specific
 | 
			
		||||
16/32-bit features of the DFA matching engine.
 | 
			
		||||
 | 
			
		||||
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when
 | 
			
		||||
the link size is set to 2 for the 16-bit library. They test reloading
 | 
			
		||||
pre-compiled patterns.
 | 
			
		||||
 | 
			
		||||
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are
 | 
			
		||||
for general cases, and UTF-16 support, respectively.
 | 
			
		||||
 | 
			
		||||
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are
 | 
			
		||||
for general cases, and UTF-32 support, respectively.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Character tables
 | 
			
		||||
----------------
 | 
			
		||||
 | 
			
		||||
For speed, PCRE uses four tables for manipulating and identifying characters
 | 
			
		||||
whose code point values are less than 256. The final argument of the
 | 
			
		||||
pcre_compile() function is a pointer to a block of memory containing the
 | 
			
		||||
concatenated tables. A call to pcre_maketables() can be used to generate a set
 | 
			
		||||
of tables in the current locale. If the final argument for pcre_compile() is
 | 
			
		||||
passed as NULL, a set of default tables that is built into the binary is used.
 | 
			
		||||
 | 
			
		||||
The source file called pcre_chartables.c contains the default set of tables. By
 | 
			
		||||
default, this is created as a copy of pcre_chartables.c.dist, which contains
 | 
			
		||||
tables for ASCII coding. However, if --enable-rebuild-chartables is specified
 | 
			
		||||
for ./configure, a different version of pcre_chartables.c is built by the
 | 
			
		||||
program dftables (compiled from dftables.c), which uses the ANSI C character
 | 
			
		||||
handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to
 | 
			
		||||
build the table sources. This means that the default C locale which is set for
 | 
			
		||||
your system will control the contents of these default tables. You can change
 | 
			
		||||
the default tables by editing pcre_chartables.c and then re-building PCRE. If
 | 
			
		||||
you do this, you should take care to ensure that the file does not get
 | 
			
		||||
automatically re-generated. The best way to do this is to move
 | 
			
		||||
pcre_chartables.c.dist out of the way and replace it with your customized
 | 
			
		||||
tables.
 | 
			
		||||
 | 
			
		||||
When the dftables program is run as a result of --enable-rebuild-chartables,
 | 
			
		||||
it uses the default C locale that is set on your system. It does not pay
 | 
			
		||||
attention to the LC_xxx environment variables. In other words, it uses the
 | 
			
		||||
system's default locale rather than whatever the compiling user happens to have
 | 
			
		||||
set. If you really do want to build a source set of character tables in a
 | 
			
		||||
locale that is specified by the LC_xxx variables, you can run the dftables
 | 
			
		||||
program by hand with the -L option. For example:
 | 
			
		||||
 | 
			
		||||
  ./dftables -L pcre_chartables.c.special
 | 
			
		||||
 | 
			
		||||
The first two 256-byte tables provide lower casing and case flipping functions,
 | 
			
		||||
respectively. The next table consists of three 32-byte bit maps which identify
 | 
			
		||||
digits, "word" characters, and white space, respectively. These are used when
 | 
			
		||||
building 32-byte bit maps that represent character classes for code points less
 | 
			
		||||
than 256.
 | 
			
		||||
 | 
			
		||||
The final 256-byte table has bits indicating various character types, as
 | 
			
		||||
follows:
 | 
			
		||||
 | 
			
		||||
    1   white space character
 | 
			
		||||
    2   letter
 | 
			
		||||
    4   decimal digit
 | 
			
		||||
    8   hexadecimal digit
 | 
			
		||||
   16   alphanumeric or '_'
 | 
			
		||||
  128   regular expression metacharacter or binary zero
 | 
			
		||||
 | 
			
		||||
You should not alter the set of characters that contain the 128 bit, as that
 | 
			
		||||
will cause PCRE to malfunction.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
File manifest
 | 
			
		||||
-------------
 | 
			
		||||
 | 
			
		||||
The distribution should contain the files listed below. Where a file name is
 | 
			
		||||
given as pcre[16|32]_xxx it means that there are three files, one with the name
 | 
			
		||||
pcre_xxx, one with the name pcre16_xxx, and a third with the name pcre32_xxx.
 | 
			
		||||
 | 
			
		||||
(A) Source files of the PCRE library functions and their headers:
 | 
			
		||||
 | 
			
		||||
  dftables.c              auxiliary program for building pcre_chartables.c
 | 
			
		||||
                          when --enable-rebuild-chartables is specified
 | 
			
		||||
 | 
			
		||||
  pcre_chartables.c.dist  a default set of character tables that assume ASCII
 | 
			
		||||
                          coding; used, unless --enable-rebuild-chartables is
 | 
			
		||||
                          specified, by copying to pcre[16]_chartables.c
 | 
			
		||||
 | 
			
		||||
  pcreposix.c                )
 | 
			
		||||
  pcre[16|32]_byte_order.c   )
 | 
			
		||||
  pcre[16|32]_compile.c      )
 | 
			
		||||
  pcre[16|32]_config.c       )
 | 
			
		||||
  pcre[16|32]_dfa_exec.c     )
 | 
			
		||||
  pcre[16|32]_exec.c         )
 | 
			
		||||
  pcre[16|32]_fullinfo.c     )
 | 
			
		||||
  pcre[16|32]_get.c          ) sources for the functions in the library,
 | 
			
		||||
  pcre[16|32]_globals.c      )   and some internal functions that they use
 | 
			
		||||
  pcre[16|32]_jit_compile.c  )
 | 
			
		||||
  pcre[16|32]_maketables.c   )
 | 
			
		||||
  pcre[16|32]_newline.c      )
 | 
			
		||||
  pcre[16|32]_refcount.c     )
 | 
			
		||||
  pcre[16|32]_string_utils.c )
 | 
			
		||||
  pcre[16|32]_study.c        )
 | 
			
		||||
  pcre[16|32]_tables.c       )
 | 
			
		||||
  pcre[16|32]_ucd.c          )
 | 
			
		||||
  pcre[16|32]_version.c      )
 | 
			
		||||
  pcre[16|32]_xclass.c       )
 | 
			
		||||
  pcre_ord2utf8.c            )
 | 
			
		||||
  pcre_valid_utf8.c          )
 | 
			
		||||
  pcre16_ord2utf16.c         )
 | 
			
		||||
  pcre16_utf16_utils.c       )
 | 
			
		||||
  pcre16_valid_utf16.c       )
 | 
			
		||||
  pcre32_utf32_utils.c       )
 | 
			
		||||
  pcre32_valid_utf32.c       )
 | 
			
		||||
 | 
			
		||||
  pcre[16|32]_printint.c     ) debugging function that is used by pcretest,
 | 
			
		||||
                             )   and can also be #included in pcre_compile()
 | 
			
		||||
 | 
			
		||||
  pcre.h.in               template for pcre.h when built by "configure"
 | 
			
		||||
  pcreposix.h             header for the external POSIX wrapper API
 | 
			
		||||
  pcre_internal.h         header for internal use
 | 
			
		||||
  sljit/*                 16 files that make up the JIT compiler
 | 
			
		||||
  ucp.h                   header for Unicode property handling
 | 
			
		||||
 | 
			
		||||
  config.h.in             template for config.h, which is built by "configure"
 | 
			
		||||
 | 
			
		||||
  pcrecpp.h               public header file for the C++ wrapper
 | 
			
		||||
  pcrecpparg.h.in         template for another C++ header file
 | 
			
		||||
  pcre_scanner.h          public header file for C++ scanner functions
 | 
			
		||||
  pcrecpp.cc              )
 | 
			
		||||
  pcre_scanner.cc         ) source for the C++ wrapper library
 | 
			
		||||
 | 
			
		||||
  pcre_stringpiece.h.in   template for pcre_stringpiece.h, the header for the
 | 
			
		||||
                            C++ stringpiece functions
 | 
			
		||||
  pcre_stringpiece.cc     source for the C++ stringpiece functions
 | 
			
		||||
 | 
			
		||||
(B) Source files for programs that use PCRE:
 | 
			
		||||
 | 
			
		||||
  pcredemo.c              simple demonstration of coding calls to PCRE
 | 
			
		||||
  pcregrep.c              source of a grep utility that uses PCRE
 | 
			
		||||
  pcretest.c              comprehensive test program
 | 
			
		||||
 | 
			
		||||
(C) Auxiliary files:
 | 
			
		||||
 | 
			
		||||
  132html                 script to turn "man" pages into HTML
 | 
			
		||||
  AUTHORS                 information about the author of PCRE
 | 
			
		||||
  ChangeLog               log of changes to the code
 | 
			
		||||
  CleanTxt                script to clean nroff output for txt man pages
 | 
			
		||||
  Detrail                 script to remove trailing spaces
 | 
			
		||||
  HACKING                 some notes about the internals of PCRE
 | 
			
		||||
  INSTALL                 generic installation instructions
 | 
			
		||||
  LICENCE                 conditions for the use of PCRE
 | 
			
		||||
  COPYING                 the same, using GNU's standard name
 | 
			
		||||
  Makefile.in             ) template for Unix Makefile, which is built by
 | 
			
		||||
                          )   "configure"
 | 
			
		||||
  Makefile.am             ) the automake input that was used to create
 | 
			
		||||
                          )   Makefile.in
 | 
			
		||||
  NEWS                    important changes in this release
 | 
			
		||||
  NON-UNIX-USE            the previous name for NON-AUTOTOOLS-BUILD
 | 
			
		||||
  NON-AUTOTOOLS-BUILD     notes on building PCRE without using autotools
 | 
			
		||||
  PrepareRelease          script to make preparations for "make dist"
 | 
			
		||||
  README                  this file
 | 
			
		||||
  RunTest                 a Unix shell script for running tests
 | 
			
		||||
  RunGrepTest             a Unix shell script for pcregrep tests
 | 
			
		||||
  aclocal.m4              m4 macros (generated by "aclocal")
 | 
			
		||||
  config.guess            ) files used by libtool,
 | 
			
		||||
  config.sub              )   used only when building a shared library
 | 
			
		||||
  configure               a configuring shell script (built by autoconf)
 | 
			
		||||
  configure.ac            ) the autoconf input that was used to build
 | 
			
		||||
                          )   "configure" and config.h
 | 
			
		||||
  depcomp                 ) script to find program dependencies, generated by
 | 
			
		||||
                          )   automake
 | 
			
		||||
  doc/*.3                 man page sources for PCRE
 | 
			
		||||
  doc/*.1                 man page sources for pcregrep and pcretest
 | 
			
		||||
  doc/index.html.src      the base HTML page
 | 
			
		||||
  doc/html/*              HTML documentation
 | 
			
		||||
  doc/pcre.txt            plain text version of the man pages
 | 
			
		||||
  doc/pcretest.txt        plain text documentation of test program
 | 
			
		||||
  doc/perltest.txt        plain text documentation of Perl test program
 | 
			
		||||
  install-sh              a shell script for installing files
 | 
			
		||||
  libpcre16.pc.in         template for libpcre16.pc for pkg-config
 | 
			
		||||
  libpcre32.pc.in         template for libpcre32.pc for pkg-config
 | 
			
		||||
  libpcre.pc.in           template for libpcre.pc for pkg-config
 | 
			
		||||
  libpcreposix.pc.in      template for libpcreposix.pc for pkg-config
 | 
			
		||||
  libpcrecpp.pc.in        template for libpcrecpp.pc for pkg-config
 | 
			
		||||
  ltmain.sh               file used to build a libtool script
 | 
			
		||||
  missing                 ) common stub for a few missing GNU programs while
 | 
			
		||||
                          )   installing, generated by automake
 | 
			
		||||
  mkinstalldirs           script for making install directories
 | 
			
		||||
  perltest.pl             Perl test program
 | 
			
		||||
  pcre-config.in          source of script which retains PCRE information
 | 
			
		||||
  pcre_jit_test.c         test program for the JIT compiler
 | 
			
		||||
  pcrecpp_unittest.cc          )
 | 
			
		||||
  pcre_scanner_unittest.cc     ) test programs for the C++ wrapper
 | 
			
		||||
  pcre_stringpiece_unittest.cc )
 | 
			
		||||
  testdata/testinput*     test data for main library tests
 | 
			
		||||
  testdata/testoutput*    expected test results
 | 
			
		||||
  testdata/grep*          input and output for pcregrep tests
 | 
			
		||||
  testdata/*              other supporting test files
 | 
			
		||||
 | 
			
		||||
(D) Auxiliary files for cmake support
 | 
			
		||||
 | 
			
		||||
  cmake/COPYING-CMAKE-SCRIPTS
 | 
			
		||||
  cmake/FindPackageHandleStandardArgs.cmake
 | 
			
		||||
  cmake/FindEditline.cmake
 | 
			
		||||
  cmake/FindReadline.cmake
 | 
			
		||||
  CMakeLists.txt
 | 
			
		||||
  config-cmake.h.in
 | 
			
		||||
 | 
			
		||||
(E) Auxiliary files for VPASCAL
 | 
			
		||||
 | 
			
		||||
  makevp.bat
 | 
			
		||||
  makevp_c.txt
 | 
			
		||||
  makevp_l.txt
 | 
			
		||||
  pcregexp.pas
 | 
			
		||||
 | 
			
		||||
(F) Auxiliary files for building PCRE "by hand"
 | 
			
		||||
 | 
			
		||||
  pcre.h.generic          ) a version of the public PCRE header file
 | 
			
		||||
                          )   for use in non-"configure" environments
 | 
			
		||||
  config.h.generic        ) a version of config.h for use in non-"configure"
 | 
			
		||||
                          )   environments
 | 
			
		||||
 | 
			
		||||
(G) Miscellaneous
 | 
			
		||||
 | 
			
		||||
  RunTest.bat            a script for running tests under Windows
 | 
			
		||||
 | 
			
		||||
Philip Hazel
 | 
			
		||||
Email local part: ph10
 | 
			
		||||
Email domain: cam.ac.uk
 | 
			
		||||
Last updated: 17 January 2014
 | 
			
		||||
							
								
								
									
										185
									
								
								tools/pcre/doc/html/index.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										185
									
								
								tools/pcre/doc/html/index.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,185 @@
 | 
			
		||||
<html>
 | 
			
		||||
<!-- This is a manually maintained file that is the root of the HTML version of
 | 
			
		||||
     the PCRE documentation. When the HTML documents are built from the man
 | 
			
		||||
     page versions, the entire doc/html directory is emptied, this file is then
 | 
			
		||||
     copied into doc/html/index.html, and the remaining files therein are
 | 
			
		||||
     created by the 132html script.
 | 
			
		||||
-->
 | 
			
		||||
<head>
 | 
			
		||||
<title>PCRE specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
 | 
			
		||||
<p>
 | 
			
		||||
The HTML documentation for PCRE consists of a number of pages that are listed
 | 
			
		||||
below in alphabetical order. If you are new to PCRE, please read the first one
 | 
			
		||||
first.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<table>
 | 
			
		||||
<tr><td><a href="pcre.html">pcre</a></td>
 | 
			
		||||
    <td>  Introductory page</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
 | 
			
		||||
    <td>  Information about the installation configuration</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre16.html">pcre16</a></td>
 | 
			
		||||
    <td>  Discussion of the 16-bit PCRE library</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre32.html">pcre32</a></td>
 | 
			
		||||
    <td>  Discussion of the 32-bit PCRE library</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcreapi.html">pcreapi</a></td>
 | 
			
		||||
    <td>  PCRE's native API</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
 | 
			
		||||
    <td>  Building PCRE</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
 | 
			
		||||
    <td>  The <i>callout</i> facility</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrecompat.html">pcrecompat</a></td>
 | 
			
		||||
    <td>  Compatibility with Perl</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrecpp.html">pcrecpp</a></td>
 | 
			
		||||
    <td>  The C++ wrapper for the PCRE library</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcredemo.html">pcredemo</a></td>
 | 
			
		||||
    <td>  A demonstration C program that uses the PCRE library</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcregrep.html">pcregrep</a></td>
 | 
			
		||||
    <td>  The <b>pcregrep</b> command</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrejit.html">pcrejit</a></td>
 | 
			
		||||
    <td>  Discussion of the just-in-time optimization support</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrelimits.html">pcrelimits</a></td>
 | 
			
		||||
    <td>  Details of size and other limits</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrematching.html">pcrematching</a></td>
 | 
			
		||||
    <td>  Discussion of the two matching algorithms</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrepartial.html">pcrepartial</a></td>
 | 
			
		||||
    <td>  Using PCRE for partial matching</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrepattern.html">pcrepattern</a></td>
 | 
			
		||||
    <td>  Specification of the regular expressions supported by PCRE</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcreperform.html">pcreperform</a></td>
 | 
			
		||||
    <td>  Some comments on performance</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcreposix.html">pcreposix</a></td>
 | 
			
		||||
    <td>  The POSIX API to the PCRE 8-bit library</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
 | 
			
		||||
    <td>  How to save and re-use compiled patterns</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcresample.html">pcresample</a></td>
 | 
			
		||||
    <td>  Discussion of the pcredemo program</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcrestack.html">pcrestack</a></td>
 | 
			
		||||
    <td>  Discussion of PCRE's stack usage</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcresyntax.html">pcresyntax</a></td>
 | 
			
		||||
    <td>  Syntax quick-reference summary</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcretest.html">pcretest</a></td>
 | 
			
		||||
    <td>  The <b>pcretest</b> command for testing PCRE</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcreunicode.html">pcreunicode</a></td>
 | 
			
		||||
    <td>  Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
 | 
			
		||||
</table>
 | 
			
		||||
 | 
			
		||||
<p>
 | 
			
		||||
There are also individual pages that summarize the interface for each function
 | 
			
		||||
in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
 | 
			
		||||
functions.
 | 
			
		||||
</p>
 | 
			
		||||
 | 
			
		||||
<table>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_assign_jit_stack.html">pcre_assign_jit_stack</a></td>
 | 
			
		||||
    <td>  Assign stack for JIT matching</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_compile.html">pcre_compile</a></td>
 | 
			
		||||
    <td>  Compile a regular expression</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_compile2.html">pcre_compile2</a></td>
 | 
			
		||||
    <td>  Compile a regular expression (alternate interface)</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_config.html">pcre_config</a></td>
 | 
			
		||||
    <td>  Show build-time configuration options</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_copy_named_substring.html">pcre_copy_named_substring</a></td>
 | 
			
		||||
    <td>  Extract named substring into given buffer</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_copy_substring.html">pcre_copy_substring</a></td>
 | 
			
		||||
    <td>  Extract numbered substring into given buffer</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_dfa_exec.html">pcre_dfa_exec</a></td>
 | 
			
		||||
    <td>  Match a compiled pattern to a subject string
 | 
			
		||||
    (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
 | 
			
		||||
    <td>  Match a compiled pattern to a subject string
 | 
			
		||||
    (Perl compatible)</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
 | 
			
		||||
    <td>  Free study data</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
 | 
			
		||||
    <td>  Free extracted substring</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_free_substring_list.html">pcre_free_substring_list</a></td>
 | 
			
		||||
    <td>  Free list of extracted substrings</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_fullinfo.html">pcre_fullinfo</a></td>
 | 
			
		||||
    <td>  Extract information about a pattern</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_get_named_substring.html">pcre_get_named_substring</a></td>
 | 
			
		||||
    <td>  Extract named substring into new memory</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
 | 
			
		||||
    <td>  Convert captured string name to number</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_get_stringtable_entries.html">pcre_get_stringtable_entries</a></td>
 | 
			
		||||
    <td>  Find table entries for given string name</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
 | 
			
		||||
    <td>  Extract numbered substring into new memory</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
 | 
			
		||||
    <td>  Extract all substrings into new memory</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_jit_exec.html">pcre_jit_exec</a></td>
 | 
			
		||||
    <td>  Fast path interface to JIT matching</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
 | 
			
		||||
    <td>  Create a stack for JIT matching</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_jit_stack_free.html">pcre_jit_stack_free</a></td>
 | 
			
		||||
    <td>  Free a JIT matching stack</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
 | 
			
		||||
    <td>  Build character tables in current locale</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_pattern_to_host_byte_order.html">pcre_pattern_to_host_byte_order</a></td>
 | 
			
		||||
    <td>  Convert compiled pattern to host byte order if necessary</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
 | 
			
		||||
    <td>  Maintain reference count in compiled pattern</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_study.html">pcre_study</a></td>
 | 
			
		||||
    <td>  Study a compiled pattern</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_utf16_to_host_byte_order.html">pcre_utf16_to_host_byte_order</a></td>
 | 
			
		||||
    <td>  Convert UTF-16 string to host byte order if necessary</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_utf32_to_host_byte_order.html">pcre_utf32_to_host_byte_order</a></td>
 | 
			
		||||
    <td>  Convert UTF-32 string to host byte order if necessary</td></tr>
 | 
			
		||||
 | 
			
		||||
<tr><td><a href="pcre_version.html">pcre_version</a></td>
 | 
			
		||||
    <td>  Return PCRE version and release date</td></tr>
 | 
			
		||||
</table>
 | 
			
		||||
 | 
			
		||||
</html>
 | 
			
		||||
							
								
								
									
										109
									
								
								tools/pcre/doc/html/pcre-config.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								tools/pcre/doc/html/pcre-config.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,109 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre-config specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre-config man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">OPTIONS</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">SEE ALSO</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre-config  [--prefix] [--exec-prefix] [--version] [--libs]</b>
 | 
			
		||||
<b>            [--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
 | 
			
		||||
<b>            [--cflags] [--cflags-posix]</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre-config</b> returns the configuration of the installed PCRE
 | 
			
		||||
libraries and the options required to compile a program to use them. Some of
 | 
			
		||||
the options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
 | 
			
		||||
respectively, and are
 | 
			
		||||
not available if only one of those libraries has been built. If an unavailable
 | 
			
		||||
option is encountered, the "usage" information is output.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--prefix</b>
 | 
			
		||||
Writes the directory prefix used in the PCRE installation for architecture
 | 
			
		||||
independent files (<i>/usr</i> on many systems, <i>/usr/local</i> on some
 | 
			
		||||
systems) to the standard output.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--exec-prefix</b>
 | 
			
		||||
Writes the directory prefix used in the PCRE installation for architecture
 | 
			
		||||
dependent files (normally the same as <b>--prefix</b>) to the standard output.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--version</b>
 | 
			
		||||
Writes the version number of the installed PCRE libraries to the standard
 | 
			
		||||
output.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--libs</b>
 | 
			
		||||
Writes to the standard output the command line options required to link
 | 
			
		||||
with the 8-bit PCRE library (<b>-lpcre</b> on many systems).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--libs16</b>
 | 
			
		||||
Writes to the standard output the command line options required to link
 | 
			
		||||
with the 16-bit PCRE library (<b>-lpcre16</b> on many systems).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--libs32</b>
 | 
			
		||||
Writes to the standard output the command line options required to link
 | 
			
		||||
with the 32-bit PCRE library (<b>-lpcre32</b> on many systems).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--libs-cpp</b>
 | 
			
		||||
Writes to the standard output the command line options required to link with
 | 
			
		||||
PCRE's C++ wrapper library (<b>-lpcrecpp</b> <b>-lpcre</b> on many
 | 
			
		||||
systems).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--libs-posix</b>
 | 
			
		||||
Writes to the standard output the command line options required to link with
 | 
			
		||||
PCRE's POSIX API wrapper library (<b>-lpcreposix</b> <b>-lpcre</b> on many
 | 
			
		||||
systems).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--cflags</b>
 | 
			
		||||
Writes to the standard output the command line options required to compile
 | 
			
		||||
files that use PCRE (this may include some <b>-I</b> options, but is blank on
 | 
			
		||||
many systems).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--cflags-posix</b>
 | 
			
		||||
Writes to the standard output the command line options required to compile
 | 
			
		||||
files that use PCRE's POSIX API wrapper library (this may include some <b>-I</b>
 | 
			
		||||
options, but is blank on many systems).
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre(3)</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
This manual page was originally written by Mark Baker for the Debian GNU/Linux
 | 
			
		||||
system. It has been subsequently revised as a generic PCRE man page.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 24 June 2012
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										213
									
								
								tools/pcre/doc/html/pcre.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										213
									
								
								tools/pcre/doc/html/pcre.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,213 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The PCRE library is a set of functions that implement regular expression
 | 
			
		||||
pattern matching using the same syntax and semantics as Perl, with just a few
 | 
			
		||||
differences. Some features that appeared in Python and PCRE before they
 | 
			
		||||
appeared in Perl are also available using the Python syntax, there is some
 | 
			
		||||
support for one or two .NET and Oniguruma syntax items, and there is an option
 | 
			
		||||
for requesting some minor changes that give better JavaScript compatibility.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Starting with release 8.30, it is possible to compile two separate PCRE
 | 
			
		||||
libraries: the original, which supports 8-bit character strings (including
 | 
			
		||||
UTF-8 strings), and a second library that supports 16-bit character strings
 | 
			
		||||
(including UTF-16 strings). The build process allows either one or both to be
 | 
			
		||||
built. The majority of the work to make this possible was done by Zoltan
 | 
			
		||||
Herczeg.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Starting with release 8.32 it is possible to compile a third separate PCRE
 | 
			
		||||
library that supports 32-bit character strings (including UTF-32 strings). The
 | 
			
		||||
build process allows any combination of the 8-, 16- and 32-bit libraries. The
 | 
			
		||||
work to make this possible was done by Christian Persch.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The three libraries contain identical sets of functions, except that the names
 | 
			
		||||
in the 16-bit library start with <b>pcre16_</b> instead of <b>pcre_</b>, and the
 | 
			
		||||
names in the 32-bit library start with <b>pcre32_</b> instead of <b>pcre_</b>. To
 | 
			
		||||
avoid over-complication and reduce the documentation maintenance load, most of
 | 
			
		||||
the documentation describes the 8-bit library, with the differences for the
 | 
			
		||||
16-bit and 32-bit libraries described separately in the
 | 
			
		||||
<a href="pcre16.html"><b>pcre16</b></a>
 | 
			
		||||
and
 | 
			
		||||
<a href="pcre32.html"><b>pcre32</b></a>
 | 
			
		||||
pages. References to functions or structures of the form <i>pcre[16|32]_xxx</i>
 | 
			
		||||
should be read as meaning "<i>pcre_xxx</i> when using the 8-bit library,
 | 
			
		||||
<i>pcre16_xxx</i> when using the 16-bit library, or <i>pcre32_xxx</i> when using
 | 
			
		||||
the 32-bit library".
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The current implementation of PCRE corresponds approximately with Perl 5.12,
 | 
			
		||||
including support for UTF-8/16/32 encoded strings and Unicode general category
 | 
			
		||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
 | 
			
		||||
enabled; it is not the default. The Unicode tables correspond to Unicode
 | 
			
		||||
release 6.3.0.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
In addition to the Perl-compatible matching function, PCRE contains an
 | 
			
		||||
alternative function that matches the same compiled patterns in a different
 | 
			
		||||
way. In certain circumstances, the alternative function has some advantages.
 | 
			
		||||
For a discussion of the two matching algorithms, see the
 | 
			
		||||
<a href="pcrematching.html"><b>pcrematching</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
PCRE is written in C and released as a C library. A number of people have
 | 
			
		||||
written wrappers and interfaces of various kinds. In particular, Google Inc.
 | 
			
		||||
have provided a comprehensive C++ wrapper for the 8-bit library. This is now
 | 
			
		||||
included as part of the PCRE distribution. The
 | 
			
		||||
<a href="pcrecpp.html"><b>pcrecpp</b></a>
 | 
			
		||||
page has details of this interface. Other people's contributions can be found
 | 
			
		||||
in the <i>Contrib</i> directory at the primary FTP site, which is:
 | 
			
		||||
<a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre</a>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Details of exactly which Perl regular expression features are and are not
 | 
			
		||||
supported by PCRE are given in separate documents. See the
 | 
			
		||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
 | 
			
		||||
and
 | 
			
		||||
<a href="pcrecompat.html"><b>pcrecompat</b></a>
 | 
			
		||||
pages. There is a syntax summary in the
 | 
			
		||||
<a href="pcresyntax.html"><b>pcresyntax</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Some features of PCRE can be included, excluded, or changed when the library is
 | 
			
		||||
built. The
 | 
			
		||||
<a href="pcre_config.html"><b>pcre_config()</b></a>
 | 
			
		||||
function makes it possible for a client to discover which features are
 | 
			
		||||
available. The features themselves are described in the
 | 
			
		||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
 | 
			
		||||
page. Documentation about building PCRE for various operating systems can be
 | 
			
		||||
found in the
 | 
			
		||||
<a href="README.txt"><b>README</b></a>
 | 
			
		||||
and
 | 
			
		||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b></a>
 | 
			
		||||
files in the source distribution.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The libraries contain a number of undocumented internal functions and data
 | 
			
		||||
tables that are used by more than one of the exported external functions, but
 | 
			
		||||
which are not intended for use by external callers. Their names all begin with
 | 
			
		||||
"_pcre_" or "_pcre16_" or "_pcre32_", which hopefully will not provoke any name
 | 
			
		||||
clashes. In some environments, it is possible to control which external symbols
 | 
			
		||||
are exported when a shared library is built, and in these cases the
 | 
			
		||||
undocumented symbols are not exported.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">SECURITY CONSIDERATIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
If you are using PCRE in a non-UTF application that permits users to supply
 | 
			
		||||
arbitrary patterns for compilation, you should be aware of a feature that
 | 
			
		||||
allows users to turn on UTF support from within a pattern, provided that PCRE
 | 
			
		||||
was built with UTF support. For example, an 8-bit pattern that begins with
 | 
			
		||||
"(*UTF8)" or "(*UTF)" turns on UTF-8 mode, which interprets patterns and
 | 
			
		||||
subjects as strings of UTF-8 characters instead of individual 8-bit characters.
 | 
			
		||||
This causes both the pattern and any data against which it is matched to be
 | 
			
		||||
checked for UTF-8 validity. If the data string is very long, such a check might
 | 
			
		||||
use sufficiently many resources as to cause your application to lose
 | 
			
		||||
performance.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
One way of guarding against this possibility is to use the
 | 
			
		||||
<b>pcre_fullinfo()</b> function to check the compiled pattern's options for UTF.
 | 
			
		||||
Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
 | 
			
		||||
compile time. This causes a compile time error if a pattern contains a
 | 
			
		||||
UTF-setting sequence.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If your application is one that supports UTF, be aware that validity checking
 | 
			
		||||
can take time. If the same data string is to be matched many times, you can use
 | 
			
		||||
the PCRE_NO_UTF[8|16|32]_CHECK option for the second and subsequent matches to
 | 
			
		||||
save redundant checks.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Another way that performance can be hit is by running a pattern that has a very
 | 
			
		||||
large search tree against a string that will never match. Nested unlimited
 | 
			
		||||
repeats in a pattern are a common example. PCRE provides some protection
 | 
			
		||||
against this: see the PCRE_EXTRA_MATCH_LIMIT feature in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">USER DOCUMENTATION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The user documentation for PCRE comprises a number of different sections. In
 | 
			
		||||
the "man" format, each of these is a separate "man page". In the HTML format,
 | 
			
		||||
each is a separate page, linked from the index page. In the plain text format,
 | 
			
		||||
the descriptions of the <b>pcregrep</b> and <b>pcretest</b> programs are in files
 | 
			
		||||
called <b>pcregrep.txt</b> and <b>pcretest.txt</b>, respectively. The remaining
 | 
			
		||||
sections, except for the <b>pcredemo</b> section (which is a program listing),
 | 
			
		||||
are concatenated in <b>pcre.txt</b>, for ease of searching. The sections are as
 | 
			
		||||
follows:
 | 
			
		||||
<pre>
 | 
			
		||||
  pcre              this document
 | 
			
		||||
  pcre-config       show PCRE installation configuration information
 | 
			
		||||
  pcre16            details of the 16-bit library
 | 
			
		||||
  pcre32            details of the 32-bit library
 | 
			
		||||
  pcreapi           details of PCRE's native C API
 | 
			
		||||
  pcrebuild         building PCRE
 | 
			
		||||
  pcrecallout       details of the callout feature
 | 
			
		||||
  pcrecompat        discussion of Perl compatibility
 | 
			
		||||
  pcrecpp           details of the C++ wrapper for the 8-bit library
 | 
			
		||||
  pcredemo          a demonstration C program that uses PCRE
 | 
			
		||||
  pcregrep          description of the <b>pcregrep</b> command (8-bit only)
 | 
			
		||||
  pcrejit           discussion of the just-in-time optimization support
 | 
			
		||||
  pcrelimits        details of size and other limits
 | 
			
		||||
  pcrematching      discussion of the two matching algorithms
 | 
			
		||||
  pcrepartial       details of the partial matching facility
 | 
			
		||||
  pcrepattern       syntax and semantics of supported regular expressions
 | 
			
		||||
  pcreperform       discussion of performance issues
 | 
			
		||||
  pcreposix         the POSIX-compatible C API for the 8-bit library
 | 
			
		||||
  pcreprecompile    details of saving and re-using precompiled patterns
 | 
			
		||||
  pcresample        discussion of the pcredemo program
 | 
			
		||||
  pcrestack         discussion of stack usage
 | 
			
		||||
  pcresyntax        quick syntax reference
 | 
			
		||||
  pcretest          description of the <b>pcretest</b> testing command
 | 
			
		||||
  pcreunicode       discussion of Unicode and UTF-8/16/32 support
 | 
			
		||||
</pre>
 | 
			
		||||
In the "man" and HTML formats, there is also a short page for each C library
 | 
			
		||||
function, listing its arguments and results.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Putting an actual email address here seems to have been a spam magnet, so I've
 | 
			
		||||
taken it away. If you want to email me, use my two initials, followed by the
 | 
			
		||||
two digits 10, at the domain cam.ac.uk.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 08 January 2014
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2014 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										384
									
								
								tools/pcre/doc/html/pcre16.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										384
									
								
								tools/pcre/doc/html/pcre16.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,384 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre16 specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre16 man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">PCRE 16-BIT API BASIC FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">PCRE 16-BIT API AUXILIARY FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">PCRE 16-BIT API INDIRECTED FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">THE PCRE 16-BIT LIBRARY</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">THE HEADER FILE</a>
 | 
			
		||||
<li><a name="TOC8" href="#SEC8">THE LIBRARY NAME</a>
 | 
			
		||||
<li><a name="TOC9" href="#SEC9">STRING TYPES</a>
 | 
			
		||||
<li><a name="TOC10" href="#SEC10">STRUCTURE TYPES</a>
 | 
			
		||||
<li><a name="TOC11" href="#SEC11">16-BIT FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC12" href="#SEC12">SUBJECT STRING OFFSETS</a>
 | 
			
		||||
<li><a name="TOC13" href="#SEC13">NAMED SUBPATTERNS</a>
 | 
			
		||||
<li><a name="TOC14" href="#SEC14">OPTION NAMES</a>
 | 
			
		||||
<li><a name="TOC15" href="#SEC15">CHARACTER CODES</a>
 | 
			
		||||
<li><a name="TOC16" href="#SEC16">ERROR NAMES</a>
 | 
			
		||||
<li><a name="TOC17" href="#SEC17">ERROR TEXTS</a>
 | 
			
		||||
<li><a name="TOC18" href="#SEC18">CALLOUTS</a>
 | 
			
		||||
<li><a name="TOC19" href="#SEC19">TESTING</a>
 | 
			
		||||
<li><a name="TOC20" href="#SEC20">NOT SUPPORTED IN 16-BIT MODE</a>
 | 
			
		||||
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC22" href="#SEC22">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">PCRE 16-BIT API BASIC FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     int *<i>errorcodeptr</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
 | 
			
		||||
<b>     int *<i>workspace</i>, int <i>wscount</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
 | 
			
		||||
<b>     PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
 | 
			
		||||
<b>     int <i>buffersize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 *<i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>name</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 *<i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
 | 
			
		||||
<b>     int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>const unsigned char *pcre16_maketables(void);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>const char *pcre16_version(void);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void *(*pcre16_malloc)(size_t);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void (*pcre16_free)(void *);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void *(*pcre16_stack_malloc)(size_t);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void (*pcre16_stack_free)(void *);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
 | 
			
		||||
<b>     int <i>keep_boms</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">THE PCRE 16-BIT LIBRARY</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Starting with release 8.30, it is possible to compile a PCRE library that
 | 
			
		||||
supports 16-bit character strings, including UTF-16 strings, as well as or
 | 
			
		||||
instead of the original 8-bit library. The majority of the work to make this
 | 
			
		||||
possible was done by Zoltan Herczeg. The two libraries contain identical sets
 | 
			
		||||
of functions, used in exactly the same way. Only the names of the functions and
 | 
			
		||||
the data types of their arguments and results are different. To avoid
 | 
			
		||||
over-complication and reduce the documentation maintenance load, most of the
 | 
			
		||||
PCRE documentation describes the 8-bit library, with only occasional references
 | 
			
		||||
to the 16-bit library. This page describes what is different when you use the
 | 
			
		||||
16-bit library.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
WARNING: A single application can be linked with both libraries, but you must
 | 
			
		||||
take care when processing any particular pattern to use functions from just one
 | 
			
		||||
library. For example, if you want to study a pattern that was compiled with
 | 
			
		||||
<b>pcre16_compile()</b>, you must do so with <b>pcre16_study()</b>, not
 | 
			
		||||
<b>pcre_study()</b>, and you must free the study data with
 | 
			
		||||
<b>pcre16_free_study()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">THE HEADER FILE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
There is only one header file, <b>pcre.h</b>. It contains prototypes for all the
 | 
			
		||||
functions in all libraries, as well as definitions of flags, structures, error
 | 
			
		||||
codes, etc.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC8" href="#TOC1">THE LIBRARY NAME</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
In Unix-like systems, the 16-bit library is called <b>libpcre16</b>, and can
 | 
			
		||||
normally be accessed by adding <b>-lpcre16</b> to the command for linking an
 | 
			
		||||
application that uses PCRE.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC9" href="#TOC1">STRING TYPES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
In the 8-bit library, strings are passed to PCRE library functions as vectors
 | 
			
		||||
of bytes with the C type "char *". In the 16-bit library, strings are passed as
 | 
			
		||||
vectors of unsigned 16-bit quantities. The macro PCRE_UCHAR16 specifies an
 | 
			
		||||
appropriate data type, and PCRE_SPTR16 is defined as "const PCRE_UCHAR16 *". In
 | 
			
		||||
very many environments, "short int" is a 16-bit data type. When PCRE is built,
 | 
			
		||||
it defines PCRE_UCHAR16 as "unsigned short int", but checks that it really is a
 | 
			
		||||
16-bit data type. If it is not, the build fails with an error message telling
 | 
			
		||||
the maintainer to modify the definition appropriately.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC10" href="#TOC1">STRUCTURE TYPES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The types of the opaque structures that are used for compiled 16-bit patterns
 | 
			
		||||
and JIT stacks are <b>pcre16</b> and <b>pcre16_jit_stack</b> respectively. The
 | 
			
		||||
type of the user-accessible structure that is returned by <b>pcre16_study()</b>
 | 
			
		||||
is <b>pcre16_extra</b>, and the type of the structure that is used for passing
 | 
			
		||||
data to a callout function is <b>pcre16_callout_block</b>. These structures
 | 
			
		||||
contain the same fields, with the same names, as their 8-bit counterparts. The
 | 
			
		||||
only difference is that pointers to character strings are 16-bit instead of
 | 
			
		||||
8-bit types.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC11" href="#TOC1">16-BIT FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
For every function in the 8-bit library there is a corresponding function in
 | 
			
		||||
the 16-bit library with a name that starts with <b>pcre16_</b> instead of
 | 
			
		||||
<b>pcre_</b>. The prototypes are listed above. In addition, there is one extra
 | 
			
		||||
function, <b>pcre16_utf16_to_host_byte_order()</b>. This is a utility function
 | 
			
		||||
that converts a UTF-16 character string to host byte order if necessary. The
 | 
			
		||||
other 16-bit functions expect the strings they are passed to be in host byte
 | 
			
		||||
order.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>input</i> and <i>output</i> arguments of
 | 
			
		||||
<b>pcre16_utf16_to_host_byte_order()</b> may point to the same address, that is,
 | 
			
		||||
conversion in place is supported. The output buffer must be at least as long as
 | 
			
		||||
the input.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>length</i> argument specifies the number of 16-bit data units in the
 | 
			
		||||
input string; a negative value specifies a zero-terminated string.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>byte_order</i> is NULL, it is assumed that the string starts off in host
 | 
			
		||||
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
 | 
			
		||||
string (commonly as the first character).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>byte_order</i> is not NULL, a non-zero value of the integer to which it
 | 
			
		||||
points means that the input starts off in host byte order, otherwise the
 | 
			
		||||
opposite order is assumed. Again, BOMs in the string can change this. The final
 | 
			
		||||
byte order is passed back at the end of processing.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>keep_boms</i> is not zero, byte-order mark characters (0xfeff) are copied
 | 
			
		||||
into the output string. Otherwise they are discarded.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The result of the function is the number of 16-bit units placed into the output
 | 
			
		||||
buffer, including the zero terminator if the string was zero-terminated.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The lengths and starting offsets of subject strings must be specified in 16-bit
 | 
			
		||||
data units, and the offsets within subject strings that are returned by the
 | 
			
		||||
matching functions are also in 16-bit units rather than bytes.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The name-to-number translation table that is maintained for named subpatterns
 | 
			
		||||
uses 16-bit characters. The <b>pcre16_get_stringtable_entries()</b> function
 | 
			
		||||
returns the length of each entry in the table as the number of 16-bit data
 | 
			
		||||
units.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC14" href="#TOC1">OPTION NAMES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
There are two new general option names, PCRE_UTF16 and PCRE_NO_UTF16_CHECK,
 | 
			
		||||
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
 | 
			
		||||
fact, these new options define the same bits in the options word. There is a
 | 
			
		||||
discussion about the
 | 
			
		||||
<a href="pcreunicode.html#utf16strings">validity of UTF-16 strings</a>
 | 
			
		||||
in the
 | 
			
		||||
<a href="pcreunicode.html"><b>pcreunicode</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
For the <b>pcre16_config()</b> function there is an option PCRE_CONFIG_UTF16
 | 
			
		||||
that returns 1 if UTF-16 support is configured, otherwise 0. If this option is
 | 
			
		||||
given to <b>pcre_config()</b> or <b>pcre32_config()</b>, or if the
 | 
			
		||||
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 option is given to <b>pcre16_config()</b>,
 | 
			
		||||
the result is the PCRE_ERROR_BADOPTION error.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC15" href="#TOC1">CHARACTER CODES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
In 16-bit mode, when PCRE_UTF16 is not set, character values are treated in the
 | 
			
		||||
same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
 | 
			
		||||
from 0 to 0xffff instead of 0 to 0xff. Character types for characters less than
 | 
			
		||||
0xff can therefore be influenced by the locale in the same way as before.
 | 
			
		||||
Characters greater than 0xff have only one case, and no "type" (such as letter
 | 
			
		||||
or digit).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
In UTF-16 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
 | 
			
		||||
the exception of values in the range 0xd800 to 0xdfff because those are
 | 
			
		||||
"surrogate" values that are used in pairs to encode values greater than 0xffff.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
A UTF-16 string can indicate its endianness by a special code known as a
 | 
			
		||||
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
 | 
			
		||||
to be in host byte order. A utility function called
 | 
			
		||||
<b>pcre16_utf16_to_host_byte_order()</b> is provided to help with this (see
 | 
			
		||||
above).
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC16" href="#TOC1">ERROR NAMES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The errors PCRE_ERROR_BADUTF16_OFFSET and PCRE_ERROR_SHORTUTF16 correspond to
 | 
			
		||||
their 8-bit counterparts. The error PCRE_ERROR_BADMODE is given when a compiled
 | 
			
		||||
pattern is passed to a function that processes patterns in the other
 | 
			
		||||
mode, for example, if a pattern compiled with <b>pcre_compile()</b> is passed to
 | 
			
		||||
<b>pcre16_exec()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There are new error codes whose names begin with PCRE_UTF16_ERR for invalid
 | 
			
		||||
UTF-16 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
 | 
			
		||||
are described in the section entitled
 | 
			
		||||
<a href="pcreapi.html#badutf8reasons">"Reason codes for invalid UTF-8 strings"</a>
 | 
			
		||||
in the main
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page. The UTF-16 errors are:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_UTF16_ERR1  Missing low surrogate at end of string
 | 
			
		||||
  PCRE_UTF16_ERR2  Invalid low surrogate follows high surrogate
 | 
			
		||||
  PCRE_UTF16_ERR3  Isolated low surrogate
 | 
			
		||||
  PCRE_UTF16_ERR4  Non-character
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC17" href="#TOC1">ERROR TEXTS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
If there is an error while compiling a pattern, the error text that is passed
 | 
			
		||||
back by <b>pcre16_compile()</b> or <b>pcre16_compile2()</b> is still an 8-bit
 | 
			
		||||
character string, zero-terminated.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC18" href="#TOC1">CALLOUTS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>subject</i> and <i>mark</i> fields in the callout block that is passed to
 | 
			
		||||
a callout function point to 16-bit vectors.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC19" href="#TOC1">TESTING</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The <b>pcretest</b> program continues to operate with 8-bit input and output
 | 
			
		||||
files, but it can be used for testing the 16-bit library. If it is run with the
 | 
			
		||||
command line option <b>-16</b>, patterns and subject strings are converted from
 | 
			
		||||
8-bit to 16-bit before being passed to PCRE, and the 16-bit library functions
 | 
			
		||||
are used instead of the 8-bit ones. Returned 16-bit strings are converted to
 | 
			
		||||
8-bit for output. If neither the 8-bit nor the 32-bit library was compiled,
 | 
			
		||||
<b>pcretest</b> defaults to 16-bit and the <b>-16</b> option is ignored.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
When PCRE is being built, the <b>RunTest</b> script that is called by "make
 | 
			
		||||
check" uses the <b>pcretest</b> <b>-C</b> option to discover which of the 8-bit,
 | 
			
		||||
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC20" href="#TOC1">NOT SUPPORTED IN 16-BIT MODE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Not all the features of the 8-bit library are available with the 16-bit
 | 
			
		||||
library. The C++ and POSIX wrapper functions support only the 8-bit library,
 | 
			
		||||
and the <b>pcregrep</b> program is at present 8-bit only.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 12 May 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2013 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										382
									
								
								tools/pcre/doc/html/pcre32.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										382
									
								
								tools/pcre/doc/html/pcre32.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,382 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre32 specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre32 man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">PCRE 32-BIT API BASIC FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">PCRE 32-BIT API AUXILIARY FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">PCRE 32-BIT API INDIRECTED FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">THE PCRE 32-BIT LIBRARY</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">THE HEADER FILE</a>
 | 
			
		||||
<li><a name="TOC8" href="#SEC8">THE LIBRARY NAME</a>
 | 
			
		||||
<li><a name="TOC9" href="#SEC9">STRING TYPES</a>
 | 
			
		||||
<li><a name="TOC10" href="#SEC10">STRUCTURE TYPES</a>
 | 
			
		||||
<li><a name="TOC11" href="#SEC11">32-BIT FUNCTIONS</a>
 | 
			
		||||
<li><a name="TOC12" href="#SEC12">SUBJECT STRING OFFSETS</a>
 | 
			
		||||
<li><a name="TOC13" href="#SEC13">NAMED SUBPATTERNS</a>
 | 
			
		||||
<li><a name="TOC14" href="#SEC14">OPTION NAMES</a>
 | 
			
		||||
<li><a name="TOC15" href="#SEC15">CHARACTER CODES</a>
 | 
			
		||||
<li><a name="TOC16" href="#SEC16">ERROR NAMES</a>
 | 
			
		||||
<li><a name="TOC17" href="#SEC17">ERROR TEXTS</a>
 | 
			
		||||
<li><a name="TOC18" href="#SEC18">CALLOUTS</a>
 | 
			
		||||
<li><a name="TOC19" href="#SEC19">TESTING</a>
 | 
			
		||||
<li><a name="TOC20" href="#SEC20">NOT SUPPORTED IN 32-BIT MODE</a>
 | 
			
		||||
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC22" href="#SEC22">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include <pcre.h></b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">PCRE 32-BIT API BASIC FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     int *<i>errorcodeptr</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
 | 
			
		||||
<b>     int *<i>workspace</i>, int <i>wscount</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
 | 
			
		||||
<b>     PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
 | 
			
		||||
<b>     int <i>buffersize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 *<i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>name</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 *<i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
 | 
			
		||||
<b>     int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">PCRE 32-BIT API AUXILIARY FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>const unsigned char *pcre32_maketables(void);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>const char *pcre32_version(void);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">PCRE 32-BIT API INDIRECTED FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void *(*pcre32_malloc)(size_t);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void (*pcre32_free)(void *);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void *(*pcre32_stack_malloc)(size_t);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void (*pcre32_stack_free)(void *);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
 | 
			
		||||
<b>     int <i>keep_boms</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">THE PCRE 32-BIT LIBRARY</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Starting with release 8.32, it is possible to compile a PCRE library that
 | 
			
		||||
supports 32-bit character strings, including UTF-32 strings, as well as or
 | 
			
		||||
instead of the original 8-bit library. This work was done by Christian Persch,
 | 
			
		||||
based on the work done by Zoltan Herczeg for the 16-bit library. All three
 | 
			
		||||
libraries contain identical sets of functions, used in exactly the same way.
 | 
			
		||||
Only the names of the functions and the data types of their arguments and
 | 
			
		||||
results are different. To avoid over-complication and reduce the documentation
 | 
			
		||||
maintenance load, most of the PCRE documentation describes the 8-bit library,
 | 
			
		||||
with only occasional references to the 16-bit and 32-bit libraries. This page
 | 
			
		||||
describes what is different when you use the 32-bit library.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
WARNING: A single application can be linked with all or any of the three
 | 
			
		||||
libraries, but you must take care when processing any particular pattern
 | 
			
		||||
to use functions from just one library. For example, if you want to study
 | 
			
		||||
a pattern that was compiled with <b>pcre32_compile()</b>, you must do so
 | 
			
		||||
with <b>pcre32_study()</b>, not <b>pcre_study()</b>, and you must free the
 | 
			
		||||
study data with <b>pcre32_free_study()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">THE HEADER FILE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
There is only one header file, <b>pcre.h</b>. It contains prototypes for all the
 | 
			
		||||
functions in all libraries, as well as definitions of flags, structures, error
 | 
			
		||||
codes, etc.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC8" href="#TOC1">THE LIBRARY NAME</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
In Unix-like systems, the 32-bit library is called <b>libpcre32</b>, and can
 | 
			
		||||
normally be accessed by adding <b>-lpcre32</b> to the command for linking an
 | 
			
		||||
application that uses PCRE.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC9" href="#TOC1">STRING TYPES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
In the 8-bit library, strings are passed to PCRE library functions as vectors
 | 
			
		||||
of bytes with the C type "char *". In the 32-bit library, strings are passed as
 | 
			
		||||
vectors of unsigned 32-bit quantities. The macro PCRE_UCHAR32 specifies an
 | 
			
		||||
appropriate data type, and PCRE_SPTR32 is defined as "const PCRE_UCHAR32 *". In
 | 
			
		||||
very many environments, "unsigned int" is a 32-bit data type. When PCRE is
 | 
			
		||||
built, it defines PCRE_UCHAR32 as "unsigned int", but checks that it really is
 | 
			
		||||
a 32-bit data type. If it is not, the build fails with an error message telling
 | 
			
		||||
the maintainer to modify the definition appropriately.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC10" href="#TOC1">STRUCTURE TYPES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The types of the opaque structures that are used for compiled 32-bit patterns
 | 
			
		||||
and JIT stacks are <b>pcre32</b> and <b>pcre32_jit_stack</b> respectively. The
 | 
			
		||||
type of the user-accessible structure that is returned by <b>pcre32_study()</b>
 | 
			
		||||
is <b>pcre32_extra</b>, and the type of the structure that is used for passing
 | 
			
		||||
data to a callout function is <b>pcre32_callout_block</b>. These structures
 | 
			
		||||
contain the same fields, with the same names, as their 8-bit counterparts. The
 | 
			
		||||
only difference is that pointers to character strings are 32-bit instead of
 | 
			
		||||
8-bit types.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC11" href="#TOC1">32-BIT FUNCTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
For every function in the 8-bit library there is a corresponding function in
 | 
			
		||||
the 32-bit library with a name that starts with <b>pcre32_</b> instead of
 | 
			
		||||
<b>pcre_</b>. The prototypes are listed above. In addition, there is one extra
 | 
			
		||||
function, <b>pcre32_utf32_to_host_byte_order()</b>. This is a utility function
 | 
			
		||||
that converts a UTF-32 character string to host byte order if necessary. The
 | 
			
		||||
other 32-bit functions expect the strings they are passed to be in host byte
 | 
			
		||||
order.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>input</i> and <i>output</i> arguments of
 | 
			
		||||
<b>pcre32_utf32_to_host_byte_order()</b> may point to the same address, that is,
 | 
			
		||||
conversion in place is supported. The output buffer must be at least as long as
 | 
			
		||||
the input.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>length</i> argument specifies the number of 32-bit data units in the
 | 
			
		||||
input string; a negative value specifies a zero-terminated string.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>byte_order</i> is NULL, it is assumed that the string starts off in host
 | 
			
		||||
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
 | 
			
		||||
string (commonly as the first character).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>byte_order</i> is not NULL, a non-zero value of the integer to which it
 | 
			
		||||
points means that the input starts off in host byte order, otherwise the
 | 
			
		||||
opposite order is assumed. Again, BOMs in the string can change this. The final
 | 
			
		||||
byte order is passed back at the end of processing.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>keep_boms</i> is not zero, byte-order mark characters (0xfeff) are copied
 | 
			
		||||
into the output string. Otherwise they are discarded.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The result of the function is the number of 32-bit units placed into the output
 | 
			
		||||
buffer, including the zero terminator if the string was zero-terminated.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The lengths and starting offsets of subject strings must be specified in 32-bit
 | 
			
		||||
data units, and the offsets within subject strings that are returned by the
 | 
			
		||||
matching functions are also in 32-bit units rather than bytes.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The name-to-number translation table that is maintained for named subpatterns
 | 
			
		||||
uses 32-bit characters. The <b>pcre32_get_stringtable_entries()</b> function
 | 
			
		||||
returns the length of each entry in the table as the number of 32-bit data
 | 
			
		||||
units.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC14" href="#TOC1">OPTION NAMES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
There are two new general option names, PCRE_UTF32 and PCRE_NO_UTF32_CHECK,
 | 
			
		||||
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
 | 
			
		||||
fact, these new options define the same bits in the options word. There is a
 | 
			
		||||
discussion about the
 | 
			
		||||
<a href="pcreunicode.html#utf32strings">validity of UTF-32 strings</a>
 | 
			
		||||
in the
 | 
			
		||||
<a href="pcreunicode.html"><b>pcreunicode</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
For the <b>pcre32_config()</b> function there is an option PCRE_CONFIG_UTF32
 | 
			
		||||
that returns 1 if UTF-32 support is configured, otherwise 0. If this option is
 | 
			
		||||
given to <b>pcre_config()</b> or <b>pcre16_config()</b>, or if the
 | 
			
		||||
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 option is given to <b>pcre32_config()</b>,
 | 
			
		||||
the result is the PCRE_ERROR_BADOPTION error.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC15" href="#TOC1">CHARACTER CODES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
In 32-bit mode, when PCRE_UTF32 is not set, character values are treated in the
 | 
			
		||||
same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
 | 
			
		||||
from 0 to 0x7fffffff instead of 0 to 0xff. Character types for characters less
 | 
			
		||||
than 0xff can therefore be influenced by the locale in the same way as before.
 | 
			
		||||
Characters greater than 0xff have only one case, and no "type" (such as letter
 | 
			
		||||
or digit).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
In UTF-32 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
 | 
			
		||||
the exception of values in the range 0xd800 to 0xdfff because those are
 | 
			
		||||
"surrogate" values that are ill-formed in UTF-32.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
A UTF-32 string can indicate its endianness by a special code known as a
 | 
			
		||||
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
 | 
			
		||||
to be in host byte order. A utility function called
 | 
			
		||||
<b>pcre32_utf32_to_host_byte_order()</b> is provided to help with this (see
 | 
			
		||||
above).
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC16" href="#TOC1">ERROR NAMES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The error PCRE_ERROR_BADUTF32 corresponds to its 8-bit counterpart.
 | 
			
		||||
The error PCRE_ERROR_BADMODE is given when a compiled
 | 
			
		||||
pattern is passed to a function that processes patterns in the other
 | 
			
		||||
mode, for example, if a pattern compiled with <b>pcre_compile()</b> is passed to
 | 
			
		||||
<b>pcre32_exec()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There are new error codes whose names begin with PCRE_UTF32_ERR for invalid
 | 
			
		||||
UTF-32 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
 | 
			
		||||
are described in the section entitled
 | 
			
		||||
<a href="pcreapi.html#badutf8reasons">"Reason codes for invalid UTF-8 strings"</a>
 | 
			
		||||
in the main
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page. The UTF-32 errors are:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
 | 
			
		||||
  PCRE_UTF32_ERR2  Non-character
 | 
			
		||||
  PCRE_UTF32_ERR3  Character > 0x10ffff
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC17" href="#TOC1">ERROR TEXTS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
If there is an error while compiling a pattern, the error text that is passed
 | 
			
		||||
back by <b>pcre32_compile()</b> or <b>pcre32_compile2()</b> is still an 8-bit
 | 
			
		||||
character string, zero-terminated.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC18" href="#TOC1">CALLOUTS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>subject</i> and <i>mark</i> fields in the callout block that is passed to
 | 
			
		||||
a callout function point to 32-bit vectors.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC19" href="#TOC1">TESTING</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The <b>pcretest</b> program continues to operate with 8-bit input and output
 | 
			
		||||
files, but it can be used for testing the 32-bit library. If it is run with the
 | 
			
		||||
command line option <b>-32</b>, patterns and subject strings are converted from
 | 
			
		||||
8-bit to 32-bit before being passed to PCRE, and the 32-bit library functions
 | 
			
		||||
are used instead of the 8-bit ones. Returned 32-bit strings are converted to
 | 
			
		||||
8-bit for output. If neither the 8-bit nor the 16-bit library was compiled,
 | 
			
		||||
<b>pcretest</b> defaults to 32-bit and the <b>-32</b> option is ignored.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
When PCRE is being built, the <b>RunTest</b> script that is called by "make
 | 
			
		||||
check" uses the <b>pcretest</b> <b>-C</b> option to discover which of the 8-bit,
 | 
			
		||||
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC20" href="#TOC1">NOT SUPPORTED IN 32-BIT MODE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Not all the features of the 8-bit library are available with the 32-bit
 | 
			
		||||
library. The C++ and POSIX wrapper functions support only the 8-bit library,
 | 
			
		||||
and the <b>pcregrep</b> program is at present 8-bit only.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 12 May 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2013 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										76
									
								
								tools/pcre/doc/html/pcre_assign_jit_stack.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								tools/pcre/doc/html/pcre_assign_jit_stack.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,76 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_assign_jit_stack specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_assign_jit_stack man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include <pcre.h></b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function provides control over the memory used as a stack at run-time by a
 | 
			
		||||
call to <b>pcre[16|32]_exec()</b> with a pattern that has been successfully
 | 
			
		||||
compiled with JIT optimization. The arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  extra     the data pointer returned by <b>pcre[16|32]_study()</b>
 | 
			
		||||
  callback  a callback function
 | 
			
		||||
  data      a JIT stack or a value to be passed to the callback
 | 
			
		||||
              function
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block on
 | 
			
		||||
the machine stack is used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must
 | 
			
		||||
be a valid JIT stack, the result of calling <b>pcre[16|32]_jit_stack_alloc()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>callback</i> is not NULL, it is called with <i>data</i> as an argument at
 | 
			
		||||
the start of matching, in order to set up a JIT stack. If the result is NULL,
 | 
			
		||||
the internal 32K stack is used; otherwise the return value must be a valid JIT
 | 
			
		||||
stack, the result of calling <b>pcre[16|32]_jit_stack_alloc()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
You may safely assign the same JIT stack to multiple patterns, as long as they
 | 
			
		||||
are all matched in the same thread. In a multithread application, each thread
 | 
			
		||||
must use its own JIT stack. For more details, see the
 | 
			
		||||
<a href="pcrejit.html"><b>pcrejit</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										111
									
								
								tools/pcre/doc/html/pcre_compile.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								tools/pcre/doc/html/pcre_compile.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,111 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_compile specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_compile man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include <pcre.h></b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function compiles a regular expression into an internal form. It is the
 | 
			
		||||
same as <b>pcre[16|32]_compile2()</b>, except for the absence of the
 | 
			
		||||
<i>errorcodeptr</i> argument. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>pattern</i>       A zero-terminated string containing the
 | 
			
		||||
                  regular expression to be compiled
 | 
			
		||||
  <i>options</i>       Zero or more option bits
 | 
			
		||||
  <i>errptr</i>        Where to put an error message
 | 
			
		||||
  <i>erroffset</i>     Offset in pattern where error was found
 | 
			
		||||
  <i>tableptr</i>      Pointer to character tables, or NULL to
 | 
			
		||||
                  use the built-in default
 | 
			
		||||
</pre>
 | 
			
		||||
The option bits are:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_ANCHORED           Force pattern anchoring
 | 
			
		||||
  PCRE_AUTO_CALLOUT       Compile automatic callouts
 | 
			
		||||
  PCRE_BSR_ANYCRLF        \R matches only CR, LF, or CRLF
 | 
			
		||||
  PCRE_BSR_UNICODE        \R matches all Unicode line endings
 | 
			
		||||
  PCRE_CASELESS           Do caseless matching
 | 
			
		||||
  PCRE_DOLLAR_ENDONLY     $ not to match newline at end
 | 
			
		||||
  PCRE_DOTALL             . matches anything including NL
 | 
			
		||||
  PCRE_DUPNAMES           Allow duplicate names for subpatterns
 | 
			
		||||
  PCRE_EXTENDED           Ignore white space and # comments
 | 
			
		||||
  PCRE_EXTRA              PCRE extra features
 | 
			
		||||
                            (not much use currently)
 | 
			
		||||
  PCRE_FIRSTLINE          Force matching to be before newline
 | 
			
		||||
  PCRE_JAVASCRIPT_COMPAT  JavaScript compatibility
 | 
			
		||||
  PCRE_MULTILINE          ^ and $ match newlines within data
 | 
			
		||||
  PCRE_NEVER_UTF          Lock out UTF, e.g. via (*UTF)
 | 
			
		||||
  PCRE_NEWLINE_ANY        Recognize any Unicode newline sequence
 | 
			
		||||
  PCRE_NEWLINE_ANYCRLF    Recognize CR, LF, and CRLF as newline
 | 
			
		||||
                            sequences
 | 
			
		||||
  PCRE_NEWLINE_CR         Set CR as the newline sequence
 | 
			
		||||
  PCRE_NEWLINE_CRLF       Set CRLF as the newline sequence
 | 
			
		||||
  PCRE_NEWLINE_LF         Set LF as the newline sequence
 | 
			
		||||
  PCRE_NO_AUTO_CAPTURE    Disable numbered capturing paren-
 | 
			
		||||
                            theses (named ones available)
 | 
			
		||||
  PCRE_NO_AUTO_POSSESS    Disable auto-possessification
 | 
			
		||||
  PCRE_NO_START_OPTIMIZE  Disable match-time start optimizations
 | 
			
		||||
  PCRE_NO_UTF16_CHECK     Do not check the pattern for UTF-16
 | 
			
		||||
                            validity (only relevant if
 | 
			
		||||
                            PCRE_UTF16 is set)
 | 
			
		||||
  PCRE_NO_UTF32_CHECK     Do not check the pattern for UTF-32
 | 
			
		||||
                            validity (only relevant if
 | 
			
		||||
                            PCRE_UTF32 is set)
 | 
			
		||||
  PCRE_NO_UTF8_CHECK      Do not check the pattern for UTF-8
 | 
			
		||||
                            validity (only relevant if
 | 
			
		||||
                            PCRE_UTF8 is set)
 | 
			
		||||
  PCRE_UCP                Use Unicode properties for \d, \w, etc.
 | 
			
		||||
  PCRE_UNGREEDY           Invert greediness of quantifiers
 | 
			
		||||
  PCRE_UTF16              Run in <b>pcre16_compile()</b> UTF-16 mode
 | 
			
		||||
  PCRE_UTF32              Run in <b>pcre32_compile()</b> UTF-32 mode
 | 
			
		||||
  PCRE_UTF8               Run in <b>pcre_compile()</b> UTF-8 mode
 | 
			
		||||
</pre>
 | 
			
		||||
PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
 | 
			
		||||
PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The yield of the function is a pointer to a private data structure that
 | 
			
		||||
contains the compiled pattern, or NULL if an error was detected. Note that
 | 
			
		||||
compiling regular expressions with one version of PCRE for use with a different
 | 
			
		||||
version is not guaranteed to work and may cause crashes.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										115
									
								
								tools/pcre/doc/html/pcre_compile2.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								tools/pcre/doc/html/pcre_compile2.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,115 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_compile2 specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_compile2 man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     int *<i>errorcodeptr</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     int *<i>errorcodeptr</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     int *<i>errorcodeptr</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
 | 
			
		||||
<b>     const unsigned char *<i>tableptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function compiles a regular expression into an internal form. It is the
 | 
			
		||||
same as <b>pcre[16|32]_compile()</b>, except for the addition of the
 | 
			
		||||
<i>errorcodeptr</i> argument. The arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>pattern</i>       A zero-terminated string containing the
 | 
			
		||||
                  regular expression to be compiled
 | 
			
		||||
  <i>options</i>       Zero or more option bits
 | 
			
		||||
  <i>errorcodeptr</i>  Where to put an error code
 | 
			
		||||
  <i>errptr</i>        Where to put an error message
 | 
			
		||||
  <i>erroffset</i>     Offset in pattern where error was found
 | 
			
		||||
  <i>tableptr</i>      Pointer to character tables, or NULL to
 | 
			
		||||
                  use the built-in default
 | 
			
		||||
</pre>
 | 
			
		||||
The option bits are:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_ANCHORED           Force pattern anchoring
 | 
			
		||||
  PCRE_AUTO_CALLOUT       Compile automatic callouts
 | 
			
		||||
  PCRE_BSR_ANYCRLF        \R matches only CR, LF, or CRLF
 | 
			
		||||
  PCRE_BSR_UNICODE        \R matches all Unicode line endings
 | 
			
		||||
  PCRE_CASELESS           Do caseless matching
 | 
			
		||||
  PCRE_DOLLAR_ENDONLY     $ not to match newline at end
 | 
			
		||||
  PCRE_DOTALL             . matches anything including NL
 | 
			
		||||
  PCRE_DUPNAMES           Allow duplicate names for subpatterns
 | 
			
		||||
  PCRE_EXTENDED           Ignore white space and # comments
 | 
			
		||||
  PCRE_EXTRA              PCRE extra features
 | 
			
		||||
                            (not much use currently)
 | 
			
		||||
  PCRE_FIRSTLINE          Force matching to be before newline
 | 
			
		||||
  PCRE_JAVASCRIPT_COMPAT  JavaScript compatibility
 | 
			
		||||
  PCRE_MULTILINE          ^ and $ match newlines within data
 | 
			
		||||
  PCRE_NEVER_UTF          Lock out UTF, e.g. via (*UTF)
 | 
			
		||||
  PCRE_NEWLINE_ANY        Recognize any Unicode newline sequence
 | 
			
		||||
  PCRE_NEWLINE_ANYCRLF    Recognize CR, LF, and CRLF as newline
 | 
			
		||||
                            sequences
 | 
			
		||||
  PCRE_NEWLINE_CR         Set CR as the newline sequence
 | 
			
		||||
  PCRE_NEWLINE_CRLF       Set CRLF as the newline sequence
 | 
			
		||||
  PCRE_NEWLINE_LF         Set LF as the newline sequence
 | 
			
		||||
  PCRE_NO_AUTO_CAPTURE    Disable numbered capturing paren-
 | 
			
		||||
                            theses (named ones available)
 | 
			
		||||
  PCRE_NO_AUTO_POSSESS    Disable auto-possessification
 | 
			
		||||
  PCRE_NO_START_OPTIMIZE  Disable match-time start optimizations
 | 
			
		||||
  PCRE_NO_UTF16_CHECK     Do not check the pattern for UTF-16
 | 
			
		||||
                            validity (only relevant if
 | 
			
		||||
                            PCRE_UTF16 is set)
 | 
			
		||||
  PCRE_NO_UTF32_CHECK     Do not check the pattern for UTF-32
 | 
			
		||||
                            validity (only relevant if
 | 
			
		||||
                            PCRE_UTF32 is set)
 | 
			
		||||
  PCRE_NO_UTF8_CHECK      Do not check the pattern for UTF-8
 | 
			
		||||
                            validity (only relevant if
 | 
			
		||||
                            PCRE_UTF8 is set)
 | 
			
		||||
  PCRE_UCP                Use Unicode properties for \d, \w, etc.
 | 
			
		||||
  PCRE_UNGREEDY           Invert greediness of quantifiers
 | 
			
		||||
  PCRE_UTF16              Run <b>pcre16_compile()</b> in UTF-16 mode
 | 
			
		||||
  PCRE_UTF32              Run <b>pcre32_compile()</b> in UTF-32 mode
 | 
			
		||||
  PCRE_UTF8               Run <b>pcre_compile()</b> in UTF-8 mode
 | 
			
		||||
</pre>
 | 
			
		||||
PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
 | 
			
		||||
PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The yield of the function is a pointer to a private data structure that
 | 
			
		||||
contains the compiled pattern, or NULL if an error was detected. Note that
 | 
			
		||||
compiling regular expressions with one version of PCRE for use with a different
 | 
			
		||||
version is not guaranteed to work and may cause crashes.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										92
									
								
								tools/pcre/doc/html/pcre_config.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								tools/pcre/doc/html/pcre_config.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,92 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_config specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_config man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function makes it possible for a client program to find out which optional
 | 
			
		||||
features are available in the version of the PCRE library it is using. The
 | 
			
		||||
arguments are as follows:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>what</i>     A code specifying what information is required
 | 
			
		||||
  <i>where</i>    Points to where to put the data
 | 
			
		||||
</pre>
 | 
			
		||||
The <i>where</i> argument must point to an integer variable, except for
 | 
			
		||||
PCRE_CONFIG_MATCH_LIMIT and PCRE_CONFIG_MATCH_LIMIT_RECURSION, when it must
 | 
			
		||||
point to an unsigned long integer. The available codes are:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_CONFIG_JIT           Availability of just-in-time compiler
 | 
			
		||||
                              support (1=yes 0=no)
 | 
			
		||||
  PCRE_CONFIG_JITTARGET     String containing information about the
 | 
			
		||||
                              target architecture for the JIT compiler,
 | 
			
		||||
                              or NULL if there is no JIT support
 | 
			
		||||
  PCRE_CONFIG_LINK_SIZE     Internal link size: 2, 3, or 4
 | 
			
		||||
  PCRE_CONFIG_PARENS_LIMIT  Parentheses nesting limit
 | 
			
		||||
  PCRE_CONFIG_MATCH_LIMIT   Internal resource limit
 | 
			
		||||
  PCRE_CONFIG_MATCH_LIMIT_RECURSION
 | 
			
		||||
                            Internal recursion depth limit
 | 
			
		||||
  PCRE_CONFIG_NEWLINE       Value of the default newline sequence:
 | 
			
		||||
                                13 (0x000d)    for CR
 | 
			
		||||
                                10 (0x000a)    for LF
 | 
			
		||||
                              3338 (0x0d0a)    for CRLF
 | 
			
		||||
                                -2             for ANYCRLF
 | 
			
		||||
                                -1             for ANY
 | 
			
		||||
  PCRE_CONFIG_BSR           Indicates what \R matches by default:
 | 
			
		||||
                                 0             all Unicode line endings
 | 
			
		||||
                                 1             CR, LF, or CRLF only
 | 
			
		||||
  PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
 | 
			
		||||
                            Threshold of return slots, above which
 | 
			
		||||
                              <b>malloc()</b> is used by the POSIX API
 | 
			
		||||
  PCRE_CONFIG_STACKRECURSE  Recursion implementation (1=stack 0=heap)
 | 
			
		||||
  PCRE_CONFIG_UTF16         Availability of UTF-16 support (1=yes
 | 
			
		||||
                               0=no); option for <b>pcre16_config()</b>
 | 
			
		||||
  PCRE_CONFIG_UTF32         Availability of UTF-32 support (1=yes
 | 
			
		||||
                               0=no); option for <b>pcre32_config()</b>
 | 
			
		||||
  PCRE_CONFIG_UTF8          Availability of UTF-8 support (1=yes 0=no);
 | 
			
		||||
                              option for <b>pcre_config()</b>
 | 
			
		||||
  PCRE_CONFIG_UNICODE_PROPERTIES
 | 
			
		||||
                            Availability of Unicode property support
 | 
			
		||||
                              (1=yes 0=no)
 | 
			
		||||
</pre>
 | 
			
		||||
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. That error
 | 
			
		||||
is also given if PCRE_CONFIG_UTF16 or PCRE_CONFIG_UTF32 is passed to
 | 
			
		||||
<b>pcre_config()</b>, if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 is passed to
 | 
			
		||||
<b>pcre16_config()</b>, or if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 is passed to
 | 
			
		||||
<b>pcre32_config()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										65
									
								
								tools/pcre/doc/html/pcre_copy_named_substring.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								tools/pcre/doc/html/pcre_copy_named_substring.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,65 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_copy_named_substring specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_copy_named_substring man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
 | 
			
		||||
<b>     const char *<i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, const char *<i>stringname</i>,</b>
 | 
			
		||||
<b>     char *<i>buffer</i>, int <i>buffersize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
 | 
			
		||||
<b>     PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
 | 
			
		||||
<b>     PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This is a convenience function for extracting a captured substring, identified
 | 
			
		||||
by name, into a given buffer. The arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>          Pattern that was successfully matched
 | 
			
		||||
  <i>subject</i>       Subject that has been successfully matched
 | 
			
		||||
  <i>ovector</i>       Offset vector that <b>pcre[16|32]_exec()</b> used
 | 
			
		||||
  <i>stringcount</i>   Value returned by <b>pcre[16|32]_exec()</b>
 | 
			
		||||
  <i>stringname</i>    Name of the required substring
 | 
			
		||||
  <i>buffer</i>        Buffer to receive the string
 | 
			
		||||
  <i>buffersize</i>    Size of buffer
 | 
			
		||||
</pre>
 | 
			
		||||
The yield is the length of the substring, PCRE_ERROR_NOMEMORY if the buffer was
 | 
			
		||||
too small, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										61
									
								
								tools/pcre/doc/html/pcre_copy_substring.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								tools/pcre/doc/html/pcre_copy_substring.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,61 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_copy_substring specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_copy_substring man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
 | 
			
		||||
<b>     int <i>buffersize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
 | 
			
		||||
<b>     int <i>buffersize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
 | 
			
		||||
<b>     int <i>buffersize</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This is a convenience function for extracting a captured substring into a given
 | 
			
		||||
buffer. The arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>subject</i>       Subject that has been successfully matched
 | 
			
		||||
  <i>ovector</i>       Offset vector that <b>pcre[16|32]_exec()</b> used
 | 
			
		||||
  <i>stringcount</i>   Value returned by <b>pcre[16|32]_exec()</b>
 | 
			
		||||
  <i>stringnumber</i>  Number of the required substring
 | 
			
		||||
  <i>buffer</i>        Buffer to receive the string
 | 
			
		||||
  <i>buffersize</i>    Size of buffer
 | 
			
		||||
</pre>
 | 
			
		||||
The yield is the length of the string, PCRE_ERROR_NOMEMORY if the buffer was
 | 
			
		||||
too small, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										129
									
								
								tools/pcre/doc/html/pcre_dfa_exec.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								tools/pcre/doc/html/pcre_dfa_exec.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,129 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_dfa_exec specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_dfa_exec man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
 | 
			
		||||
<b>     int *<i>workspace</i>, int <i>wscount</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
 | 
			
		||||
<b>     int *<i>workspace</i>, int <i>wscount</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
 | 
			
		||||
<b>     int *<i>workspace</i>, int <i>wscount</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function matches a compiled regular expression against a given subject
 | 
			
		||||
string, using an alternative matching algorithm that scans the subject string
 | 
			
		||||
just once (<i>not</i> Perl-compatible). Note that the main, Perl-compatible,
 | 
			
		||||
matching function is <b>pcre[16|32]_exec()</b>. The arguments for this function
 | 
			
		||||
are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>         Points to the compiled pattern
 | 
			
		||||
  <i>extra</i>        Points to an associated <b>pcre[16|32]_extra</b> structure,
 | 
			
		||||
                 or is NULL
 | 
			
		||||
  <i>subject</i>      Points to the subject string
 | 
			
		||||
  <i>length</i>       Length of the subject string
 | 
			
		||||
  <i>startoffset</i>  Offset in the subject at which to start matching
 | 
			
		||||
  <i>options</i>      Option bits
 | 
			
		||||
  <i>ovector</i>      Points to a vector of ints for result offsets
 | 
			
		||||
  <i>ovecsize</i>     Number of elements in the vector
 | 
			
		||||
  <i>workspace</i>    Points to a vector of ints used as working space
 | 
			
		||||
  <i>wscount</i>      Number of elements in the vector
 | 
			
		||||
</pre>
 | 
			
		||||
The units for <i>length</i> and <i>startoffset</i> are bytes for
 | 
			
		||||
<b>pcre_dfa_exec()</b>, 16-bit data items for <b>pcre16_dfa_exec()</b>, and 32-bit items
 | 
			
		||||
for <b>pcre32_dfa_exec()</b>. The options are:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_ANCHORED          Match only at the first position
 | 
			
		||||
  PCRE_BSR_ANYCRLF       \R matches only CR, LF, or CRLF
 | 
			
		||||
  PCRE_BSR_UNICODE       \R matches all Unicode line endings
 | 
			
		||||
  PCRE_NEWLINE_ANY       Recognize any Unicode newline sequence
 | 
			
		||||
  PCRE_NEWLINE_ANYCRLF   Recognize CR, LF, &amp; CRLF as newline sequences
 | 
			
		||||
  PCRE_NEWLINE_CR        Recognize CR as the only newline sequence
 | 
			
		||||
  PCRE_NEWLINE_CRLF      Recognize CRLF as the only newline sequence
 | 
			
		||||
  PCRE_NEWLINE_LF        Recognize LF as the only newline sequence
 | 
			
		||||
  PCRE_NOTBOL            Subject is not the beginning of a line
 | 
			
		||||
  PCRE_NOTEOL            Subject is not the end of a line
 | 
			
		||||
  PCRE_NOTEMPTY          An empty string is not a valid match
 | 
			
		||||
  PCRE_NOTEMPTY_ATSTART  An empty string at the start of the subject
 | 
			
		||||
                           is not a valid match
 | 
			
		||||
  PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
 | 
			
		||||
  PCRE_NO_UTF16_CHECK    Do not check the subject for UTF-16
 | 
			
		||||
                           validity (only relevant if PCRE_UTF16
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_NO_UTF32_CHECK    Do not check the subject for UTF-32
 | 
			
		||||
                           validity (only relevant if PCRE_UTF32
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_NO_UTF8_CHECK     Do not check the subject for UTF-8
 | 
			
		||||
                           validity (only relevant if PCRE_UTF8
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_PARTIAL           ) Return PCRE_ERROR_PARTIAL for a partial
 | 
			
		||||
  PCRE_PARTIAL_SOFT      )   match if no full matches are found
 | 
			
		||||
  PCRE_PARTIAL_HARD      Return PCRE_ERROR_PARTIAL for a partial match
 | 
			
		||||
                           even if there is a full match as well
 | 
			
		||||
  PCRE_DFA_SHORTEST      Return only the shortest match
 | 
			
		||||
  PCRE_DFA_RESTART       Restart after a partial match
 | 
			
		||||
</pre>
 | 
			
		||||
There are restrictions on what may appear in a pattern when using this matching
 | 
			
		||||
function. Details are given in the
 | 
			
		||||
<a href="pcrematching.html"><b>pcrematching</b></a>
 | 
			
		||||
documentation. For details of partial matching, see the
 | 
			
		||||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
A <b>pcre[16|32]_extra</b> structure contains the following fields:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>flags</i>            Bits indicating which fields are set
 | 
			
		||||
  <i>study_data</i>       Opaque data from <b>pcre[16|32]_study()</b>
 | 
			
		||||
  <i>match_limit</i>      Limit on internal resource use
 | 
			
		||||
  <i>match_limit_recursion</i>  Limit on internal recursion depth
 | 
			
		||||
  <i>callout_data</i>     Opaque data passed back to callouts
 | 
			
		||||
  <i>tables</i>           Points to character tables or is NULL
 | 
			
		||||
  <i>mark</i>             For passing back a *MARK pointer
 | 
			
		||||
  <i>executable_jit</i>   Opaque data from JIT compilation
 | 
			
		||||
</pre>
 | 
			
		||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
 | 
			
		||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
 | 
			
		||||
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. For this
 | 
			
		||||
matching function, the <i>match_limit</i> and <i>match_limit_recursion</i> fields
 | 
			
		||||
are not used, and must not be set. The PCRE_EXTRA_EXECUTABLE_JIT flag and
 | 
			
		||||
the corresponding variable are ignored.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										111
									
								
								tools/pcre/doc/html/pcre_exec.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								tools/pcre/doc/html/pcre_exec.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,111 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_exec specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_exec man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function matches a compiled regular expression against a given subject
 | 
			
		||||
string, using a matching algorithm that is similar to Perl's. It returns
 | 
			
		||||
offsets to captured substrings. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>         Points to the compiled pattern
 | 
			
		||||
  <i>extra</i>        Points to an associated <b>pcre[16|32]_extra</b> structure,
 | 
			
		||||
                 or is NULL
 | 
			
		||||
  <i>subject</i>      Points to the subject string
 | 
			
		||||
  <i>length</i>       Length of the subject string
 | 
			
		||||
  <i>startoffset</i>  Offset in the subject at which to start matching
 | 
			
		||||
  <i>options</i>      Option bits
 | 
			
		||||
  <i>ovector</i>      Points to a vector of ints for result offsets
 | 
			
		||||
  <i>ovecsize</i>     Number of elements in the vector (a multiple of 3)
 | 
			
		||||
</pre>
 | 
			
		||||
The units for <i>length</i> and <i>startoffset</i> are bytes for
 | 
			
		||||
<b>pcre_exec()</b>, 16-bit data items for <b>pcre16_exec()</b>, and 32-bit items
 | 
			
		||||
for <b>pcre32_exec()</b>. The options are:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_ANCHORED          Match only at the first position
 | 
			
		||||
  PCRE_BSR_ANYCRLF       \R matches only CR, LF, or CRLF
 | 
			
		||||
  PCRE_BSR_UNICODE       \R matches all Unicode line endings
 | 
			
		||||
  PCRE_NEWLINE_ANY       Recognize any Unicode newline sequence
 | 
			
		||||
  PCRE_NEWLINE_ANYCRLF   Recognize CR, LF, &amp; CRLF as newline sequences
 | 
			
		||||
  PCRE_NEWLINE_CR        Recognize CR as the only newline sequence
 | 
			
		||||
  PCRE_NEWLINE_CRLF      Recognize CRLF as the only newline sequence
 | 
			
		||||
  PCRE_NEWLINE_LF        Recognize LF as the only newline sequence
 | 
			
		||||
  PCRE_NOTBOL            Subject string is not the beginning of a line
 | 
			
		||||
  PCRE_NOTEOL            Subject string is not the end of a line
 | 
			
		||||
  PCRE_NOTEMPTY          An empty string is not a valid match
 | 
			
		||||
  PCRE_NOTEMPTY_ATSTART  An empty string at the start of the subject
 | 
			
		||||
                           is not a valid match
 | 
			
		||||
  PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
 | 
			
		||||
  PCRE_NO_UTF16_CHECK    Do not check the subject for UTF-16
 | 
			
		||||
                           validity (only relevant if PCRE_UTF16
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_NO_UTF32_CHECK    Do not check the subject for UTF-32
 | 
			
		||||
                           validity (only relevant if PCRE_UTF32
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_NO_UTF8_CHECK     Do not check the subject for UTF-8
 | 
			
		||||
                           validity (only relevant if PCRE_UTF8
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_PARTIAL           ) Return PCRE_ERROR_PARTIAL for a partial
 | 
			
		||||
  PCRE_PARTIAL_SOFT      )   match if no full matches are found
 | 
			
		||||
  PCRE_PARTIAL_HARD      Return PCRE_ERROR_PARTIAL for a partial match
 | 
			
		||||
                           if that is found before a full match
 | 
			
		||||
</pre>
 | 
			
		||||
For details of partial matching, see the
 | 
			
		||||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
 | 
			
		||||
page. A <b>pcre_extra</b> structure contains the following fields:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>flags</i>            Bits indicating which fields are set
 | 
			
		||||
  <i>study_data</i>       Opaque data from <b>pcre[16|32]_study()</b>
 | 
			
		||||
  <i>match_limit</i>      Limit on internal resource use
 | 
			
		||||
  <i>match_limit_recursion</i>  Limit on internal recursion depth
 | 
			
		||||
  <i>callout_data</i>     Opaque data passed back to callouts
 | 
			
		||||
  <i>tables</i>           Points to character tables or is NULL
 | 
			
		||||
  <i>mark</i>             For passing back a *MARK pointer
 | 
			
		||||
  <i>executable_jit</i>   Opaque data from JIT compilation
 | 
			
		||||
</pre>
 | 
			
		||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
 | 
			
		||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
 | 
			
		||||
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										46
									
								
								tools/pcre/doc/html/pcre_free_study.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								tools/pcre/doc/html/pcre_free_study.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,46 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_free_study specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_free_study man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function is used to free the memory used for the data generated by a call
 | 
			
		||||
to <b>pcre[16|32]_study()</b> when it is no longer needed. The argument must be the
 | 
			
		||||
result of such a call.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										46
									
								
								tools/pcre/doc/html/pcre_free_substring.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								tools/pcre/doc/html/pcre_free_substring.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,46 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_free_substring specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_free_substring man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This is a convenience function for freeing the store obtained by a previous
 | 
			
		||||
call to <b>pcre[16|32]_get_substring()</b> or <b>pcre[16|32]_get_named_substring()</b>.
 | 
			
		||||
Its only argument is a pointer to the string.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										46
									
								
								tools/pcre/doc/html/pcre_free_substring_list.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								tools/pcre/doc/html/pcre_free_substring_list.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,46 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_free_substring_list specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_free_substring_list man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This is a convenience function for freeing the store obtained by a previous
 | 
			
		||||
call to <b>pcre[16|32]_get_substring_list()</b>. Its only argument is a pointer to
 | 
			
		||||
the list of string pointers.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										108
									
								
								tools/pcre/doc/html/pcre_fullinfo.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								tools/pcre/doc/html/pcre_fullinfo.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,108 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_fullinfo specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_fullinfo man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     int <i>what</i>, void *<i>where</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function returns information about a compiled pattern. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>                      Compiled regular expression
 | 
			
		||||
  <i>extra</i>                     Result of <b>pcre[16|32]_study()</b> or NULL
 | 
			
		||||
  <i>what</i>                      What information is required
 | 
			
		||||
  <i>where</i>                     Where to put the information
 | 
			
		||||
</pre>
 | 
			
		||||
The following information is available:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_INFO_BACKREFMAX      Number of highest back reference
 | 
			
		||||
  PCRE_INFO_CAPTURECOUNT    Number of capturing subpatterns
 | 
			
		||||
  PCRE_INFO_DEFAULT_TABLES  Pointer to default tables
 | 
			
		||||
  PCRE_INFO_FIRSTBYTE       Fixed first data unit for a match, or
 | 
			
		||||
                              -1 for start of string
 | 
			
		||||
                                 or after newline, or
 | 
			
		||||
                              -2 otherwise
 | 
			
		||||
  PCRE_INFO_FIRSTTABLE      Table of first data units (after studying)
 | 
			
		||||
  PCRE_INFO_HASCRORLF       Return 1 if explicit CR or LF matches exist
 | 
			
		||||
  PCRE_INFO_JCHANGED        Return 1 if (?J) or (?-J) was used
 | 
			
		||||
  PCRE_INFO_JIT             Return 1 after successful JIT compilation
 | 
			
		||||
  PCRE_INFO_JITSIZE         Size of JIT compiled code
 | 
			
		||||
  PCRE_INFO_LASTLITERAL     Literal last data unit required
 | 
			
		||||
  PCRE_INFO_MINLENGTH       Lower bound length of matching strings
 | 
			
		||||
  PCRE_INFO_NAMECOUNT       Number of named subpatterns
 | 
			
		||||
  PCRE_INFO_NAMEENTRYSIZE   Size of name table entry
 | 
			
		||||
  PCRE_INFO_NAMETABLE       Pointer to name table
 | 
			
		||||
  PCRE_INFO_OKPARTIAL       Return 1 if partial matching can be tried
 | 
			
		||||
                              (always returns 1 after release 8.00)
 | 
			
		||||
  PCRE_INFO_OPTIONS         Option bits used for compilation
 | 
			
		||||
  PCRE_INFO_SIZE            Size of compiled pattern
 | 
			
		||||
  PCRE_INFO_STUDYSIZE       Size of study data
 | 
			
		||||
  PCRE_INFO_FIRSTCHARACTER      Fixed first data unit for a match
 | 
			
		||||
  PCRE_INFO_FIRSTCHARACTERFLAGS Returns
 | 
			
		||||
                                  1 if there is a first data character set, which can
 | 
			
		||||
                                    then be retrieved using PCRE_INFO_FIRSTCHARACTER,
 | 
			
		||||
                                  2 if the first character is at the start of the data
 | 
			
		||||
                                    string or after a newline, and
 | 
			
		||||
                                  0 otherwise
 | 
			
		||||
  PCRE_INFO_REQUIREDCHAR      Literal last data unit required
 | 
			
		||||
  PCRE_INFO_REQUIREDCHARFLAGS Returns 1 if the last data character is set (which can then
 | 
			
		||||
                              be retrieved using PCRE_INFO_REQUIREDCHAR); 0 otherwise
 | 
			
		||||
</pre>
 | 
			
		||||
The <i>where</i> argument must point to an integer variable, except for the
 | 
			
		||||
following <i>what</i> values:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_INFO_DEFAULT_TABLES  const unsigned char *
 | 
			
		||||
  PCRE_INFO_FIRSTTABLE      const unsigned char *
 | 
			
		||||
  PCRE_INFO_NAMETABLE       PCRE_SPTR16           (16-bit library)
 | 
			
		||||
  PCRE_INFO_NAMETABLE       PCRE_SPTR32           (32-bit library)
 | 
			
		||||
  PCRE_INFO_NAMETABLE       const unsigned char * (8-bit library)
 | 
			
		||||
  PCRE_INFO_OPTIONS         unsigned long int
 | 
			
		||||
  PCRE_INFO_SIZE            size_t
 | 
			
		||||
  PCRE_INFO_FIRSTCHARACTER  uint32_t
 | 
			
		||||
  PCRE_INFO_REQUIREDCHAR    uint32_t
 | 
			
		||||
</pre>
 | 
			
		||||
The yield of the function is zero on success or:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_ERROR_NULL           the argument <i>code</i> was NULL
 | 
			
		||||
                            the argument <i>where</i> was NULL
 | 
			
		||||
  PCRE_ERROR_BADMAGIC       the "magic number" was not found
 | 
			
		||||
  PCRE_ERROR_BADOPTION      the value of <i>what</i> was invalid
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										68
									
								
								tools/pcre/doc/html/pcre_get_named_substring.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								tools/pcre/doc/html/pcre_get_named_substring.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,68 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_get_named_substring specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_get_named_substring man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
 | 
			
		||||
<b>     const char *<i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, const char *<i>stringname</i>,</b>
 | 
			
		||||
<b>     const char **<i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 *<i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 *<i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This is a convenience function for extracting a captured substring by name. The
 | 
			
		||||
arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>          Compiled pattern
 | 
			
		||||
  <i>subject</i>       Subject that has been successfully matched
 | 
			
		||||
  <i>ovector</i>       Offset vector that <b>pcre[16|32]_exec()</b> used
 | 
			
		||||
  <i>stringcount</i>   Value returned by <b>pcre[16|32]_exec()</b>
 | 
			
		||||
  <i>stringname</i>    Name of the required substring
 | 
			
		||||
  <i>stringptr</i>     Where to put the string pointer
 | 
			
		||||
</pre>
 | 
			
		||||
The memory in which the substring is placed is obtained by calling
 | 
			
		||||
<b>pcre[16|32]_malloc()</b>. The convenience function
 | 
			
		||||
<b>pcre[16|32]_free_substring()</b> can be used to free it when it is no longer
 | 
			
		||||
needed. The yield of the function is the length of the extracted substring,
 | 
			
		||||
PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
 | 
			
		||||
PCRE_ERROR_NOSUBSTRING if the string name is invalid.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										57
									
								
								tools/pcre/doc/html/pcre_get_stringnumber.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								tools/pcre/doc/html/pcre_get_stringnumber.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,57 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_get_stringnumber specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_get_stringnumber man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
 | 
			
		||||
<b>     const char *<i>name</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>name</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>name</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This convenience function finds the number of a named substring capturing
 | 
			
		||||
parenthesis in a compiled pattern. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>    Compiled regular expression
 | 
			
		||||
  <i>name</i>    Name whose number is required
 | 
			
		||||
</pre>
 | 
			
		||||
The yield of the function is the number of the parenthesis if the name is
 | 
			
		||||
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
 | 
			
		||||
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
 | 
			
		||||
<b>pcre[16|32]_get_stringnumber()</b>. You can obtain the complete list by calling
 | 
			
		||||
<b>pcre[16|32]_get_stringtable_entries()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										60
									
								
								tools/pcre/doc/html/pcre_get_stringtable_entries.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								tools/pcre/doc/html/pcre_get_stringtable_entries.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,60 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_get_stringtable_entries specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_get_stringtable_entries man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
 | 
			
		||||
<b>     const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This convenience function finds, for a compiled pattern, the first and last
 | 
			
		||||
entries for a given name in the table that translates capturing parenthesis
 | 
			
		||||
names into numbers. When names are required to be unique (PCRE_DUPNAMES is
 | 
			
		||||
<i>not</i> set), it is usually easier to use <b>pcre[16|32]_get_stringnumber()</b>
 | 
			
		||||
instead.
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>    Compiled regular expression
 | 
			
		||||
  <i>name</i>    Name whose entries are required
 | 
			
		||||
  <i>first</i>   Where to return a pointer to the first entry
 | 
			
		||||
  <i>last</i>    Where to return a pointer to the last entry
 | 
			
		||||
</pre>
 | 
			
		||||
The yield of the function is the length of each entry, or
 | 
			
		||||
PCRE_ERROR_NOSUBSTRING if none are found.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API, including the format of
 | 
			
		||||
the table entries, in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page, and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										64
									
								
								tools/pcre/doc/html/pcre_get_substring.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								tools/pcre/doc/html/pcre_get_substring.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,64 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_get_substring specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_get_substring man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>,</b>
 | 
			
		||||
<b>     const char **<i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 *<i>stringptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
 | 
			
		||||
<b>     int <i>stringcount</i>, int <i>stringnumber</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 *<i>stringptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This is a convenience function for extracting a captured substring. The
 | 
			
		||||
arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>subject</i>       Subject that has been successfully matched
 | 
			
		||||
  <i>ovector</i>       Offset vector that <b>pcre[16|32]_exec()</b> used
 | 
			
		||||
  <i>stringcount</i>   Value returned by <b>pcre[16|32]_exec()</b>
 | 
			
		||||
  <i>stringnumber</i>  Number of the required substring
 | 
			
		||||
  <i>stringptr</i>     Where to put the string pointer
 | 
			
		||||
</pre>
 | 
			
		||||
The memory in which the substring is placed is obtained by calling
 | 
			
		||||
<b>pcre[16|32]_malloc()</b>. The convenience function
 | 
			
		||||
<b>pcre[16|32]_free_substring()</b> can be used to free it when it is no longer
 | 
			
		||||
needed. The yield of the function is the length of the substring,
 | 
			
		||||
PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
 | 
			
		||||
PCRE_ERROR_NOSUBSTRING if the string number is invalid.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										61
									
								
								tools/pcre/doc/html/pcre_get_substring_list.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								tools/pcre/doc/html/pcre_get_substring_list.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,61 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_get_substring_list specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_get_substring_list man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
 | 
			
		||||
<b>     int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
 | 
			
		||||
<b>     int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
 | 
			
		||||
<b>     int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This is a convenience function for extracting a list of all the captured
 | 
			
		||||
substrings. The arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>subject</i>       Subject that has been successfully matched
 | 
			
		||||
  <i>ovector</i>       Offset vector that <b>pcre[16|32]_exec</b> used
 | 
			
		||||
  <i>stringcount</i>   Value returned by <b>pcre[16|32]_exec</b>
 | 
			
		||||
  <i>listptr</i>       Where to put a pointer to the list
 | 
			
		||||
</pre>
 | 
			
		||||
The memory in which the substrings and the list are placed is obtained by
 | 
			
		||||
calling <b>pcre[16|32]_malloc()</b>. The convenience function
 | 
			
		||||
<b>pcre[16|32]_free_substring_list()</b> can be used to free it when it is no
 | 
			
		||||
longer needed. A pointer to a list of pointers is put in the variable whose
 | 
			
		||||
address is in <i>listptr</i>. The list is terminated by a NULL pointer. The
 | 
			
		||||
yield of the function is zero on success or PCRE_ERROR_NOMEMORY if sufficient
 | 
			
		||||
memory could not be obtained.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										108
									
								
								tools/pcre/doc/html/pcre_jit_exec.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								tools/pcre/doc/html/pcre_jit_exec.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,108 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_jit_exec specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_jit_exec man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
 | 
			
		||||
<b>     pcre_jit_stack *<i>jstack</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
 | 
			
		||||
<b>     pcre_jit_stack *<i>jstack</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
 | 
			
		||||
<b>     int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
 | 
			
		||||
<b>     pcre_jit_stack *<i>jstack</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function matches a compiled regular expression that has been successfully
 | 
			
		||||
studied with one of the JIT options against a given subject string, using a
 | 
			
		||||
matching algorithm that is similar to Perl's. It is a "fast path" interface to
 | 
			
		||||
JIT, and it bypasses some of the sanity checks that <b>pcre_exec()</b> applies.
 | 
			
		||||
It returns offsets to captured substrings. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>         Points to the compiled pattern
 | 
			
		||||
  <i>extra</i>        Points to an associated <b>pcre[16|32]_extra</b> structure,
 | 
			
		||||
                 or is NULL
 | 
			
		||||
  <i>subject</i>      Points to the subject string
 | 
			
		||||
  <i>length</i>       Length of the subject string, in bytes
 | 
			
		||||
  <i>startoffset</i>  Offset in bytes in the subject at which to
 | 
			
		||||
                 start matching
 | 
			
		||||
  <i>options</i>      Option bits
 | 
			
		||||
  <i>ovector</i>      Points to a vector of ints for result offsets
 | 
			
		||||
  <i>ovecsize</i>     Number of elements in the vector (a multiple of 3)
 | 
			
		||||
  <i>jstack</i>       Pointer to a JIT stack
 | 
			
		||||
</pre>
 | 
			
		||||
The allowed options are:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_NOTBOL            Subject string is not the beginning of a line
 | 
			
		||||
  PCRE_NOTEOL            Subject string is not the end of a line
 | 
			
		||||
  PCRE_NOTEMPTY          An empty string is not a valid match
 | 
			
		||||
  PCRE_NOTEMPTY_ATSTART  An empty string at the start of the subject
 | 
			
		||||
                           is not a valid match
 | 
			
		||||
  PCRE_NO_UTF16_CHECK    Do not check the subject for UTF-16
 | 
			
		||||
                           validity (only relevant if PCRE_UTF16
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_NO_UTF32_CHECK    Do not check the subject for UTF-32
 | 
			
		||||
                           validity (only relevant if PCRE_UTF32
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_NO_UTF8_CHECK     Do not check the subject for UTF-8
 | 
			
		||||
                           validity (only relevant if PCRE_UTF8
 | 
			
		||||
                           was set at compile time)
 | 
			
		||||
  PCRE_PARTIAL           ) Return PCRE_ERROR_PARTIAL for a partial
 | 
			
		||||
  PCRE_PARTIAL_SOFT      )   match if no full matches are found
 | 
			
		||||
  PCRE_PARTIAL_HARD      Return PCRE_ERROR_PARTIAL for a partial match
 | 
			
		||||
                           if that is found before a full match
 | 
			
		||||
</pre>
 | 
			
		||||
However, the PCRE_NO_UTF[8|16|32]_CHECK options have no effect, as this check
 | 
			
		||||
is never applied. For details of partial matching, see the
 | 
			
		||||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
 | 
			
		||||
page. A <b>pcre_extra</b> structure contains the following fields:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>flags</i>            Bits indicating which fields are set
 | 
			
		||||
  <i>study_data</i>       Opaque data from <b>pcre[16|32]_study()</b>
 | 
			
		||||
  <i>match_limit</i>      Limit on internal resource use
 | 
			
		||||
  <i>match_limit_recursion</i>  Limit on internal recursion depth
 | 
			
		||||
  <i>callout_data</i>     Opaque data passed back to callouts
 | 
			
		||||
  <i>tables</i>           Points to character tables or is NULL
 | 
			
		||||
  <i>mark</i>             For passing back a *MARK pointer
 | 
			
		||||
  <i>executable_jit</i>   Opaque data from JIT compilation
 | 
			
		||||
</pre>
 | 
			
		||||
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
 | 
			
		||||
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
 | 
			
		||||
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the JIT API in the
 | 
			
		||||
<a href="pcrejit.html"><b>pcrejit</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										55
									
								
								tools/pcre/doc/html/pcre_jit_stack_alloc.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								tools/pcre/doc/html/pcre_jit_stack_alloc.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,55 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_jit_stack_alloc specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_jit_stack_alloc man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
 | 
			
		||||
<b>     int <i>maxsize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
 | 
			
		||||
<b>     int <i>maxsize</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
 | 
			
		||||
<b>     int <i>maxsize</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function is used to create a stack for use by the code compiled by the JIT
 | 
			
		||||
optimization of <b>pcre[16|32]_study()</b>. The arguments are a starting size for
 | 
			
		||||
the stack, and a maximum size to which it is allowed to grow. The result can be
 | 
			
		||||
passed to the JIT run-time code by <b>pcre[16|32]_assign_jit_stack()</b>, or that
 | 
			
		||||
function can set up a callback for obtaining a stack. A maximum stack size of
 | 
			
		||||
512K to 1M should be more than enough for any pattern. For more details, see
 | 
			
		||||
the
 | 
			
		||||
<a href="pcrejit.html"><b>pcrejit</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										48
									
								
								tools/pcre/doc/html/pcre_jit_stack_free.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								tools/pcre/doc/html/pcre_jit_stack_free.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,48 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_jit_stack_free specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_jit_stack_free man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function is used to free a JIT stack that was created by
 | 
			
		||||
<b>pcre[16|32]_jit_stack_alloc()</b> when it is no longer needed. For more details,
 | 
			
		||||
see the
 | 
			
		||||
<a href="pcrejit.html"><b>pcrejit</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										48
									
								
								tools/pcre/doc/html/pcre_maketables.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								tools/pcre/doc/html/pcre_maketables.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,48 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_maketables specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_maketables man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>const unsigned char *pcre_maketables(void);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>const unsigned char *pcre16_maketables(void);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>const unsigned char *pcre32_maketables(void);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function builds a set of character tables for character values less than
 | 
			
		||||
256. These can be passed to <b>pcre[16|32]_compile()</b> to override PCRE's
 | 
			
		||||
internal, built-in tables (which were made by <b>pcre[16|32]_maketables()</b> when
 | 
			
		||||
PCRE was compiled). You might want to do this if you are using a non-standard
 | 
			
		||||
locale. The function yields a pointer to the tables.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										58
									
								
								tools/pcre/doc/html/pcre_pattern_to_host_byte_order.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								tools/pcre/doc/html/pcre_pattern_to_host_byte_order.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,58 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_pattern_to_host_byte_order specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_pattern_to_host_byte_order man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
 | 
			
		||||
<b>     pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
 | 
			
		||||
<b>     pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
 | 
			
		||||
<b>     pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function ensures that the bytes in 2-byte and 4-byte values in a compiled
 | 
			
		||||
pattern are in the correct order for the current host. It is useful when a
 | 
			
		||||
pattern that has been compiled on one host is transferred to another that might
 | 
			
		||||
have different endianness. The arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>         A compiled regular expression
 | 
			
		||||
  <i>extra</i>        Points to an associated <b>pcre[16|32]_extra</b> structure,
 | 
			
		||||
                 or is NULL
 | 
			
		||||
  <i>tables</i>       Pointer to character tables, or NULL to
 | 
			
		||||
                 set the built-in default
 | 
			
		||||
</pre>
 | 
			
		||||
The result is 0 for success, a negative PCRE_ERROR_xxx value otherwise.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										51
									
								
								tools/pcre/doc/html/pcre_refcount.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								tools/pcre/doc/html/pcre_refcount.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,51 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_refcount specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_refcount man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function is used to maintain a reference count inside a data block that
 | 
			
		||||
contains a compiled pattern. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>                      Compiled regular expression
 | 
			
		||||
  <i>adjust</i>                    Adjustment to reference value
 | 
			
		||||
</pre>
 | 
			
		||||
The yield of the function is the adjusted reference value, which is constrained
 | 
			
		||||
to lie between 0 and 65535.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										68
									
								
								tools/pcre/doc/html/pcre_study.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								tools/pcre/doc/html/pcre_study.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,68 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_study specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_study man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>);</b>
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
 | 
			
		||||
<b>     const char **<i>errptr</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function studies a compiled pattern, to see if additional information can
 | 
			
		||||
be extracted that might speed up matching. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>code</i>       A compiled regular expression
 | 
			
		||||
  <i>options</i>    Options for <b>pcre[16|32]_study()</b>
 | 
			
		||||
  <i>errptr</i>     Where to put an error message
 | 
			
		||||
</pre>
 | 
			
		||||
If the function succeeds, it returns a value that can be passed to
 | 
			
		||||
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> via their <i>extra</i>
 | 
			
		||||
arguments.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If the function returns NULL, either it could not find any additional
 | 
			
		||||
information, or there was an error. You can tell the difference by looking at
 | 
			
		||||
the error value. It is NULL in the first case.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The only option is PCRE_STUDY_JIT_COMPILE. It requests just-in-time compilation
 | 
			
		||||
if possible. If PCRE has been compiled without JIT support, this option is
 | 
			
		||||
ignored. See the
 | 
			
		||||
<a href="pcrejit.html"><b>pcrejit</b></a>
 | 
			
		||||
page for further details.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										57
									
								
								tools/pcre/doc/html/pcre_utf16_to_host_byte_order.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								tools/pcre/doc/html/pcre_utf16_to_host_byte_order.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,57 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_utf16_to_host_byte_order specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_utf16_to_host_byte_order man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &#60;pcre.h&#62;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
 | 
			
		||||
<b>     int <i>keep_boms</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function, which exists only in the 16-bit library, converts a UTF-16
 | 
			
		||||
string to the correct order for the current host, taking account of any byte
 | 
			
		||||
order marks (BOMs) within the string. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>output</i>           pointer to output buffer, may be the same as <i>input</i>
 | 
			
		||||
  <i>input</i>            pointer to input buffer
 | 
			
		||||
  <i>length</i>           number of 16-bit units in the input, or negative for
 | 
			
		||||
                     a zero-terminated string
 | 
			
		||||
  <i>host_byte_order</i>  a NULL value or a non-zero value pointed to means
 | 
			
		||||
                     start in host byte order
 | 
			
		||||
  <i>keep_boms</i>        if non-zero, BOMs are copied to the output string
 | 
			
		||||
</pre>
 | 
			
		||||
The result of the function is the number of 16-bit units placed into the output
 | 
			
		||||
buffer, including the zero terminator if the string was zero-terminated.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>host_byte_order</i> is not NULL, it is set to indicate the byte order that
 | 
			
		||||
is current at the end of the string.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										57
									
								
								tools/pcre/doc/html/pcre_utf32_to_host_byte_order.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								tools/pcre/doc/html/pcre_utf32_to_host_byte_order.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,57 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_utf32_to_host_byte_order specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_utf32_to_host_byte_order man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &#60;pcre.h&#62;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
 | 
			
		||||
<b>     PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
 | 
			
		||||
<b>     int <i>keep_boms</i>);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function, which exists only in the 32-bit library, converts a UTF-32
 | 
			
		||||
string to the correct order for the current host, taking account of any byte
 | 
			
		||||
order marks (BOMs) within the string. Its arguments are:
 | 
			
		||||
<pre>
 | 
			
		||||
  <i>output</i>           pointer to output buffer, may be the same as <i>input</i>
 | 
			
		||||
  <i>input</i>            pointer to input buffer
 | 
			
		||||
  <i>length</i>           number of 32-bit units in the input, or negative for
 | 
			
		||||
                     a zero-terminated string
 | 
			
		||||
  <i>host_byte_order</i>  a NULL value or a non-zero value pointed to means
 | 
			
		||||
                     start in host byte order
 | 
			
		||||
  <i>keep_boms</i>        if non-zero, BOMs are copied to the output string
 | 
			
		||||
</pre>
 | 
			
		||||
The result of the function is the number of 32-bit units placed into the output
 | 
			
		||||
buffer, including the zero terminator if the string was zero-terminated.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If <i>host_byte_order</i> is not NULL, it is set to indicate the byte order that
 | 
			
		||||
is current at the end of the string.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										46
									
								
								tools/pcre/doc/html/pcre_version.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								tools/pcre/doc/html/pcre_version.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,46 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcre_version specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcre_version man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SYNOPSIS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &#60;pcre.h&#62;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>const char *pcre_version(void);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>const char *pcre16_version(void);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>const char *pcre32_version(void);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
DESCRIPTION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This function (even in the 16-bit and 32-bit libraries) returns a
 | 
			
		||||
zero-terminated, 8-bit character string that gives the version number of the
 | 
			
		||||
PCRE library and the date of its release.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a complete description of the PCRE native API in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
page and a description of the POSIX API in the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
page.
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										2922
									
								
								tools/pcre/doc/html/pcreapi.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2922
									
								
								tools/pcre/doc/html/pcreapi.html
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										534
									
								
								tools/pcre/doc/html/pcrebuild.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										534
									
								
								tools/pcre/doc/html/pcrebuild.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,534 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcrebuild specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcrebuild man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">BUILDING PCRE</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">PCRE BUILD-TIME OPTIONS</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">C++ SUPPORT</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">UNICODE CHARACTER PROPERTY SUPPORT</a>
 | 
			
		||||
<li><a name="TOC8" href="#SEC8">JUST-IN-TIME COMPILER SUPPORT</a>
 | 
			
		||||
<li><a name="TOC9" href="#SEC9">CODE VALUE OF NEWLINE</a>
 | 
			
		||||
<li><a name="TOC10" href="#SEC10">WHAT \R MATCHES</a>
 | 
			
		||||
<li><a name="TOC11" href="#SEC11">POSIX MALLOC USAGE</a>
 | 
			
		||||
<li><a name="TOC12" href="#SEC12">HANDLING VERY LARGE PATTERNS</a>
 | 
			
		||||
<li><a name="TOC13" href="#SEC13">AVOIDING EXCESSIVE STACK USAGE</a>
 | 
			
		||||
<li><a name="TOC14" href="#SEC14">LIMITING PCRE RESOURCE USAGE</a>
 | 
			
		||||
<li><a name="TOC15" href="#SEC15">CREATING CHARACTER TABLES AT BUILD TIME</a>
 | 
			
		||||
<li><a name="TOC16" href="#SEC16">USING EBCDIC CODE</a>
 | 
			
		||||
<li><a name="TOC17" href="#SEC17">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
 | 
			
		||||
<li><a name="TOC18" href="#SEC18">PCREGREP BUFFER SIZE</a>
 | 
			
		||||
<li><a name="TOC19" href="#SEC19">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
 | 
			
		||||
<li><a name="TOC20" href="#SEC20">DEBUGGING WITH VALGRIND SUPPORT</a>
 | 
			
		||||
<li><a name="TOC21" href="#SEC21">CODE COVERAGE REPORTING</a>
 | 
			
		||||
<li><a name="TOC22" href="#SEC22">SEE ALSO</a>
 | 
			
		||||
<li><a name="TOC23" href="#SEC23">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC24" href="#SEC24">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">BUILDING PCRE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
PCRE is distributed with a <b>configure</b> script that can be used to build the
 | 
			
		||||
library in Unix-like environments using the applications known as Autotools.
 | 
			
		||||
Also in the distribution are files to support building using <b>CMake</b>
 | 
			
		||||
instead of <b>configure</b>. The text file
 | 
			
		||||
<a href="README.txt"><b>README</b></a>
 | 
			
		||||
contains general information about building with Autotools (some of which is
 | 
			
		||||
repeated below), and also has some comments about building on various operating
 | 
			
		||||
systems. There is a lot more information about building PCRE without using
 | 
			
		||||
Autotools (including information about using <b>CMake</b> and building "by
 | 
			
		||||
hand") in the text file called
 | 
			
		||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
 | 
			
		||||
You should consult this file as well as the
 | 
			
		||||
<a href="README.txt"><b>README</b></a>
 | 
			
		||||
file if you are building in a non-Unix-like environment.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The rest of this document describes the optional features of PCRE that can be
 | 
			
		||||
selected when the library is compiled. It assumes use of the <b>configure</b>
 | 
			
		||||
script, where the optional features are selected or deselected by providing
 | 
			
		||||
options to <b>configure</b> before running the <b>make</b> command. However, the
 | 
			
		||||
same options can be selected in both Unix-like and non-Unix-like environments
 | 
			
		||||
using the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead
 | 
			
		||||
of <b>configure</b> to build PCRE.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If you are not using Autotools or <b>CMake</b>, option selection can be done by
 | 
			
		||||
editing the <b>config.h</b> file, or by passing parameter settings to the
 | 
			
		||||
compiler, as described in
 | 
			
		||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The complete list of options for <b>configure</b> (which includes the standard
 | 
			
		||||
ones such as the selection of the installation directory) can be obtained by
 | 
			
		||||
running
 | 
			
		||||
<pre>
 | 
			
		||||
  ./configure --help
 | 
			
		||||
</pre>
 | 
			
		||||
The following sections include descriptions of options whose names begin with
 | 
			
		||||
--enable or --disable. These settings specify changes to the defaults for the
 | 
			
		||||
<b>configure</b> command. Because of the way that <b>configure</b> works,
 | 
			
		||||
--enable and --disable always come in pairs, so the complementary option always
 | 
			
		||||
exists as well, but as it specifies the default, it is not described.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By default, a library called <b>libpcre</b> is built, containing functions that
 | 
			
		||||
take string arguments contained in vectors of bytes, either as single-byte
 | 
			
		||||
characters, or interpreted as UTF-8 strings. You can also build a separate
 | 
			
		||||
library, called <b>libpcre16</b>, in which strings are contained in vectors of
 | 
			
		||||
16-bit data units and interpreted either as single-unit characters or UTF-16
 | 
			
		||||
strings, by adding
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-pcre16
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. You can also build yet another separate
 | 
			
		||||
library, called <b>libpcre32</b>, in which strings are contained in vectors of
 | 
			
		||||
32-bit data units and interpreted either as single-unit characters or UTF-32
 | 
			
		||||
strings, by adding
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-pcre32
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. If you do not want the 8-bit library, add
 | 
			
		||||
<pre>
 | 
			
		||||
  --disable-pcre8
 | 
			
		||||
</pre>
 | 
			
		||||
as well. At least one of the three libraries must be built. Note that the C++
 | 
			
		||||
and POSIX wrappers are for the 8-bit library only, and that <b>pcregrep</b> is
 | 
			
		||||
an 8-bit program. None of these are built if you select only the 16-bit or
 | 
			
		||||
32-bit libraries.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The Autotools PCRE building process uses <b>libtool</b> to build both shared and
 | 
			
		||||
static libraries by default. You can suppress one of these by adding one of
 | 
			
		||||
<pre>
 | 
			
		||||
  --disable-shared
 | 
			
		||||
  --disable-static
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command, as required.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">C++ SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By default, if the 8-bit library is being built, the <b>configure</b> script
 | 
			
		||||
will search for a C++ compiler and C++ header files. If it finds them, it
 | 
			
		||||
automatically builds the C++ wrapper library (which supports only 8-bit
 | 
			
		||||
strings). You can disable this by adding
 | 
			
		||||
<pre>
 | 
			
		||||
  --disable-cpp
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
To build PCRE with support for UTF Unicode character strings, add
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-utf
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. This setting applies to all three libraries,
 | 
			
		||||
adding support for UTF-8 to the 8-bit library, support for UTF-16 to the 16-bit
 | 
			
		||||
library, and support for UTF-32 to the 32-bit library. There are no
 | 
			
		||||
separate options for enabling UTF-8, UTF-16 and UTF-32 independently because
 | 
			
		||||
that would allow ridiculous settings such as requesting UTF-16 support while
 | 
			
		||||
building only the 8-bit library. It is not possible to build one library with
 | 
			
		||||
UTF support and another without in the same configuration. (For backwards
 | 
			
		||||
compatibility, --enable-utf8 is a synonym of --enable-utf.)
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Of itself, this setting does not make PCRE treat strings as UTF-8, UTF-16 or
 | 
			
		||||
UTF-32. As well as compiling PCRE with this option, you also have to set
 | 
			
		||||
the PCRE_UTF8, PCRE_UTF16 or PCRE_UTF32 option (as appropriate) when you call
 | 
			
		||||
one of the pattern compiling functions.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If you set --enable-utf when compiling in an EBCDIC environment, PCRE expects
 | 
			
		||||
its input to be either ASCII or UTF-8 (depending on the run-time option). It is
 | 
			
		||||
not possible to support both EBCDIC and UTF-8 codes in the same version of the
 | 
			
		||||
library. Consequently, --enable-utf and --enable-ebcdic are mutually
 | 
			
		||||
exclusive.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
UTF support allows the libraries to process character codepoints up to 0x10ffff
 | 
			
		||||
in the strings that they handle. On its own, however, it does not provide any
 | 
			
		||||
facilities for accessing the properties of such characters. If you want to be
 | 
			
		||||
able to use the pattern escapes \P, \p, and \X, which refer to Unicode
 | 
			
		||||
character properties, you must add
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-unicode-properties
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. This implies UTF support, even if you have
 | 
			
		||||
not explicitly requested it.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Including Unicode property support adds around 30K of tables to the PCRE
 | 
			
		||||
library. Only the general category properties such as <i>Lu</i> and <i>Nd</i> are
 | 
			
		||||
supported. Details are given in the
 | 
			
		||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
 | 
			
		||||
documentation.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC8" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Just-in-time compiler support is included in the build by specifying
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-jit
 | 
			
		||||
</pre>
 | 
			
		||||
This support is available only for certain hardware architectures. If this
 | 
			
		||||
option is set for an unsupported architecture, a compile time error occurs.
 | 
			
		||||
See the
 | 
			
		||||
<a href="pcrejit.html"><b>pcrejit</b></a>
 | 
			
		||||
documentation for a discussion of JIT usage. When JIT support is enabled,
 | 
			
		||||
pcregrep automatically makes use of it, unless you add
 | 
			
		||||
<pre>
 | 
			
		||||
  --disable-pcregrep-jit
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC9" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By default, PCRE interprets the linefeed (LF) character as indicating the end
 | 
			
		||||
of a line. This is the normal newline character on Unix-like systems. You can
 | 
			
		||||
compile PCRE to use carriage return (CR) instead, by adding
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-newline-is-cr
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. There is also a --enable-newline-is-lf option,
 | 
			
		||||
which explicitly specifies linefeed as the newline character.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
Alternatively, you can specify that line endings are to be indicated by the two
 | 
			
		||||
character sequence CRLF. If you want this, add
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-newline-is-crlf
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. There is a fourth option, specified by
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-newline-is-anycrlf
 | 
			
		||||
</pre>
 | 
			
		||||
which causes PCRE to recognize any of the three sequences CR, LF, or CRLF as
 | 
			
		||||
indicating a line ending. Finally, a fifth option, specified by
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-newline-is-any
 | 
			
		||||
</pre>
 | 
			
		||||
causes PCRE to recognize any Unicode newline sequence.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Whatever line ending convention is selected when PCRE is built can be
 | 
			
		||||
overridden when the library functions are called. At build time it is
 | 
			
		||||
conventional to use the standard for your operating system.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC10" href="#TOC1">WHAT \R MATCHES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By default, the sequence \R in a pattern matches any Unicode newline sequence,
 | 
			
		||||
whatever has been selected as the line ending sequence. If you specify
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-bsr-anycrlf
 | 
			
		||||
</pre>
 | 
			
		||||
the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
 | 
			
		||||
selected when PCRE is built can be overridden when the library functions are
 | 
			
		||||
called.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC11" href="#TOC1">POSIX MALLOC USAGE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
When the 8-bit library is called through the POSIX interface (see the
 | 
			
		||||
<a href="pcreposix.html"><b>pcreposix</b></a>
 | 
			
		||||
documentation), additional working storage is required for holding the pointers
 | 
			
		||||
to capturing substrings, because PCRE requires three integers per substring,
 | 
			
		||||
whereas the POSIX interface provides only two. If the number of expected
 | 
			
		||||
substrings is small, the wrapper function uses space on the stack, because this
 | 
			
		||||
is faster than using <b>malloc()</b> for each call. The default threshold above
 | 
			
		||||
which the stack is no longer used is 10; it can be changed by adding a setting
 | 
			
		||||
such as
 | 
			
		||||
<pre>
 | 
			
		||||
  --with-posix-malloc-threshold=20
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC12" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Within a compiled pattern, offset values are used to point from one part to
 | 
			
		||||
another (for example, from an opening parenthesis to an alternation
 | 
			
		||||
metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
 | 
			
		||||
are used for these offsets, leading to a maximum size for a compiled pattern of
 | 
			
		||||
around 64K. This is sufficient to handle all but the most gigantic patterns.
 | 
			
		||||
Nevertheless, some people do want to process truly enormous patterns, so it is
 | 
			
		||||
possible to compile PCRE to use three-byte or four-byte offsets by adding a
 | 
			
		||||
setting such as
 | 
			
		||||
<pre>
 | 
			
		||||
  --with-link-size=3
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. The value given must be 2, 3, or 4. For the
 | 
			
		||||
16-bit library, a value of 3 is rounded up to 4. In these libraries, using
 | 
			
		||||
longer offsets slows down the operation of PCRE because it has to load
 | 
			
		||||
additional data when handling them. For the 32-bit library the value is always
 | 
			
		||||
4 and cannot be overridden; the value of --with-link-size is ignored.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC13" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
 | 
			
		||||
by making recursive calls to an internal function called <b>match()</b>. In
 | 
			
		||||
environments where the size of the stack is limited, this can severely limit
 | 
			
		||||
PCRE's operation. (The Unix environment does not usually suffer from this
 | 
			
		||||
problem, but it may sometimes be necessary to increase the maximum stack size.
 | 
			
		||||
There is a discussion in the
 | 
			
		||||
<a href="pcrestack.html"><b>pcrestack</b></a>
 | 
			
		||||
documentation.) An alternative approach to recursion that uses memory from the
 | 
			
		||||
heap to remember data, instead of using recursive function calls, has been
 | 
			
		||||
implemented to work round the problem of limited stack size. If you want to
 | 
			
		||||
build a version of PCRE that works this way, add
 | 
			
		||||
<pre>
 | 
			
		||||
  --disable-stack-for-recursion
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. With this configuration, PCRE will use the
 | 
			
		||||
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables to call memory
 | 
			
		||||
management functions. By default these point to <b>malloc()</b> and
 | 
			
		||||
<b>free()</b>, but you can replace the pointers so that your own functions are
 | 
			
		||||
used instead.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Separate functions are provided rather than using <b>pcre_malloc</b> and
 | 
			
		||||
<b>pcre_free</b> because the usage is very predictable: the block sizes
 | 
			
		||||
requested are always the same, and the blocks are always freed in reverse
 | 
			
		||||
order. A calling program might be able to implement optimized functions that
 | 
			
		||||
perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
 | 
			
		||||
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
 | 
			
		||||
function; it is not relevant for <b>pcre_dfa_exec()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC14" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
 | 
			
		||||
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
 | 
			
		||||
function. By controlling the maximum number of times this function may be
 | 
			
		||||
called during a single matching operation, a limit can be placed on the
 | 
			
		||||
resources used by a single call to <b>pcre_exec()</b>. The limit can be changed
 | 
			
		||||
at run time, as described in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
documentation. The default is 10 million, but this can be changed by adding a
 | 
			
		||||
setting such as
 | 
			
		||||
<pre>
 | 
			
		||||
  --with-match-limit=500000
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. This setting has no effect on the
 | 
			
		||||
<b>pcre_dfa_exec()</b> matching function.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
In some environments it is desirable to limit the depth of recursive calls of
 | 
			
		||||
<b>match()</b> more strictly than the total number of calls, in order to
 | 
			
		||||
restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion
 | 
			
		||||
is specified) that is used. A second limit controls this; it defaults to the
 | 
			
		||||
value that is set for --with-match-limit, which imposes no additional
 | 
			
		||||
constraints. However, you can set a lower limit by adding, for example,
 | 
			
		||||
<pre>
 | 
			
		||||
  --with-match-limit-recursion=10000
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. This value can also be overridden at run time.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC15" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
PCRE uses fixed tables for processing characters whose code values are less
 | 
			
		||||
than 256. By default, PCRE is built with a set of tables that are distributed
 | 
			
		||||
in the file <i>pcre_chartables.c.dist</i>. These tables are for ASCII codes
 | 
			
		||||
only. If you add
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-rebuild-chartables
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command, the distributed tables are no longer used.
 | 
			
		||||
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
 | 
			
		||||
source for a new set of tables, created in the default locale of your C run-time
 | 
			
		||||
system. (This method of replacing the tables does not work if you are cross
 | 
			
		||||
compiling, because <b>dftables</b> is run on the local host. If you need to
 | 
			
		||||
create alternative tables when cross compiling, you will have to do so "by
 | 
			
		||||
hand".)
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC16" href="#TOC1">USING EBCDIC CODE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
PCRE assumes by default that it will run in an environment where the character
 | 
			
		||||
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
 | 
			
		||||
most computer operating systems. PCRE can, however, be compiled to run in an
 | 
			
		||||
EBCDIC environment by adding
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-ebcdic
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. This setting implies
 | 
			
		||||
--enable-rebuild-chartables. You should only use it if you know that you are in
 | 
			
		||||
an EBCDIC environment (for example, an IBM mainframe operating system). The
 | 
			
		||||
--enable-ebcdic option is incompatible with --enable-utf.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The EBCDIC character that corresponds to an ASCII LF is assumed to have the
 | 
			
		||||
value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In
 | 
			
		||||
such an environment you should use
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-ebcdic-nl25
 | 
			
		||||
</pre>
 | 
			
		||||
as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the
 | 
			
		||||
same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is <i>not</i>
 | 
			
		||||
chosen as LF is made to correspond to the Unicode NEL character (which, in
 | 
			
		||||
Unicode, is 0x85).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The options that select newline behaviour, such as --enable-newline-is-cr,
 | 
			
		||||
and equivalent run-time options, refer to these character values in an EBCDIC
 | 
			
		||||
environment.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC17" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
 | 
			
		||||
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
 | 
			
		||||
them with <b>libz</b> or <b>libbz2</b>, respectively, by adding one or both of
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-pcregrep-libz
 | 
			
		||||
  --enable-pcregrep-libbz2
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. These options naturally require that the
 | 
			
		||||
relevant libraries are installed on your system. Configuration will fail if
 | 
			
		||||
they are not.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC18" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcregrep</b> uses an internal buffer to hold a "window" on the file it is
 | 
			
		||||
scanning, in order to be able to output "before" and "after" lines when it
 | 
			
		||||
finds a match. The size of the buffer is controlled by a parameter whose
 | 
			
		||||
default value is 20K. The buffer itself is three times this size, but because
 | 
			
		||||
of the way it is used for holding "before" lines, the longest line that is
 | 
			
		||||
guaranteed to be processable is the parameter size. You can change the default
 | 
			
		||||
parameter value by adding, for example,
 | 
			
		||||
<pre>
 | 
			
		||||
  --with-pcregrep-bufsize=50K
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command. The caller of <b>pcregrep</b> can, however,
 | 
			
		||||
override this value by specifying a run-time option.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC19" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
If you add
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-pcretest-libreadline
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command, <b>pcretest</b> is linked with the
 | 
			
		||||
<b>libreadline</b> library, and when its input is from a terminal, it reads it
 | 
			
		||||
using the <b>readline()</b> function. This provides line-editing and history
 | 
			
		||||
facilities. Note that <b>libreadline</b> is GPL-licensed, so if you distribute a
 | 
			
		||||
binary of <b>pcretest</b> linked in this way, there may be licensing issues.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Setting this option causes the <b>-lreadline</b> option to be added to the
 | 
			
		||||
<b>pcretest</b> build. In many operating environments with a system-installed
 | 
			
		||||
<b>libreadline</b> this is sufficient. However, in some environments (e.g.
 | 
			
		||||
if an unmodified distribution version of readline is in use), some extra
 | 
			
		||||
configuration may be necessary. The INSTALL file for <b>libreadline</b> says
 | 
			
		||||
this:
 | 
			
		||||
<pre>
 | 
			
		||||
  "Readline uses the termcap functions, but does not link with the
 | 
			
		||||
  termcap or curses library itself, allowing applications which link
 | 
			
		||||
  with readline the to choose an appropriate library."
 | 
			
		||||
</pre>
 | 
			
		||||
If your environment has not been set up so that an appropriate library is
 | 
			
		||||
automatically included, you may need to add something like
 | 
			
		||||
<pre>
 | 
			
		||||
  LIBS="-lncurses"
 | 
			
		||||
</pre>
 | 
			
		||||
immediately before the <b>configure</b> command.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC20" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By adding the
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-valgrind
 | 
			
		||||
</pre>
 | 
			
		||||
option to the <b>configure</b> command, PCRE will use valgrind annotations
 | 
			
		||||
to mark certain memory regions as unaddressable. This allows it to detect
 | 
			
		||||
invalid memory accesses, and is mostly useful for debugging PCRE itself.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC21" href="#TOC1">CODE COVERAGE REPORTING</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
If your C compiler is gcc, you can build a version of PCRE that can generate a
 | 
			
		||||
code coverage report for its test suite. To enable this, you must install
 | 
			
		||||
<b>lcov</b> version 1.6 or above. Then specify
 | 
			
		||||
<pre>
 | 
			
		||||
  --enable-coverage
 | 
			
		||||
</pre>
 | 
			
		||||
to the <b>configure</b> command and build PCRE in the usual way.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Note that using <b>ccache</b> (a caching C compiler) is incompatible with code
 | 
			
		||||
coverage reporting. If you have configured <b>ccache</b> to run automatically
 | 
			
		||||
on your system, you must set the environment variable
 | 
			
		||||
<pre>
 | 
			
		||||
  CCACHE_DISABLE=1
 | 
			
		||||
</pre>
 | 
			
		||||
before running <b>make</b> to build PCRE, so that <b>ccache</b> is not used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
When --enable-coverage is used, the following additional targets are added to the
 | 
			
		||||
<i>Makefile</i>:
 | 
			
		||||
<pre>
 | 
			
		||||
  make coverage
 | 
			
		||||
</pre>
 | 
			
		||||
This creates a fresh coverage report for the PCRE test suite. It is equivalent
 | 
			
		||||
to running "make coverage-reset", "make coverage-baseline", "make check", and
 | 
			
		||||
then "make coverage-report".
 | 
			
		||||
<pre>
 | 
			
		||||
  make coverage-reset
 | 
			
		||||
</pre>
 | 
			
		||||
This zeroes the coverage counters, but does nothing else.
 | 
			
		||||
<pre>
 | 
			
		||||
  make coverage-baseline
 | 
			
		||||
</pre>
 | 
			
		||||
This captures baseline coverage information.
 | 
			
		||||
<pre>
 | 
			
		||||
  make coverage-report
 | 
			
		||||
</pre>
 | 
			
		||||
This creates the coverage report.
 | 
			
		||||
<pre>
 | 
			
		||||
  make coverage-clean-report
 | 
			
		||||
</pre>
 | 
			
		||||
This removes the generated coverage report without cleaning the coverage data
 | 
			
		||||
itself.
 | 
			
		||||
<pre>
 | 
			
		||||
  make coverage-clean-data
 | 
			
		||||
</pre>
 | 
			
		||||
This removes the captured coverage data without removing the coverage files
 | 
			
		||||
created at compile time (*.gcno).
 | 
			
		||||
<pre>
 | 
			
		||||
  make coverage-clean
 | 
			
		||||
</pre>
 | 
			
		||||
This cleans all coverage data including the generated coverage report. For more
 | 
			
		||||
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
 | 
			
		||||
documentation.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC22" href="#TOC1">SEE ALSO</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcreapi</b>(3), <b>pcre16</b>, <b>pcre32</b>, <b>pcre_config</b>(3).
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC23" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC24" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 12 May 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2013 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										286
									
								
								tools/pcre/doc/html/pcrecallout.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								tools/pcre/doc/html/pcrecallout.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,286 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcrecallout specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcrecallout man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">RETURN VALUES</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include &lt;pcre.h&gt;</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int (*pcre_callout)(pcre_callout_block *);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
PCRE provides a feature called "callout", which is a means of temporarily
 | 
			
		||||
passing control to the caller of PCRE in the middle of pattern matching. The
 | 
			
		||||
caller of PCRE provides an external function by putting its entry point in the
 | 
			
		||||
global variable <i>pcre_callout</i> (<i>pcre16_callout</i> for the 16-bit
 | 
			
		||||
library, <i>pcre32_callout</i> for the 32-bit library). By default, this
 | 
			
		||||
variable contains NULL, which disables all calling out.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Within a regular expression, (?C) indicates the points at which the external
 | 
			
		||||
function is to be called. Different callout points can be identified by putting
 | 
			
		||||
a number less than 256 after the letter C. The default value is zero.
 | 
			
		||||
For example, this pattern has two callout points:
 | 
			
		||||
<pre>
 | 
			
		||||
  (?C1)abc(?C2)def
 | 
			
		||||
</pre>
 | 
			
		||||
If the PCRE_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE
 | 
			
		||||
automatically inserts callouts, all with number 255, before each item in the
 | 
			
		||||
pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern
 | 
			
		||||
<pre>
 | 
			
		||||
  A(\d{2}|--)
 | 
			
		||||
</pre>
 | 
			
		||||
it is processed as if it were
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
Notice that there is a callout before and after each parenthesis and
 | 
			
		||||
alternation bar. If the pattern contains a conditional group whose condition is
 | 
			
		||||
an assertion, an automatic callout is inserted immediately before the
 | 
			
		||||
condition. Such a callout may also be inserted explicitly, for example:
 | 
			
		||||
<pre>
 | 
			
		||||
  (?(?C9)(?=a)ab|de)
 | 
			
		||||
</pre>
 | 
			
		||||
This applies only to assertion conditions (because they are themselves
 | 
			
		||||
independent groups).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Automatic callouts can be used for tracking the progress of pattern matching.
 | 
			
		||||
The
 | 
			
		||||
<a href="pcretest.html"><b>pcretest</b></a>
 | 
			
		||||
program has a pattern qualifier (/C) that sets automatic callouts; when it is
 | 
			
		||||
used, the output indicates how the pattern is being matched. This is useful
 | 
			
		||||
information when you are trying to optimize the performance of a particular
 | 
			
		||||
pattern.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
You should be aware that, because of optimizations in the way PCRE compiles and
 | 
			
		||||
matches patterns, callouts sometimes do not happen exactly as you might expect.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
At compile time, PCRE "auto-possessifies" repeated items when it knows that
 | 
			
		||||
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
 | 
			
		||||
if it were a++[bc]. The <b>pcretest</b> output when this pattern is anchored and
 | 
			
		||||
then applied with automatic callouts to the string "aaaa" is:
 | 
			
		||||
<pre>
 | 
			
		||||
  --->aaaa
 | 
			
		||||
   +0 ^        ^
 | 
			
		||||
   +1 ^        a+
 | 
			
		||||
   +3 ^   ^    [bc]
 | 
			
		||||
  No match
 | 
			
		||||
</pre>
 | 
			
		||||
This indicates that when matching [bc] fails, there is no backtracking into a+
 | 
			
		||||
and therefore the callouts that would be taken for the backtracks do not occur.
 | 
			
		||||
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
 | 
			
		||||
to <b>pcre_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
 | 
			
		||||
this is done in <b>pcretest</b> (using the /O qualifier), the output changes to
 | 
			
		||||
this:
 | 
			
		||||
<pre>
 | 
			
		||||
  --->aaaa
 | 
			
		||||
   +0 ^        ^
 | 
			
		||||
   +1 ^        a+
 | 
			
		||||
   +3 ^   ^    [bc]
 | 
			
		||||
   +3 ^  ^     [bc]
 | 
			
		||||
   +3 ^ ^      [bc]
 | 
			
		||||
   +3 ^^       [bc]
 | 
			
		||||
  No match
 | 
			
		||||
</pre>
 | 
			
		||||
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
 | 
			
		||||
again, repeatedly, until a+ itself fails.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Other optimizations that provide fast "no match" results also affect callouts.
 | 
			
		||||
For example, if the pattern is
 | 
			
		||||
<pre>
 | 
			
		||||
  ab(?C4)cd
 | 
			
		||||
</pre>
 | 
			
		||||
PCRE knows that any matching string must contain the letter "d". If the subject
 | 
			
		||||
string is "abyz", the lack of "d" means that matching doesn't ever start, and
 | 
			
		||||
the callout is never reached. However, with "abyd", though the result is still
 | 
			
		||||
no match, the callout is obeyed.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If the pattern is studied, PCRE knows the minimum length of a matching string,
 | 
			
		||||
and will immediately give a "no match" return without actually running a match
 | 
			
		||||
if the subject is not long enough, or, for unanchored patterns, if it has
 | 
			
		||||
been scanned far enough.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE
 | 
			
		||||
option to the matching function, or by starting the pattern with
 | 
			
		||||
(*NO_START_OPT). This slows down the matching process, but does ensure that
 | 
			
		||||
callouts such as the example above are obeyed.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
During matching, when PCRE reaches a callout point, the external function
 | 
			
		||||
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called (if it is
 | 
			
		||||
set). This applies to both normal and DFA matching. The only argument to the
 | 
			
		||||
callout function is a pointer to a <b>pcre_callout</b> or
 | 
			
		||||
<b>pcre[16|32]_callout</b> block. These structures contain the following
 | 
			
		||||
fields:
 | 
			
		||||
<pre>
 | 
			
		||||
  int           <i>version</i>;
 | 
			
		||||
  int           <i>callout_number</i>;
 | 
			
		||||
  int          *<i>offset_vector</i>;
 | 
			
		||||
  const char   *<i>subject</i>;           (8-bit version)
 | 
			
		||||
  PCRE_SPTR16   <i>subject</i>;           (16-bit version)
 | 
			
		||||
  PCRE_SPTR32   <i>subject</i>;           (32-bit version)
 | 
			
		||||
  int           <i>subject_length</i>;
 | 
			
		||||
  int           <i>start_match</i>;
 | 
			
		||||
  int           <i>current_position</i>;
 | 
			
		||||
  int           <i>capture_top</i>;
 | 
			
		||||
  int           <i>capture_last</i>;
 | 
			
		||||
  void         *<i>callout_data</i>;
 | 
			
		||||
  int           <i>pattern_position</i>;
 | 
			
		||||
  int           <i>next_item_length</i>;
 | 
			
		||||
  const unsigned char *<i>mark</i>;       (8-bit version)
 | 
			
		||||
  const PCRE_UCHAR16  *<i>mark</i>;       (16-bit version)
 | 
			
		||||
  const PCRE_UCHAR32  *<i>mark</i>;       (32-bit version)
 | 
			
		||||
</pre>
 | 
			
		||||
The <i>version</i> field is an integer containing the version number of the
 | 
			
		||||
block format. The initial version was 0; the current version is 2. The version
 | 
			
		||||
number will change again in future if additional fields are added, but the
 | 
			
		||||
intention is never to remove any of the existing fields.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>callout_number</i> field contains the number of the callout, as compiled
 | 
			
		||||
into the pattern (that is, the number after ?C for manual callouts, and 255 for
 | 
			
		||||
automatically generated callouts).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
 | 
			
		||||
passed by the caller to the matching function. When <b>pcre_exec()</b> or
 | 
			
		||||
<b>pcre[16|32]_exec()</b> is used, the contents can be inspected, in order to
 | 
			
		||||
extract substrings that have been matched so far, in the same way as for
 | 
			
		||||
extracting substrings after a match has completed. For the DFA matching
 | 
			
		||||
functions, this field is not useful.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
 | 
			
		||||
that were passed to the matching function.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>start_match</i> field normally contains the offset within the subject at
 | 
			
		||||
which the current match attempt started. However, if the escape sequence \K
 | 
			
		||||
has been encountered, this value is changed to reflect the modified starting
 | 
			
		||||
point. If the pattern is not anchored, the callout function may be called
 | 
			
		||||
several times from the same point in the pattern for different starting points
 | 
			
		||||
in the subject.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>current_position</i> field contains the offset within the subject of the
 | 
			
		||||
current match pointer.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
When <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> is used, the
 | 
			
		||||
<i>capture_top</i> field contains one more than the number of the highest
 | 
			
		||||
numbered captured substring so far. If no substrings have been captured, the
 | 
			
		||||
value of <i>capture_top</i> is one. This is always the case when the DFA
 | 
			
		||||
functions are used, because they do not support captured substrings.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>capture_last</i> field contains the number of the most recently captured
 | 
			
		||||
substring. However, when a recursion exits, the value reverts to what it was
 | 
			
		||||
outside the recursion, as do the values of all captured substrings. If no
 | 
			
		||||
substrings have been captured, the value of <i>capture_last</i> is -1. This is
 | 
			
		||||
always the case for the DFA matching functions.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>callout_data</i> field contains a value that is passed to a matching
 | 
			
		||||
function specifically so that it can be passed back in callouts. It is passed
 | 
			
		||||
in the <i>callout_data</i> field of a <b>pcre_extra</b> or <b>pcre[16|32]_extra</b>
 | 
			
		||||
data structure. If no such data was passed, the value of <i>callout_data</i> in
 | 
			
		||||
a callout block is NULL. There is a description of the <b>pcre_extra</b>
 | 
			
		||||
structure in the
 | 
			
		||||
<a href="pcreapi.html"><b>pcreapi</b></a>
 | 
			
		||||
documentation.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>pattern_position</i> field is present from version 1 of the callout
 | 
			
		||||
structure. It contains the offset to the next item to be matched in the pattern
 | 
			
		||||
string.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>next_item_length</i> field is present from version 1 of the callout
 | 
			
		||||
structure. It contains the length of the next item to be matched in the pattern
 | 
			
		||||
string. When the callout immediately precedes an alternation bar, a closing
 | 
			
		||||
parenthesis, or the end of the pattern, the length is zero. When the callout
 | 
			
		||||
precedes an opening parenthesis, the length is that of the entire subpattern.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
 | 
			
		||||
help in distinguishing between different automatic callouts, which all have the
 | 
			
		||||
same callout number. However, they are set for all callouts.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <i>mark</i> field is present from version 2 of the callout structure. In
 | 
			
		||||
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a
 | 
			
		||||
pointer to the zero-terminated name of the most recently passed (*MARK),
 | 
			
		||||
(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
 | 
			
		||||
passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
 | 
			
		||||
previous (*MARK). In callouts from the DFA matching functions this field always
 | 
			
		||||
contains NULL.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The external callout function returns an integer to PCRE. If the value is zero,
 | 
			
		||||
matching proceeds as normal. If the value is greater than zero, matching fails
 | 
			
		||||
at the current point, but the testing of other matching possibilities goes
 | 
			
		||||
ahead, just as if a lookahead assertion had failed. If the value is less than
 | 
			
		||||
zero, the match is abandoned, and the matching function returns the negative value.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Negative values should normally be chosen from the set of PCRE_ERROR_xxx
 | 
			
		||||
values. In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure.
 | 
			
		||||
The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
 | 
			
		||||
it will never be used by PCRE itself.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 12 November 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2013 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										235
									
								
								tools/pcre/doc/html/pcrecompat.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										235
									
								
								tools/pcre/doc/html/pcrecompat.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,235 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcrecompat specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcrecompat man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
DIFFERENCES BETWEEN PCRE AND PERL
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
This document describes the differences in the ways that PCRE and Perl handle
 | 
			
		||||
regular expressions. The differences described here are with respect to Perl
 | 
			
		||||
versions 5.10 and above.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
1. PCRE has only a subset of Perl's Unicode support. Details of what it does
 | 
			
		||||
have are given in the
 | 
			
		||||
<a href="pcreunicode.html"><b>pcreunicode</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
2. PCRE allows repeat quantifiers only on parenthesized assertions, but they do
 | 
			
		||||
not mean what you might think. For example, (?!a){3} does not assert that the
 | 
			
		||||
next three characters are not "a". It just asserts that the next character is
 | 
			
		||||
not "a" three times (in principle: PCRE optimizes this to run the assertion
 | 
			
		||||
just once). Perl allows repeat quantifiers on other assertions such as \b, but
 | 
			
		||||
these do not seem to have any use.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
3. Capturing subpatterns that occur inside negative lookahead assertions are
 | 
			
		||||
counted, but their entries in the offsets vector are never set. Perl sometimes
 | 
			
		||||
(but not always) sets its numerical variables from inside negative assertions.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
4. Though binary zero characters are supported in the subject string, they are
 | 
			
		||||
not allowed in a pattern string because it is passed as a normal C string,
 | 
			
		||||
terminated by zero. The escape sequence \0 can be used in the pattern to
 | 
			
		||||
represent a binary zero.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
5. The following Perl escape sequences are not supported: \l, \u, \L,
 | 
			
		||||
\U, and \N when followed by a character name or Unicode value. (\N on its
 | 
			
		||||
own, matching a non-newline character, is supported.) In fact these are
 | 
			
		||||
implemented by Perl's general string-handling and are not part of its pattern
 | 
			
		||||
matching engine. If any of these are encountered by PCRE, an error is
 | 
			
		||||
generated by default. However, if the PCRE_JAVASCRIPT_COMPAT option is set,
 | 
			
		||||
\U and \u are interpreted as JavaScript interprets them.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
6. The Perl escape sequences \p, \P, and \X are supported only if PCRE is
 | 
			
		||||
built with Unicode character property support. The properties that can be
 | 
			
		||||
tested with \p and \P are limited to the general category properties such as
 | 
			
		||||
Lu and Nd, script names such as Greek or Han, and the derived properties Any
 | 
			
		||||
and L&. PCRE does support the Cs (surrogate) property, which Perl does not; the
 | 
			
		||||
Perl documentation says "Because Perl hides the need for the user to understand
 | 
			
		||||
the internal representation of Unicode characters, there is no need to
 | 
			
		||||
implement the somewhat messy concept of surrogates."
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
7. PCRE does support the \Q...\E escape for quoting substrings. Characters in
 | 
			
		||||
between are treated as literals. This is slightly different from Perl in that $
 | 
			
		||||
and @ are also handled as literals inside the quotes. In Perl, they cause
 | 
			
		||||
variable interpolation (but of course PCRE does not have variables). Note the
 | 
			
		||||
following examples:
 | 
			
		||||
<pre>
 | 
			
		||||
    Pattern            PCRE matches      Perl matches
 | 
			
		||||
 | 
			
		||||
    \Qabc$xyz\E        abc$xyz           abc followed by the contents of $xyz
 | 
			
		||||
    \Qabc\$xyz\E       abc\$xyz          abc\$xyz
 | 
			
		||||
    \Qabc\E\$\Qxyz\E   abc$xyz           abc$xyz
 | 
			
		||||
</pre>
 | 
			
		||||
The \Q...\E sequence is recognized both inside and outside character classes.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
8. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
 | 
			
		||||
constructions. However, there is support for recursive patterns. This is not
 | 
			
		||||
available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE "callout"
 | 
			
		||||
feature allows an external function to be called during pattern matching. See
 | 
			
		||||
the
 | 
			
		||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
 | 
			
		||||
documentation for details.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
9. Subpatterns that are called as subroutines (whether or not recursively) are
 | 
			
		||||
always treated as atomic groups in PCRE. This is like Python, but unlike Perl.
 | 
			
		||||
Captured values that are set outside a subroutine call can be referenced from
 | 
			
		||||
inside in PCRE, but not in Perl. There is a discussion that explains these
 | 
			
		||||
differences in more detail in the
 | 
			
		||||
<a href="pcrepattern.html#recursiondifference">section on recursion differences from Perl</a>
 | 
			
		||||
in the
 | 
			
		||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
 | 
			
		||||
page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
10. If any of the backtracking control verbs are used in a subpattern that is
 | 
			
		||||
called as a subroutine (whether or not recursively), their effect is confined
 | 
			
		||||
to that subpattern; it does not extend to the surrounding pattern. This is not
 | 
			
		||||
always the case in Perl. In particular, if (*THEN) is present in a group that
 | 
			
		||||
is called as a subroutine, its action is limited to that group, even if the
 | 
			
		||||
group does not contain any | characters. Note that such subpatterns are
 | 
			
		||||
processed as anchored at the point where they are tested.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
11. If a pattern contains more than one backtracking control verb, the first
 | 
			
		||||
one that is backtracked onto acts. For example, in the pattern
 | 
			
		||||
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
 | 
			
		||||
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
 | 
			
		||||
same as PCRE, but there are examples where it differs.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
12. Most backtracking verbs in assertions have their normal actions. They are
 | 
			
		||||
not confined to the assertion.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
13. There are some differences that are concerned with the settings of captured
 | 
			
		||||
strings when part of a pattern is repeated. For example, matching "aba" against
 | 
			
		||||
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
14. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
 | 
			
		||||
names is not as general as Perl's. This is a consequence of the fact that PCRE
 | 
			
		||||
works internally just with numbers, using an external table to translate
 | 
			
		||||
between numbers and names. In particular, a pattern such as (?|(?&lt;a&gt;A)|(?&lt;b&gt;B)),
 | 
			
		||||
where the two capturing parentheses have the same number but different names,
 | 
			
		||||
is not supported, and causes an error at compile time. If it were allowed, it
 | 
			
		||||
would not be possible to distinguish which parentheses matched, because both
 | 
			
		||||
names map to capturing subpattern number 1. To avoid this confusing situation,
 | 
			
		||||
an error is given at compile time.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
15. Perl recognizes comments in some places that PCRE does not, for example,
 | 
			
		||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
 | 
			
		||||
Perl allows white space between ( and ? (though current Perls warn that this is
 | 
			
		||||
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
16. Perl, when in warning mode, gives warnings for character classes such as
 | 
			
		||||
[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
 | 
			
		||||
warning features, so it gives an error in these cases because they are almost
 | 
			
		||||
certainly user mistakes.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
17. In PCRE, the upper/lower case character properties Lu and Ll are not
 | 
			
		||||
affected when case-independent matching is specified. For example, \p{Lu}
 | 
			
		||||
always matches an upper case letter. I think Perl has changed in this respect;
 | 
			
		||||
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
 | 
			
		||||
letters, regardless of case, when case independence is specified.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
18. PCRE provides some extensions to the Perl regular expression facilities.
 | 
			
		||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
 | 
			
		||||
of which (such as named parentheses) have been in PCRE for some time. This list
 | 
			
		||||
is with respect to Perl 5.10:
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(a) Although lookbehind assertions in PCRE must match fixed length strings,
 | 
			
		||||
each alternative branch of a lookbehind assertion can match a different length
 | 
			
		||||
of string. Perl requires them all to have the same length.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $
 | 
			
		||||
meta-character matches only at the very end of the string.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(c) If PCRE_EXTRA is set, a backslash followed by a letter with no special
 | 
			
		||||
meaning is faulted. Otherwise, like Perl, the backslash is quietly ignored.
 | 
			
		||||
(Perl can be made to issue a warning.)
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(d) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is
 | 
			
		||||
inverted, that is, by default they are not greedy, but if followed by a
 | 
			
		||||
question mark they are.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(e) PCRE_ANCHORED can be used at matching time to force a pattern to be tried
 | 
			
		||||
only at the first matching position in the subject string.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, and
 | 
			
		||||
PCRE_NO_AUTO_CAPTURE options for <b>pcre_exec()</b> have no Perl equivalents.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
 | 
			
		||||
by the PCRE_BSR_ANYCRLF option.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(h) The callout facility is PCRE-specific.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(i) The partial matching facility is PCRE-specific.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(j) Patterns compiled by PCRE can be saved and re-used at a later time, even on
 | 
			
		||||
different hosts that have the other endianness. However, this does not apply to
 | 
			
		||||
optimized data created by the just-in-time compiler.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(k) The alternative matching functions (<b>pcre_dfa_exec()</b>,
 | 
			
		||||
<b>pcre16_dfa_exec()</b> and <b>pcre32_dfa_exec()</b>) match in a different way
 | 
			
		||||
and are not Perl-compatible.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
(l) PCRE recognizes some special sequences such as (*CR) at the start of
 | 
			
		||||
a pattern that set overall options that cannot be changed within the pattern.
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
AUTHOR
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
REVISION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 10 November 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2013 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										368
									
								
								tools/pcre/doc/html/pcrecpp.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										368
									
								
								tools/pcre/doc/html/pcrecpp.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,368 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcrecpp specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcrecpp man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF C++ WRAPPER</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">MATCHING INTERFACE</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">QUOTING METACHARACTERS</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">PARTIAL MATCHES</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">UTF-8 AND THE MATCHING INTERFACE</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE</a>
 | 
			
		||||
<li><a name="TOC8" href="#SEC8">SCANNING TEXT INCREMENTALLY</a>
 | 
			
		||||
<li><a name="TOC9" href="#SEC9">PARSING HEX/OCTAL/C-RADIX NUMBERS</a>
 | 
			
		||||
<li><a name="TOC10" href="#SEC10">REPLACING PARTS OF STRINGS</a>
 | 
			
		||||
<li><a name="TOC11" href="#SEC11">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC12" href="#SEC12">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF C++ WRAPPER</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>#include <pcrecpp.h></b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The C++ wrapper for PCRE was provided by Google Inc. Some additional
 | 
			
		||||
functionality was added by Giuseppe Maxia. This brief man page was constructed
 | 
			
		||||
from the notes in the <i>pcrecpp.h</i> file, which should be consulted for
 | 
			
		||||
further details. Note that the C++ wrapper supports only the original 8-bit
 | 
			
		||||
PCRE library. There is no 16-bit or 32-bit support at present.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">MATCHING INTERFACE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The "FullMatch" operation checks that supplied text matches a supplied pattern
 | 
			
		||||
exactly. If pointer arguments are supplied, it copies matched sub-strings that
 | 
			
		||||
match sub-patterns into them.
 | 
			
		||||
<pre>
 | 
			
		||||
  Example: successful match
 | 
			
		||||
     pcrecpp::RE re("h.*o");
 | 
			
		||||
     re.FullMatch("hello");
 | 
			
		||||
 | 
			
		||||
  Example: unsuccessful match (requires full match):
 | 
			
		||||
     pcrecpp::RE re("e");
 | 
			
		||||
     !re.FullMatch("hello");
 | 
			
		||||
 | 
			
		||||
  Example: creating a temporary RE object:
 | 
			
		||||
     pcrecpp::RE("h.*o").FullMatch("hello");
 | 
			
		||||
</pre>
 | 
			
		||||
You can pass in a "const char*" or a "string" for "text". The examples below
 | 
			
		||||
tend to use a const char*. You can, as in the different examples above, store
 | 
			
		||||
the RE object explicitly in a variable or use a temporary RE object. The
 | 
			
		||||
examples below use one mode or the other arbitrarily. Either could correctly be
 | 
			
		||||
used for any of these examples.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
You must supply extra pointer arguments to extract matched subpieces.
 | 
			
		||||
<pre>
 | 
			
		||||
  Example: extracts "ruby" into "s" and 1234 into "i"
 | 
			
		||||
     int i;
 | 
			
		||||
     string s;
 | 
			
		||||
     pcrecpp::RE re("(\\w+):(\\d+)");
 | 
			
		||||
     re.FullMatch("ruby:1234", &s, &i);
 | 
			
		||||
 | 
			
		||||
  Example: does not try to extract any extra sub-patterns
 | 
			
		||||
     re.FullMatch("ruby:1234", &s);
 | 
			
		||||
 | 
			
		||||
  Example: does not try to extract into NULL
 | 
			
		||||
     re.FullMatch("ruby:1234", NULL, &i);
 | 
			
		||||
 | 
			
		||||
  Example: integer overflow causes failure
 | 
			
		||||
     !re.FullMatch("ruby:1234567891234", NULL, &i);
 | 
			
		||||
 | 
			
		||||
  Example: fails because there aren't enough sub-patterns:
 | 
			
		||||
     !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
 | 
			
		||||
 | 
			
		||||
  Example: fails because string cannot be stored in integer
 | 
			
		||||
     !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
 | 
			
		||||
</pre>
 | 
			
		||||
The provided pointer arguments can be pointers to any scalar numeric
 | 
			
		||||
type, or one of:
 | 
			
		||||
<pre>
 | 
			
		||||
   string        (matched piece is copied to string)
 | 
			
		||||
   StringPiece   (StringPiece is mutated to point to matched piece)
 | 
			
		||||
   T             (where "bool T::ParseFrom(const char*, int)" exists)
 | 
			
		||||
   NULL          (the corresponding matched sub-pattern is not copied)
 | 
			
		||||
</pre>
 | 
			
		||||
The function returns true iff all of the following conditions are satisfied:
 | 
			
		||||
<pre>
 | 
			
		||||
  a. "text" matches "pattern" exactly;
 | 
			
		||||
 | 
			
		||||
  b. The number of matched sub-patterns is >= number of supplied
 | 
			
		||||
     pointers;
 | 
			
		||||
 | 
			
		||||
  c. The "i"th argument has a suitable type for holding the
 | 
			
		||||
     string captured as the "i"th sub-pattern. If you pass in
 | 
			
		||||
     void * NULL for the "i"th argument, or a non-void * NULL
 | 
			
		||||
     of the correct type, or pass fewer arguments than the
 | 
			
		||||
     number of sub-patterns, the "i"th captured sub-pattern is
 | 
			
		||||
     ignored.
 | 
			
		||||
</pre>
 | 
			
		||||
CAVEAT: An optional sub-pattern that does not exist in the matched
 | 
			
		||||
string is assigned the empty string. Therefore, the following will
 | 
			
		||||
return false (because the empty string is not a valid number):
 | 
			
		||||
<pre>
 | 
			
		||||
   int number;
 | 
			
		||||
   pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
 | 
			
		||||
</pre>
 | 
			
		||||
The matching interface supports at most 16 arguments per call.
 | 
			
		||||
If you need more, consider using the more general interface
 | 
			
		||||
<b>pcrecpp::RE::DoMatch</b>. See <b>pcrecpp.h</b> for the signature for
 | 
			
		||||
<b>DoMatch</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
NOTE: Do not use <b>no_arg</b>, which is used internally to mark the end of a
 | 
			
		||||
list of optional arguments, as a placeholder for missing arguments, as this can
 | 
			
		||||
lead to segfaults.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">QUOTING METACHARACTERS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
You can use the "QuoteMeta" operation to insert backslashes before all
 | 
			
		||||
potentially meaningful characters in a string. The returned string, used as a
 | 
			
		||||
regular expression, will exactly match the original string.
 | 
			
		||||
<pre>
 | 
			
		||||
  Example:
 | 
			
		||||
     string quoted = RE::QuoteMeta(unquoted);
 | 
			
		||||
</pre>
 | 
			
		||||
Note that it's legal to escape a character even if it has no special meaning in
 | 
			
		||||
a regular expression -- so this function does that. (This also makes it
 | 
			
		||||
identical to the perl function of the same name; see "perldoc -f quotemeta".)
 | 
			
		||||
For example, "1.5-2.0?" becomes "1\.5\-2\.0\?".
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">PARTIAL MATCHES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
You can use the "PartialMatch" operation when you want the pattern
 | 
			
		||||
to match any substring of the text.
 | 
			
		||||
<pre>
 | 
			
		||||
  Example: simple search for a string:
 | 
			
		||||
     pcrecpp::RE("ell").PartialMatch("hello");
 | 
			
		||||
 | 
			
		||||
  Example: find first number in a string:
 | 
			
		||||
     int number;
 | 
			
		||||
     pcrecpp::RE re("(\\d+)");
 | 
			
		||||
     re.PartialMatch("x*100 + 20", &number);
 | 
			
		||||
     assert(number == 100);
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">UTF-8 AND THE MATCHING INTERFACE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By default, pattern and text are plain text, one byte per character. The UTF8
 | 
			
		||||
flag, passed to the constructor, causes both pattern and string to be treated
 | 
			
		||||
as UTF-8 text, still a byte stream but potentially multiple bytes per
 | 
			
		||||
character. In practice, the text is likelier to be UTF-8 than the pattern, but
 | 
			
		||||
the match returned may depend on the UTF8 flag, so always use it when matching
 | 
			
		||||
UTF8 text. For example, "." will match one byte normally but with UTF8 set may
 | 
			
		||||
match up to three bytes of a multi-byte character.
 | 
			
		||||
<pre>
 | 
			
		||||
  Example:
 | 
			
		||||
     pcrecpp::RE_Options options;
 | 
			
		||||
     options.set_utf8();
 | 
			
		||||
     pcrecpp::RE re(utf8_pattern, options);
 | 
			
		||||
     re.FullMatch(utf8_string);
 | 
			
		||||
 | 
			
		||||
  Example: using the convenience function UTF8():
 | 
			
		||||
     pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
 | 
			
		||||
     re.FullMatch(utf8_string);
 | 
			
		||||
</pre>
 | 
			
		||||
NOTE: The UTF8 flag is ignored if pcre was not configured with the
 | 
			
		||||
<pre>
 | 
			
		||||
      --enable-utf8 flag.
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
PCRE defines some modifiers to change the behavior of the regular expression
 | 
			
		||||
engine. The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle to
 | 
			
		||||
pass such modifiers to a RE class. Currently, the following modifiers are
 | 
			
		||||
supported:
 | 
			
		||||
<pre>
 | 
			
		||||
   modifier              description               Perl corresponding
 | 
			
		||||
 | 
			
		||||
   PCRE_CASELESS         case insensitive match      /i
 | 
			
		||||
   PCRE_MULTILINE        multiple lines match        /m
 | 
			
		||||
   PCRE_DOTALL           dot matches newlines        /s
 | 
			
		||||
   PCRE_DOLLAR_ENDONLY   $ matches only at end       N/A
 | 
			
		||||
   PCRE_EXTRA            strict escape parsing       N/A
 | 
			
		||||
   PCRE_EXTENDED         ignore white spaces         /x
 | 
			
		||||
   PCRE_UTF8             handles UTF8 chars          built-in
 | 
			
		||||
   PCRE_UNGREEDY         reverses * and *?           N/A
 | 
			
		||||
   PCRE_NO_AUTO_CAPTURE  disables capturing parens   N/A (*)
 | 
			
		||||
</pre>
 | 
			
		||||
(*) Both Perl and PCRE allow non capturing parentheses by means of the
 | 
			
		||||
"?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
 | 
			
		||||
capture, while (ab|cd) does.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
For a full account of how each modifier works, please check the
 | 
			
		||||
PCRE API reference page.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
For each modifier, there are two member functions whose name is made
 | 
			
		||||
out of the modifier in lowercase, without the "PCRE_" prefix. For
 | 
			
		||||
instance, PCRE_CASELESS is handled by
 | 
			
		||||
<pre>
 | 
			
		||||
  bool caseless()
 | 
			
		||||
</pre>
 | 
			
		||||
which returns true if the modifier is set, and
 | 
			
		||||
<pre>
 | 
			
		||||
  RE_Options & set_caseless(bool)
 | 
			
		||||
</pre>
 | 
			
		||||
which sets or unsets the modifier. Moreover, PCRE_EXTRA_MATCH_LIMIT can be
 | 
			
		||||
accessed through the <b>set_match_limit()</b> and <b>match_limit()</b> member
 | 
			
		||||
functions. Setting <i>match_limit</i> to a non-zero value will limit the
 | 
			
		||||
execution of pcre to keep it from doing bad things like blowing the stack or
 | 
			
		||||
taking an eternity to return a result. A value of 5000 is good enough to stop
 | 
			
		||||
stack blowup in a 2MB thread stack. Setting <i>match_limit</i> to zero disables
 | 
			
		||||
match limiting. Alternatively, you can call <b>match_limit_recursion()</b>
 | 
			
		||||
which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much PCRE
 | 
			
		||||
recurses. <b>match_limit()</b> limits the number of matches PCRE does;
 | 
			
		||||
<b>match_limit_recursion()</b> limits the depth of internal recursion, and
 | 
			
		||||
therefore the amount of stack that is used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Normally, to pass one or more modifiers to a RE class, you declare
 | 
			
		||||
a <i>RE_Options</i> object, set the appropriate options, and pass this
 | 
			
		||||
object to a RE constructor. Example:
 | 
			
		||||
<pre>
 | 
			
		||||
   RE_Options opt;
 | 
			
		||||
   opt.set_caseless(true);
 | 
			
		||||
   if (RE("HELLO", opt).PartialMatch("hello world")) ...
 | 
			
		||||
</pre>
 | 
			
		||||
RE_Options has two constructors. The default constructor takes no arguments and
 | 
			
		||||
creates a set of flags that are off by default. The optional parameter
 | 
			
		||||
<i>option_flags</i> is to facilitate transfer of legacy code from C programs.
 | 
			
		||||
This lets you do
 | 
			
		||||
<pre>
 | 
			
		||||
   RE(pattern,
 | 
			
		||||
     RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
 | 
			
		||||
</pre>
 | 
			
		||||
However, new code is better off doing
 | 
			
		||||
<pre>
 | 
			
		||||
   RE(pattern,
 | 
			
		||||
     RE_Options().set_caseless(true).set_multiline(true))
 | 
			
		||||
       .PartialMatch(str);
 | 
			
		||||
</pre>
 | 
			
		||||
If you are going to pass one of the most used modifiers, there are some
 | 
			
		||||
convenience functions that return a RE_Options class with the
 | 
			
		||||
appropriate modifier already set: <b>CASELESS()</b>, <b>UTF8()</b>,
 | 
			
		||||
<b>MULTILINE()</b>, <b>DOTALL()</b>, and <b>EXTENDED()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If you need to set several options at once, and you don't want to go through
 | 
			
		||||
the pains of declaring a RE_Options object and setting several options, there
 | 
			
		||||
is a parallel method that gives you such ability on the fly. You can concatenate
 | 
			
		||||
several <b>set_xxxxx()</b> member functions, since each of them returns a
 | 
			
		||||
reference to its class object. For example, to pass PCRE_CASELESS,
 | 
			
		||||
PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one statement, you may write:
 | 
			
		||||
<pre>
 | 
			
		||||
   RE(" ^ xyz \\s+ .* blah$",
 | 
			
		||||
     RE_Options()
 | 
			
		||||
       .set_caseless(true)
 | 
			
		||||
       .set_extended(true)
 | 
			
		||||
       .set_multiline(true)).PartialMatch(sometext);
 | 
			
		||||
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC8" href="#TOC1">SCANNING TEXT INCREMENTALLY</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The "Consume" operation may be useful if you want to repeatedly
 | 
			
		||||
match regular expressions at the front of a string and skip over
 | 
			
		||||
them as they match. This requires use of the "StringPiece" type,
 | 
			
		||||
which represents a sub-range of a real string. Like RE, StringPiece
 | 
			
		||||
is defined in the pcrecpp namespace.
 | 
			
		||||
<pre>
 | 
			
		||||
  Example: read lines of the form "var = value" from a string.
 | 
			
		||||
     string contents = ...;                 // Fill string somehow
 | 
			
		||||
     pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece
 | 
			
		||||
 | 
			
		||||
     string var;
 | 
			
		||||
     int value;
 | 
			
		||||
     pcrecpp::RE re("(\\w+) = (\\d+)\n");
 | 
			
		||||
     while (re.Consume(&input, &var, &value)) {
 | 
			
		||||
       ...;
 | 
			
		||||
     }
 | 
			
		||||
</pre>
 | 
			
		||||
Each successful call to "Consume" will set "var/value", and also
 | 
			
		||||
advance "input" so it points past the matched text.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The "FindAndConsume" operation is similar to "Consume" but does not
 | 
			
		||||
anchor your match at the beginning of the string. For example, you
 | 
			
		||||
could extract all words from a string by repeatedly calling
 | 
			
		||||
<pre>
 | 
			
		||||
  pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC9" href="#TOC1">PARSING HEX/OCTAL/C-RADIX NUMBERS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By default, if you pass a pointer to a numeric value, the
 | 
			
		||||
corresponding text is interpreted as a base-10 number. You can
 | 
			
		||||
instead wrap the pointer with a call to one of the operators Hex(),
 | 
			
		||||
Octal(), or CRadix() to interpret the text in another base. The
 | 
			
		||||
CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
 | 
			
		||||
prefixes, but defaults to base-10.
 | 
			
		||||
<pre>
 | 
			
		||||
  Example:
 | 
			
		||||
    int a, b, c, d;
 | 
			
		||||
    pcrecpp::RE re("(.*) (.*) (.*) (.*)");
 | 
			
		||||
    re.FullMatch("100 40 0100 0x40",
 | 
			
		||||
                 pcrecpp::Octal(&a), pcrecpp::Hex(&b),
 | 
			
		||||
                 pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
 | 
			
		||||
</pre>
 | 
			
		||||
will leave 64 in a, b, c, and d.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC10" href="#TOC1">REPLACING PARTS OF STRINGS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
You can replace the first match of "pattern" in "str" with "rewrite".
 | 
			
		||||
Within "rewrite", backslash-escaped digits (\1 to \9) can be
 | 
			
		||||
used to insert text matching corresponding parenthesized group
 | 
			
		||||
from the pattern. \0 in "rewrite" refers to the entire matching
 | 
			
		||||
text. For example:
 | 
			
		||||
<pre>
 | 
			
		||||
  string s = "yabba dabba doo";
 | 
			
		||||
  pcrecpp::RE("b+").Replace("d", &s);
 | 
			
		||||
</pre>
 | 
			
		||||
will leave "s" containing "yada dabba doo". The result is true if the pattern
 | 
			
		||||
matches and a replacement occurs, false otherwise.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>GlobalReplace</b> is like <b>Replace</b> except that it replaces all
 | 
			
		||||
occurrences of the pattern in the string with the rewrite. Replacements are
 | 
			
		||||
not subject to re-matching. For example:
 | 
			
		||||
<pre>
 | 
			
		||||
  string s = "yabba dabba doo";
 | 
			
		||||
  pcrecpp::RE("b+").GlobalReplace("d", &s);
 | 
			
		||||
</pre>
 | 
			
		||||
will leave "s" containing "yada dada doo". It returns the number of
 | 
			
		||||
replacements made.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>Extract</b> is like <b>Replace</b>, except that if the pattern matches,
 | 
			
		||||
"rewrite" is copied into "out" (an additional argument) with substitutions.
 | 
			
		||||
The non-matching portions of "text" are ignored. Returns true iff a match
 | 
			
		||||
occurred and the extraction happened successfully;  if no match occurs, the
 | 
			
		||||
string is left unaffected.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC11" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The C++ wrapper was contributed by Google Inc.
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 2007 Google Inc.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC12" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 08 January 2012
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										426
									
								
								tools/pcre/doc/html/pcredemo.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										426
									
								
								tools/pcre/doc/html/pcredemo.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,426 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcredemo specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcredemo man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
</ul>
 | 
			
		||||
<PRE>
 | 
			
		||||
/*************************************************
 | 
			
		||||
*           PCRE DEMONSTRATION PROGRAM           *
 | 
			
		||||
*************************************************/
 | 
			
		||||
 | 
			
		||||
/* This is a demonstration program to illustrate the most straightforward ways
 | 
			
		||||
of calling the PCRE regular expression library from a C program. See the
 | 
			
		||||
pcresample documentation for a short discussion ("man pcresample" if you have
 | 
			
		||||
the PCRE man pages installed).
 | 
			
		||||
 | 
			
		||||
In Unix-like environments, if PCRE is installed in your standard system
 | 
			
		||||
libraries, you should be able to compile this program using this command:
 | 
			
		||||
 | 
			
		||||
gcc -Wall pcredemo.c -lpcre -o pcredemo
 | 
			
		||||
 | 
			
		||||
If PCRE is not installed in a standard place, it is likely to be installed with
 | 
			
		||||
support for the pkg-config mechanism. If you have pkg-config, you can compile
 | 
			
		||||
this program using this command:
 | 
			
		||||
 | 
			
		||||
gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
 | 
			
		||||
 | 
			
		||||
If you do not have pkg-config, you may have to use this:
 | 
			
		||||
 | 
			
		||||
gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
 | 
			
		||||
  -R/usr/local/lib -lpcre -o pcredemo
 | 
			
		||||
 | 
			
		||||
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
 | 
			
		||||
library files for PCRE are installed on your system. Only some operating
 | 
			
		||||
systems (e.g. Solaris) use the -R option.
 | 
			
		||||
 | 
			
		||||
Building under Windows:
 | 
			
		||||
 | 
			
		||||
If you want to statically link this program against a non-dll .a file, you must
 | 
			
		||||
define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
 | 
			
		||||
pcre_free() exported functions will be declared __declspec(dllimport), with
 | 
			
		||||
unwanted results. So in this environment, uncomment the following line. */
 | 
			
		||||
 | 
			
		||||
/* #define PCRE_STATIC */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <pcre.h>
 | 
			
		||||
 | 
			
		||||
#define OVECCOUNT 30    /* should be a multiple of 3 */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main(int argc, char **argv)
 | 
			
		||||
{
 | 
			
		||||
pcre *re;
 | 
			
		||||
const char *error;
 | 
			
		||||
char *pattern;
 | 
			
		||||
char *subject;
 | 
			
		||||
unsigned char *name_table;
 | 
			
		||||
unsigned int option_bits;
 | 
			
		||||
int erroffset;
 | 
			
		||||
int find_all;
 | 
			
		||||
int crlf_is_newline;
 | 
			
		||||
int namecount;
 | 
			
		||||
int name_entry_size;
 | 
			
		||||
int ovector[OVECCOUNT];
 | 
			
		||||
int subject_length;
 | 
			
		||||
int rc, i;
 | 
			
		||||
int utf8;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**************************************************************************
 | 
			
		||||
* First, sort out the command line. There is only one possible option at  *
 | 
			
		||||
* the moment, "-g" to request repeated matching to find all occurrences,  *
 | 
			
		||||
* like Perl's /g option. We set the variable find_all to a non-zero value *
 | 
			
		||||
* if the -g option is present. Apart from that, there must be exactly two *
 | 
			
		||||
* arguments.                                                              *
 | 
			
		||||
**************************************************************************/
 | 
			
		||||
 | 
			
		||||
find_all = 0;
 | 
			
		||||
for (i = 1; i < argc; i++)
 | 
			
		||||
  {
 | 
			
		||||
  if (strcmp(argv[i], "-g") == 0) find_all = 1;
 | 
			
		||||
    else break;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
/* After the options, we require exactly two arguments, which are the pattern,
 | 
			
		||||
and the subject string. */
 | 
			
		||||
 | 
			
		||||
if (argc - i != 2)
 | 
			
		||||
  {
 | 
			
		||||
  printf("Two arguments required: a regex and a subject string\n");
 | 
			
		||||
  return 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
pattern = argv[i];
 | 
			
		||||
subject = argv[i+1];
 | 
			
		||||
subject_length = (int)strlen(subject);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*************************************************************************
 | 
			
		||||
* Now we are going to compile the regular expression pattern, and handle *
 | 
			
		||||
* any errors that are detected.                                          *
 | 
			
		||||
*************************************************************************/
 | 
			
		||||
 | 
			
		||||
re = pcre_compile(
 | 
			
		||||
  pattern,              /* the pattern */
 | 
			
		||||
  0,                    /* default options */
 | 
			
		||||
  &error,               /* for error message */
 | 
			
		||||
  &erroffset,           /* for error offset */
 | 
			
		||||
  NULL);                /* use default character tables */
 | 
			
		||||
 | 
			
		||||
/* Compilation failed: print the error message and exit */
 | 
			
		||||
 | 
			
		||||
if (re == NULL)
 | 
			
		||||
  {
 | 
			
		||||
  printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
 | 
			
		||||
  return 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*************************************************************************
 | 
			
		||||
* If the compilation succeeded, we call PCRE again, in order to do a     *
 | 
			
		||||
* pattern match against the subject string. This does just ONE match. If *
 | 
			
		||||
* further matching is needed, it will be done below.                     *
 | 
			
		||||
*************************************************************************/
 | 
			
		||||
 | 
			
		||||
rc = pcre_exec(
 | 
			
		||||
  re,                   /* the compiled pattern */
 | 
			
		||||
  NULL,                 /* no extra data - we didn't study the pattern */
 | 
			
		||||
  subject,              /* the subject string */
 | 
			
		||||
  subject_length,       /* the length of the subject */
 | 
			
		||||
  0,                    /* start at offset 0 in the subject */
 | 
			
		||||
  0,                    /* default options */
 | 
			
		||||
  ovector,              /* output vector for substring information */
 | 
			
		||||
  OVECCOUNT);           /* number of elements in the output vector */
 | 
			
		||||
 | 
			
		||||
/* Matching failed: handle error cases */
 | 
			
		||||
 | 
			
		||||
if (rc < 0)
 | 
			
		||||
  {
 | 
			
		||||
  switch(rc)
 | 
			
		||||
    {
 | 
			
		||||
    case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
 | 
			
		||||
    /*
 | 
			
		||||
    Handle other special cases if you like
 | 
			
		||||
    */
 | 
			
		||||
    default: printf("Matching error %d\n", rc); break;
 | 
			
		||||
    }
 | 
			
		||||
  pcre_free(re);     /* Release memory used for the compiled pattern */
 | 
			
		||||
  return 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
/* Match succeeded */
 | 
			
		||||
 | 
			
		||||
printf("\nMatch succeeded at offset %d\n", ovector[0]);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*************************************************************************
 | 
			
		||||
* We have found the first match within the subject string. If the output *
 | 
			
		||||
* vector wasn't big enough, say so. Then output any substrings that were *
 | 
			
		||||
* captured.                                                              *
 | 
			
		||||
*************************************************************************/
 | 
			
		||||
 | 
			
		||||
/* The output vector wasn't big enough */
 | 
			
		||||
 | 
			
		||||
if (rc == 0)
 | 
			
		||||
  {
 | 
			
		||||
  rc = OVECCOUNT/3;
 | 
			
		||||
  printf("ovector only has room for %d captured substrings\n", rc - 1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
/* Show substrings stored in the output vector by number. Obviously, in a real
 | 
			
		||||
application you might want to do things other than print them. */
 | 
			
		||||
 | 
			
		||||
for (i = 0; i < rc; i++)
 | 
			
		||||
  {
 | 
			
		||||
  char *substring_start = subject + ovector[2*i];
 | 
			
		||||
  int substring_length = ovector[2*i+1] - ovector[2*i];
 | 
			
		||||
  printf("%2d: %.*s\n", i, substring_length, substring_start);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**************************************************************************
 | 
			
		||||
* That concludes the basic part of this demonstration program. We have    *
 | 
			
		||||
* compiled a pattern, and performed a single match. The code that follows *
 | 
			
		||||
* shows first how to access named substrings, and then how to code for    *
 | 
			
		||||
* repeated matches on the same subject.                                   *
 | 
			
		||||
**************************************************************************/
 | 
			
		||||
 | 
			
		||||
/* See if there are any named substrings, and if so, show them by name. First
 | 
			
		||||
we have to extract the count of named parentheses from the pattern. */
 | 
			
		||||
 | 
			
		||||
(void)pcre_fullinfo(
 | 
			
		||||
  re,                   /* the compiled pattern */
 | 
			
		||||
  NULL,                 /* no extra data - we didn't study the pattern */
 | 
			
		||||
  PCRE_INFO_NAMECOUNT,  /* number of named substrings */
 | 
			
		||||
  &namecount);          /* where to put the answer */
 | 
			
		||||
 | 
			
		||||
if (namecount <= 0) printf("No named substrings\n"); else
 | 
			
		||||
  {
 | 
			
		||||
  unsigned char *tabptr;
 | 
			
		||||
  printf("Named substrings\n");
 | 
			
		||||
 | 
			
		||||
  /* Before we can access the substrings, we must extract the table for
 | 
			
		||||
  translating names to numbers, and the size of each entry in the table. */
 | 
			
		||||
 | 
			
		||||
  (void)pcre_fullinfo(
 | 
			
		||||
    re,                       /* the compiled pattern */
 | 
			
		||||
    NULL,                     /* no extra data - we didn't study the pattern */
 | 
			
		||||
    PCRE_INFO_NAMETABLE,      /* address of the table */
 | 
			
		||||
    &name_table);             /* where to put the answer */
 | 
			
		||||
 | 
			
		||||
  (void)pcre_fullinfo(
 | 
			
		||||
    re,                       /* the compiled pattern */
 | 
			
		||||
    NULL,                     /* no extra data - we didn't study the pattern */
 | 
			
		||||
    PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
 | 
			
		||||
    &name_entry_size);        /* where to put the answer */
 | 
			
		||||
 | 
			
		||||
  /* Now we can scan the table and, for each entry, print the number, the name,
 | 
			
		||||
  and the substring itself. */
 | 
			
		||||
 | 
			
		||||
  tabptr = name_table;
 | 
			
		||||
  for (i = 0; i < namecount; i++)
 | 
			
		||||
    {
 | 
			
		||||
    int n = (tabptr[0] << 8) | tabptr[1];
 | 
			
		||||
    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
 | 
			
		||||
      ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
 | 
			
		||||
    tabptr += name_entry_size;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*************************************************************************
 | 
			
		||||
* If the "-g" option was given on the command line, we want to continue  *
 | 
			
		||||
* to search for additional matches in the subject string, in a similar   *
 | 
			
		||||
* way to the /g option in Perl. This turns out to be trickier than you   *
 | 
			
		||||
* might think because of the possibility of matching an empty string.    *
 | 
			
		||||
* What happens is as follows:                                            *
 | 
			
		||||
*                                                                        *
 | 
			
		||||
* If the previous match was NOT for an empty string, we can just start   *
 | 
			
		||||
* the next match at the end of the previous one.                         *
 | 
			
		||||
*                                                                        *
 | 
			
		||||
* If the previous match WAS for an empty string, we can't do that, as it *
 | 
			
		||||
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
 | 
			
		||||
* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
 | 
			
		||||
* The first of these tells PCRE that an empty string at the start of the *
 | 
			
		||||
* subject is not a valid match; other possibilities must be tried. The   *
 | 
			
		||||
* second flag restricts PCRE to one match attempt at the initial string  *
 | 
			
		||||
* position. If this match succeeds, an alternative to the empty string   *
 | 
			
		||||
* match has been found, and we can print it and proceed round the loop,  *
 | 
			
		||||
* advancing by the length of whatever was found. If this match does not  *
 | 
			
		||||
* succeed, we still stay in the loop, advancing by just one character.   *
 | 
			
		||||
* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
 | 
			
		||||
* more than one byte.                                                    *
 | 
			
		||||
*                                                                        *
 | 
			
		||||
* However, there is a complication concerned with newlines. When the     *
 | 
			
		||||
* newline convention is such that CRLF is a valid newline, we must       *
 | 
			
		||||
* advance by two characters rather than one. The newline convention can  *
 | 
			
		||||
* be set in the regex by (*CR), etc.; if not, we must find the default.  *
 | 
			
		||||
*************************************************************************/
 | 
			
		||||
 | 
			
		||||
if (!find_all)     /* Check for -g */
 | 
			
		||||
  {
 | 
			
		||||
  pcre_free(re);   /* Release the memory used for the compiled pattern */
 | 
			
		||||
  return 0;        /* Finish unless -g was given */
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
 | 
			
		||||
sequence. First, find the options with which the regex was compiled; extract
 | 
			
		||||
the UTF-8 state, and mask off all but the newline options. */
 | 
			
		||||
 | 
			
		||||
(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
 | 
			
		||||
utf8 = option_bits & PCRE_UTF8;
 | 
			
		||||
option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
 | 
			
		||||
               PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
 | 
			
		||||
 | 
			
		||||
/* If no newline options were set, find the default newline convention from the
 | 
			
		||||
build configuration. */
 | 
			
		||||
 | 
			
		||||
if (option_bits == 0)
 | 
			
		||||
  {
 | 
			
		||||
  int d;
 | 
			
		||||
  (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
 | 
			
		||||
  /* Note that these values are always the ASCII ones, even in
 | 
			
		||||
  EBCDIC environments. CR = 13, NL = 10. */
 | 
			
		||||
  option_bits = (d == 13)? PCRE_NEWLINE_CR :
 | 
			
		||||
          (d == 10)? PCRE_NEWLINE_LF :
 | 
			
		||||
          (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
 | 
			
		||||
          (d == -2)? PCRE_NEWLINE_ANYCRLF :
 | 
			
		||||
          (d == -1)? PCRE_NEWLINE_ANY : 0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
/* See if CRLF is a valid newline sequence. */
 | 
			
		||||
 | 
			
		||||
crlf_is_newline =
 | 
			
		||||
     option_bits == PCRE_NEWLINE_ANY ||
 | 
			
		||||
     option_bits == PCRE_NEWLINE_CRLF ||
 | 
			
		||||
     option_bits == PCRE_NEWLINE_ANYCRLF;
 | 
			
		||||
 | 
			
		||||
/* Loop for second and subsequent matches */
 | 
			
		||||
 | 
			
		||||
for (;;)
 | 
			
		||||
  {
 | 
			
		||||
  int options = 0;                 /* Normally no options */
 | 
			
		||||
  int start_offset = ovector[1];   /* Start at end of previous match */
 | 
			
		||||
 | 
			
		||||
  /* If the previous match was for an empty string, we are finished if we are
 | 
			
		||||
  at the end of the subject. Otherwise, arrange to run another match at the
 | 
			
		||||
  same point to see if a non-empty match can be found. */
 | 
			
		||||
 | 
			
		||||
  if (ovector[0] == ovector[1])
 | 
			
		||||
    {
 | 
			
		||||
    if (ovector[0] == subject_length) break;
 | 
			
		||||
    options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  /* Run the next matching operation */
 | 
			
		||||
 | 
			
		||||
  rc = pcre_exec(
 | 
			
		||||
    re,                   /* the compiled pattern */
 | 
			
		||||
    NULL,                 /* no extra data - we didn't study the pattern */
 | 
			
		||||
    subject,              /* the subject string */
 | 
			
		||||
    subject_length,       /* the length of the subject */
 | 
			
		||||
    start_offset,         /* starting offset in the subject */
 | 
			
		||||
    options,              /* options */
 | 
			
		||||
    ovector,              /* output vector for substring information */
 | 
			
		||||
    OVECCOUNT);           /* number of elements in the output vector */
 | 
			
		||||
 | 
			
		||||
  /* This time, a result of NOMATCH isn't an error. If the value in "options"
 | 
			
		||||
  is zero, it just means we have found all possible matches, so the loop ends.
 | 
			
		||||
  Otherwise, it means we have failed to find a non-empty-string match at a
 | 
			
		||||
  point where there was a previous empty-string match. In this case, we do what
 | 
			
		||||
  Perl does: advance the matching position by one character, and continue. We
 | 
			
		||||
  do this by setting the "end of previous match" offset, because that is picked
 | 
			
		||||
  up at the top of the loop as the point at which to start again.
 | 
			
		||||
 | 
			
		||||
  There are two complications: (a) When CRLF is a valid newline sequence, and
 | 
			
		||||
  the current position is just before it, advance by an extra byte. (b)
 | 
			
		||||
  Otherwise we must ensure that we skip an entire UTF-8 character if we are in
 | 
			
		||||
  UTF-8 mode. */
 | 
			
		||||
 | 
			
		||||
  if (rc == PCRE_ERROR_NOMATCH)
 | 
			
		||||
    {
 | 
			
		||||
    if (options == 0) break;                    /* All matches found */
 | 
			
		||||
    ovector[1] = start_offset + 1;              /* Advance one byte */
 | 
			
		||||
    if (crlf_is_newline &&                      /* If CRLF is newline & */
 | 
			
		||||
        start_offset < subject_length - 1 &&    /* we are at CRLF, */
 | 
			
		||||
        subject[start_offset] == '\r' &&
 | 
			
		||||
        subject[start_offset + 1] == '\n')
 | 
			
		||||
      ovector[1] += 1;                          /* Advance by one more. */
 | 
			
		||||
    else if (utf8)                              /* Otherwise, ensure we */
 | 
			
		||||
      {                                         /* advance a whole UTF-8 */
 | 
			
		||||
      while (ovector[1] < subject_length)       /* character. */
 | 
			
		||||
        {
 | 
			
		||||
        if ((subject[ovector[1]] & 0xc0) != 0x80) break;
 | 
			
		||||
        ovector[1] += 1;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    continue;    /* Go round the loop again */
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  /* Other matching errors are not recoverable. */
 | 
			
		||||
 | 
			
		||||
  if (rc < 0)
 | 
			
		||||
    {
 | 
			
		||||
    printf("Matching error %d\n", rc);
 | 
			
		||||
    pcre_free(re);    /* Release memory used for the compiled pattern */
 | 
			
		||||
    return 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  /* Match succeeded */
 | 
			
		||||
 | 
			
		||||
  printf("\nMatch succeeded again at offset %d\n", ovector[0]);
 | 
			
		||||
 | 
			
		||||
  /* The match succeeded, but the output vector wasn't big enough. */
 | 
			
		||||
 | 
			
		||||
  if (rc == 0)
 | 
			
		||||
    {
 | 
			
		||||
    rc = OVECCOUNT/3;
 | 
			
		||||
    printf("ovector only has room for %d captured substrings\n", rc - 1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  /* As before, show substrings stored in the output vector by number, and then
 | 
			
		||||
  also any named substrings. */
 | 
			
		||||
 | 
			
		||||
  for (i = 0; i < rc; i++)
 | 
			
		||||
    {
 | 
			
		||||
    char *substring_start = subject + ovector[2*i];
 | 
			
		||||
    int substring_length = ovector[2*i+1] - ovector[2*i];
 | 
			
		||||
    printf("%2d: %.*s\n", i, substring_length, substring_start);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  if (namecount <= 0) printf("No named substrings\n"); else
 | 
			
		||||
    {
 | 
			
		||||
    unsigned char *tabptr = name_table;
 | 
			
		||||
    printf("Named substrings\n");
 | 
			
		||||
    for (i = 0; i < namecount; i++)
 | 
			
		||||
      {
 | 
			
		||||
      int n = (tabptr[0] << 8) | tabptr[1];
 | 
			
		||||
      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
 | 
			
		||||
        ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
 | 
			
		||||
      tabptr += name_entry_size;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }      /* End of loop to find second and subsequent matches */
 | 
			
		||||
 | 
			
		||||
printf("\n");
 | 
			
		||||
pcre_free(re);       /* Release memory used for the compiled pattern */
 | 
			
		||||
return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* End of pcredemo.c */
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										759
									
								
								tools/pcre/doc/html/pcregrep.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										759
									
								
								tools/pcre/doc/html/pcregrep.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,759 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcregrep specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcregrep man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">BINARY FILES</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">OPTIONS</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">NEWLINES</a>
 | 
			
		||||
<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
 | 
			
		||||
<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
 | 
			
		||||
<li><a name="TOC10" href="#SEC10">MATCHING ERRORS</a>
 | 
			
		||||
<li><a name="TOC11" href="#SEC11">DIAGNOSTICS</a>
 | 
			
		||||
<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
 | 
			
		||||
<li><a name="TOC13" href="#SEC13">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC14" href="#SEC14">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcregrep [options] [long options] [pattern] [path1 path2 ...]</b>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcregrep</b> searches files for character patterns, in the same way as other
 | 
			
		||||
grep commands do, but it uses the PCRE regular expression library to support
 | 
			
		||||
patterns that are compatible with the regular expressions of Perl 5. See
 | 
			
		||||
<a href="pcresyntax.html"><b>pcresyntax</b>(3)</a>
 | 
			
		||||
for a quick-reference summary of pattern syntax, or
 | 
			
		||||
<a href="pcrepattern.html"><b>pcrepattern</b>(3)</a>
 | 
			
		||||
for a full description of the syntax and semantics of the regular expressions
 | 
			
		||||
that PCRE supports.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Patterns, whether supplied on the command line or in a separate file, are given
 | 
			
		||||
without delimiters. For example:
 | 
			
		||||
<pre>
 | 
			
		||||
  pcregrep Thursday /etc/motd
 | 
			
		||||
</pre>
 | 
			
		||||
If you attempt to use delimiters (for example, by surrounding a pattern with
 | 
			
		||||
slashes, as is common in Perl scripts), they are interpreted as part of the
 | 
			
		||||
pattern. Quotes can of course be used to delimit patterns on the command line
 | 
			
		||||
because they are interpreted by the shell, and indeed quotes are required if a
 | 
			
		||||
pattern contains white space or shell metacharacters.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The first argument that follows any option settings is treated as the single
 | 
			
		||||
pattern to be matched when neither <b>-e</b> nor <b>-f</b> is present.
 | 
			
		||||
Conversely, when one or both of these options are used to specify patterns, all
 | 
			
		||||
arguments are treated as path names. At least one of <b>-e</b>, <b>-f</b>, or an
 | 
			
		||||
argument pattern must be provided.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If no files are specified, <b>pcregrep</b> reads the standard input. The
 | 
			
		||||
standard input can also be referenced by a name consisting of a single hyphen.
 | 
			
		||||
For example:
 | 
			
		||||
<pre>
 | 
			
		||||
  pcregrep some-pattern /file1 - /file3
 | 
			
		||||
</pre>
 | 
			
		||||
By default, each line that matches a pattern is copied to the standard
 | 
			
		||||
output, and if there is more than one file, the file name is output at the
 | 
			
		||||
start of each line, followed by a colon. However, there are options that can
 | 
			
		||||
change how <b>pcregrep</b> behaves. In particular, the <b>-M</b> option makes it
 | 
			
		||||
possible to search for patterns that span line boundaries. What defines a line
 | 
			
		||||
boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The amount of memory used for buffering files that are being scanned is
 | 
			
		||||
controlled by a parameter that can be set by the <b>--buffer-size</b> option.
 | 
			
		||||
The default value for this parameter is specified when <b>pcregrep</b> is built,
 | 
			
		||||
with the built-in default being 20K. A block of memory three times this size is
 | 
			
		||||
used (to allow for buffering "before" and "after" lines). An error occurs if a
 | 
			
		||||
line overflows the buffer.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
 | 
			
		||||
BUFSIZ is defined in <b>&lt;stdio.h&gt;</b>. When there is more than one pattern
 | 
			
		||||
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
 | 
			
		||||
each line in the order in which they are defined, except that all the <b>-e</b>
 | 
			
		||||
patterns are tried before the <b>-f</b> patterns.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
By default, as soon as one pattern matches a line, no further patterns are
 | 
			
		||||
considered. However, if <b>--colour</b> (or <b>--color</b>) is used to colour the
 | 
			
		||||
matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
 | 
			
		||||
<b>--line-offsets</b> is used to output only the part of the line that matched
 | 
			
		||||
(either shown literally, or as an offset), scanning resumes immediately
 | 
			
		||||
following the match, so that further matches on the same line can be found. If
 | 
			
		||||
there are multiple patterns, they are all tried on the remainder of the line,
 | 
			
		||||
but patterns that follow the one that matched are not tried on the earlier part
 | 
			
		||||
of the line.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
This behaviour means that the order in which multiple patterns are specified
 | 
			
		||||
can affect the output when one of the above options is used. This is no longer
 | 
			
		||||
the same behaviour as GNU grep, which now manages to display earlier matches
 | 
			
		||||
for later patterns (as long as there is no overlap).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Patterns that can match an empty string are accepted, but empty string
 | 
			
		||||
matches are never recognized. An example is the pattern "(super)?(man)?", in
 | 
			
		||||
which all components are optional. This pattern finds all occurrences of both
 | 
			
		||||
"super" and "man"; the output differs from matching with "super|man" when only
 | 
			
		||||
the matching substrings are being shown.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variable is set,
 | 
			
		||||
<b>pcregrep</b> uses the value to set a locale when calling the PCRE library.
 | 
			
		||||
The <b>--locale</b> option can be used to override this.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">SUPPORT FOR COMPRESSED FILES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
It is possible to compile <b>pcregrep</b> so that it uses <b>libz</b> or
 | 
			
		||||
<b>libbz2</b> to read files whose names end in <b>.gz</b> or <b>.bz2</b>,
 | 
			
		||||
respectively. You can find out whether your binary has support for one or both
 | 
			
		||||
of these file types by running it with the <b>--help</b> option. If the
 | 
			
		||||
appropriate support is not present, files are treated as plain text. The
 | 
			
		||||
standard input is always so treated.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
By default, a file that contains a binary zero byte within the first 1024 bytes
 | 
			
		||||
is identified as a binary file, and is processed specially. (GNU grep also
 | 
			
		||||
identifies binary files in this manner.) See the <b>--binary-files</b> option
 | 
			
		||||
for a means of changing the way binary files are handled.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The order in which some of the options appear can affect the output. For
 | 
			
		||||
example, both the <b>-h</b> and <b>-l</b> options affect the printing of file
 | 
			
		||||
names. Whichever comes later in the command line will be the one that takes
 | 
			
		||||
effect. Similarly, except where noted below, if an option is given twice, the
 | 
			
		||||
later setting is used. Numerical values for options may be followed by K or M,
 | 
			
		||||
to signify multiplication by 1024 or 1024*1024 respectively.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--</b>
 | 
			
		||||
This terminates the list of options. It is useful if the next item on the
 | 
			
		||||
command line starts with a hyphen but is not an option. This allows for the
 | 
			
		||||
processing of patterns and filenames that start with hyphens.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
 | 
			
		||||
Output <i>number</i> lines of context after each matching line. If filenames
 | 
			
		||||
and/or line numbers are being output, a hyphen separator is used instead of a
 | 
			
		||||
colon for the context lines. A line containing "--" is output between each
 | 
			
		||||
group of lines, unless they are in fact contiguous in the input file. The value
 | 
			
		||||
of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
 | 
			
		||||
guarantees to have up to 8K of following text available for context output.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-a</b>, <b>--text</b>
 | 
			
		||||
Treat binary files as text. This is equivalent to
 | 
			
		||||
<b>--binary-files</b>=<i>text</i>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
 | 
			
		||||
Output <i>number</i> lines of context before each matching line. If filenames
 | 
			
		||||
and/or line numbers are being output, a hyphen separator is used instead of a
 | 
			
		||||
colon for the context lines. A line containing "--" is output between each
 | 
			
		||||
group of lines, unless they are in fact contiguous in the input file. The value
 | 
			
		||||
of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
 | 
			
		||||
guarantees to have up to 8K of preceding text available for context output.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--binary-files=</b><i>word</i>
 | 
			
		||||
Specify how binary files are to be processed. If the word is "binary" (the
 | 
			
		||||
default), pattern matching is performed on binary files, but the only output is
 | 
			
		||||
"Binary file <name> matches" when a match succeeds. If the word is "text",
 | 
			
		||||
which is equivalent to the <b>-a</b> or <b>--text</b> option, binary files are
 | 
			
		||||
processed in the same way as any other file. In this case, when a match
 | 
			
		||||
succeeds, the output may be binary garbage, which can have nasty effects if
 | 
			
		||||
sent to a terminal. If the word is "without-match", which is equivalent to the
 | 
			
		||||
<b>-I</b> option, binary files are not processed at all; they are assumed not to
 | 
			
		||||
be of interest.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--buffer-size=</b><i>number</i>
 | 
			
		||||
Set the parameter that controls how much memory is used for buffering files
 | 
			
		||||
that are being scanned.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
 | 
			
		||||
Output <i>number</i> lines of context both before and after each matching line.
 | 
			
		||||
This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-c</b>, <b>--count</b>
 | 
			
		||||
Do not output individual lines from the files that are being scanned; instead
 | 
			
		||||
output the number of lines that would otherwise have been shown. If no lines
 | 
			
		||||
are selected, the number zero is output. If several files are being
 | 
			
		||||
scanned, a count is output for each of them. However, if the
 | 
			
		||||
<b>--files-with-matches</b> option is also used, only those files whose counts
 | 
			
		||||
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
 | 
			
		||||
<b>-B</b>, and <b>-C</b> options are ignored.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--colour</b>, <b>--color</b>
 | 
			
		||||
If this option is given without any data, it is equivalent to "--colour=auto".
 | 
			
		||||
If data is required, it must be given in the same shell item, separated by an
 | 
			
		||||
equals sign.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--colour=</b><i>value</i>, <b>--color=</b><i>value</i>
 | 
			
		||||
This option specifies under what circumstances the parts of a line that matched
 | 
			
		||||
a pattern should be coloured in the output. By default, the output is not
 | 
			
		||||
coloured. The value (which is optional, see above) may be "never", "always", or
 | 
			
		||||
"auto". In the latter case, colouring happens only if the standard output is
 | 
			
		||||
connected to a terminal. More resources are used when colouring is enabled,
 | 
			
		||||
because <b>pcregrep</b> has to search for all possible matches in a line, not
 | 
			
		||||
just one, in order to colour them all.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
The colour that is used can be specified by setting the environment variable
 | 
			
		||||
PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
 | 
			
		||||
string of two numbers, separated by a semicolon. They are copied directly into
 | 
			
		||||
the control string for setting colour on a terminal, so it is your
 | 
			
		||||
responsibility to ensure that they make sense. If neither of the environment
 | 
			
		||||
variables is set, the default is "1;31", which gives red.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
 | 
			
		||||
If an input path is not a regular file or a directory, "action" specifies how
 | 
			
		||||
it is to be processed. Valid values are "read" (the default) or "skip"
 | 
			
		||||
(silently skip the path).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
 | 
			
		||||
If an input path is a directory, "action" specifies how it is to be processed.
 | 
			
		||||
Valid values are "read" (the default in non-Windows environments, for
 | 
			
		||||
compatibility with GNU grep), "recurse" (equivalent to the <b>-r</b> option), or
 | 
			
		||||
"skip" (silently skip the path, the default in Windows environments). In the
 | 
			
		||||
"read" case, directories are read as if they were ordinary files. In some
 | 
			
		||||
operating systems the effect of reading a directory like this is an immediate
 | 
			
		||||
end-of-file; in others it may provoke an error.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
 | 
			
		||||
Specify a pattern to be matched. This option can be used multiple times in
 | 
			
		||||
order to specify several patterns. It can also be used as a way of specifying a
 | 
			
		||||
single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
 | 
			
		||||
pattern is taken from the command line; all arguments are treated as file
 | 
			
		||||
names. There is no limit to the number of patterns. They are applied to each
 | 
			
		||||
line in the order in which they are defined until one matches.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
If <b>-f</b> is used with <b>-e</b>, the command line patterns are matched first,
 | 
			
		||||
followed by the patterns from the file(s), independent of the order in which
 | 
			
		||||
these options are specified. Note that multiple use of <b>-e</b> is not the same
 | 
			
		||||
as a single pattern with alternatives. For example, X|Y finds the first
 | 
			
		||||
character in a line that is X or Y, whereas if the two patterns are given
 | 
			
		||||
separately, with X first, <b>pcregrep</b> finds X if it is present, even if it
 | 
			
		||||
follows Y in the line. It finds Y only if there is no X in the line. This
 | 
			
		||||
matters only if you are using <b>-o</b> or <b>--colo(u)r</b> to show the part(s)
 | 
			
		||||
of the line that matched.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--exclude</b>=<i>pattern</i>
 | 
			
		||||
Files (but not directories) whose names match the pattern are skipped without
 | 
			
		||||
being processed. This applies to all files, whether listed on the command line,
 | 
			
		||||
obtained from <b>--file-list</b>, or by scanning a directory. The pattern is a
 | 
			
		||||
PCRE regular expression, and is matched against the final component of the file
 | 
			
		||||
name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
 | 
			
		||||
apply to this pattern. The option may be given any number of times in order to
 | 
			
		||||
specify multiple patterns. If a file name matches both an <b>--include</b>
 | 
			
		||||
and an <b>--exclude</b> pattern, it is excluded. There is no short form for this
 | 
			
		||||
option.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--exclude-from=</b><i>filename</i>
 | 
			
		||||
Treat each non-empty line of the file as the data for an <b>--exclude</b>
 | 
			
		||||
option. What constitutes a newline when reading the file is the operating
 | 
			
		||||
system's default. The <b>--newline</b> option has no effect on this option. This
 | 
			
		||||
option may be given more than once in order to specify a number of files to
 | 
			
		||||
read.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--exclude-dir</b>=<i>pattern</i>
 | 
			
		||||
Directories whose names match the pattern are skipped without being processed,
 | 
			
		||||
whatever the setting of the <b>--recursive</b> option. This applies to all
 | 
			
		||||
directories, whether listed on the command line, obtained from
 | 
			
		||||
<b>--file-list</b>, or by scanning a parent directory. The pattern is a PCRE
 | 
			
		||||
regular expression, and is matched against the final component of the directory
 | 
			
		||||
name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
 | 
			
		||||
apply to this pattern. The option may be given any number of times in order to
 | 
			
		||||
specify more than one pattern. If a directory matches both <b>--include-dir</b>
 | 
			
		||||
and <b>--exclude-dir</b>, it is excluded. There is no short form for this
 | 
			
		||||
option.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-F</b>, <b>--fixed-strings</b>
 | 
			
		||||
Interpret each data-matching pattern as a list of fixed strings, separated by
 | 
			
		||||
newlines, instead of as a regular expression. What constitutes a newline for
 | 
			
		||||
this purpose is controlled by the <b>--newline</b> option. The <b>-w</b> (match
 | 
			
		||||
as a word) and <b>-x</b> (match whole line) options can be used with <b>-F</b>.
 | 
			
		||||
They apply to each of the fixed strings. A line is selected if any of the fixed
 | 
			
		||||
strings are found in it (subject to <b>-w</b> or <b>-x</b>, if present). This
 | 
			
		||||
option applies only to the patterns that are matched against the contents of
 | 
			
		||||
files; it does not apply to patterns specified by any of the <b>--include</b> or
 | 
			
		||||
<b>--exclude</b> options.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
 | 
			
		||||
Read patterns from the file, one per line, and match them against
 | 
			
		||||
each line of input. What constitutes a newline when reading the file is the
 | 
			
		||||
operating system's default. The <b>--newline</b> option has no effect on this
 | 
			
		||||
option. Trailing white space is removed from each line, and blank lines are
 | 
			
		||||
ignored. An empty file contains no patterns and therefore matches nothing. See
 | 
			
		||||
also the comments about multiple patterns versus a single pattern with
 | 
			
		||||
alternatives in the description of <b>-e</b> above.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
If this option is given more than once, all the specified files are
 | 
			
		||||
read. A data line is output if any of the patterns match it. A filename can
 | 
			
		||||
be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
 | 
			
		||||
specified on the command line using <b>-e</b> may also be present; they are
 | 
			
		||||
tested before the file's patterns. However, no other pattern is taken from the
 | 
			
		||||
command line; all arguments are treated as the names of paths to be searched.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--file-list</b>=<i>filename</i>
 | 
			
		||||
Read a list of files and/or directories that are to be scanned from the given
 | 
			
		||||
file, one per line. Trailing white space is removed from each line, and blank
 | 
			
		||||
lines are ignored. These paths are processed before any that are listed on the
 | 
			
		||||
command line. The filename can be given as "-" to refer to the standard input.
 | 
			
		||||
If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
 | 
			
		||||
read first. This is useful only when the standard input is a terminal, from
 | 
			
		||||
which further lines (the list of files) can be read after an end-of-file
 | 
			
		||||
indication. If this option is given more than once, all the specified files are
 | 
			
		||||
read.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--file-offsets</b>
 | 
			
		||||
Instead of showing lines or parts of lines that match, show each match as an
 | 
			
		||||
offset from the start of the file and a length, separated by a comma. In this
 | 
			
		||||
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
 | 
			
		||||
options are ignored. If there is more than one match in a line, each of them is
 | 
			
		||||
shown separately. This option is mutually exclusive with <b>--line-offsets</b>
 | 
			
		||||
and <b>--only-matching</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-H</b>, <b>--with-filename</b>
 | 
			
		||||
Force the inclusion of the filename at the start of output lines when searching
 | 
			
		||||
a single file. By default, the filename is not shown in this case. For matching
 | 
			
		||||
lines, the filename is followed by a colon; for context lines, a hyphen
 | 
			
		||||
separator is used. If a line number is also being output, it follows the file
 | 
			
		||||
name.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-h</b>, <b>--no-filename</b>
 | 
			
		||||
Suppress the output of filenames when searching multiple files. By default,
 | 
			
		||||
filenames are shown when multiple files are searched. For matching lines, the
 | 
			
		||||
filename is followed by a colon; for context lines, a hyphen separator is used.
 | 
			
		||||
If a line number is also being output, it follows the file name.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--help</b>
 | 
			
		||||
Output a help message, giving brief details of the command options and file
 | 
			
		||||
type support, and then exit. Anything else on the command line is
 | 
			
		||||
ignored.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-I</b>
 | 
			
		||||
Treat binary files as never matching. This is equivalent to
 | 
			
		||||
<b>--binary-files</b>=<i>without-match</i>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-i</b>, <b>--ignore-case</b>
 | 
			
		||||
Ignore upper/lower case distinctions during comparisons.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--include</b>=<i>pattern</i>
 | 
			
		||||
If any <b>--include</b> patterns are specified, the only files that are
 | 
			
		||||
processed are those that match one of the patterns (and do not match an
 | 
			
		||||
<b>--exclude</b> pattern). This option does not affect directories, but it
 | 
			
		||||
applies to all files, whether listed on the command line, obtained from
 | 
			
		||||
<b>--file-list</b>, or by scanning a directory. The pattern is a PCRE regular
 | 
			
		||||
expression, and is matched against the final component of the file name, not
 | 
			
		||||
the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not apply to
 | 
			
		||||
this pattern. The option may be given any number of times. If a file name
 | 
			
		||||
matches both an <b>--include</b> and an <b>--exclude</b> pattern, it is excluded.
 | 
			
		||||
There is no short form for this option.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--include-from=</b><i>filename</i>
 | 
			
		||||
Treat each non-empty line of the file as the data for an <b>--include</b>
 | 
			
		||||
option. What constitutes a newline for this purpose is the operating system's
 | 
			
		||||
default. The <b>--newline</b> option has no effect on this option. This option
 | 
			
		||||
may be given any number of times; all the files are read.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--include-dir</b>=<i>pattern</i>
 | 
			
		||||
If any <b>--include-dir</b> patterns are specified, the only directories that
 | 
			
		||||
are processed are those that match one of the patterns (and do not match an
 | 
			
		||||
<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
 | 
			
		||||
on the command line, obtained from <b>--file-list</b>, or by scanning a parent
 | 
			
		||||
directory. The pattern is a PCRE regular expression, and is matched against the
 | 
			
		||||
final component of the directory name, not the entire path. The <b>-F</b>,
 | 
			
		||||
<b>-w</b>, and <b>-x</b> options do not apply to this pattern. The option may be
 | 
			
		||||
given any number of times. If a directory matches both <b>--include-dir</b> and
 | 
			
		||||
<b>--exclude-dir</b>, it is excluded. There is no short form for this option.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-L</b>, <b>--files-without-match</b>
 | 
			
		||||
Instead of outputting lines from the files, just output the names of the files
 | 
			
		||||
that do not contain any lines that would have been output. Each file name is
 | 
			
		||||
output once, on a separate line.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-l</b>, <b>--files-with-matches</b>
 | 
			
		||||
Instead of outputting lines from the files, just output the names of the files
 | 
			
		||||
containing lines that would have been output. Each file name is output
 | 
			
		||||
once, on a separate line. Searching normally stops as soon as a matching line
 | 
			
		||||
is found in a file. However, if the <b>-c</b> (count) option is also used,
 | 
			
		||||
matching continues in order to obtain the correct count, and those files that
 | 
			
		||||
have at least one match are listed along with their counts. Using this option
 | 
			
		||||
with <b>-c</b> is a way of suppressing the listing of files with no matches.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--label</b>=<i>name</i>
 | 
			
		||||
This option supplies a name to be used for the standard input when file names
 | 
			
		||||
are being output. If not supplied, "(standard input)" is used. There is no
 | 
			
		||||
short form for this option.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--line-buffered</b>
 | 
			
		||||
When this option is given, input is read and processed line by line, and the
 | 
			
		||||
output is flushed after each write. By default, input is read in large chunks,
 | 
			
		||||
unless <b>pcregrep</b> can determine that it is reading from a terminal (which
 | 
			
		||||
is currently possible only in Unix-like environments). Output to a terminal is
 | 
			
		||||
normally automatically flushed by the operating system. This option can be
 | 
			
		||||
useful when the input or output is attached to a pipe and you do not want
 | 
			
		||||
<b>pcregrep</b> to buffer up large amounts of data. However, its use will affect
 | 
			
		||||
performance, and the <b>-M</b> (multiline) option ceases to work.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--line-offsets</b>
 | 
			
		||||
Instead of showing lines or parts of lines that match, show each match as a
 | 
			
		||||
line number, the offset from the start of the line, and a length. The line
 | 
			
		||||
number is terminated by a colon (as usual; see the <b>-n</b> option), and the
 | 
			
		||||
offset and length are separated by a comma. In this mode, no context is shown.
 | 
			
		||||
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
 | 
			
		||||
more than one match in a line, each of them is shown separately. This option is
 | 
			
		||||
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--locale</b>=<i>locale-name</i>
 | 
			
		||||
This option specifies a locale to be used for pattern matching. It overrides
 | 
			
		||||
the value in the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variables. If no
 | 
			
		||||
locale is specified, the PCRE library's default (usually the "C" locale) is
 | 
			
		||||
used. There is no short form for this option.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--match-limit</b>=<i>number</i>
 | 
			
		||||
Processing some regular expression patterns can require a very large amount of
 | 
			
		||||
memory, leading in some cases to a program crash if not enough is available.
 | 
			
		||||
Other patterns may take a very long time to search for all possible matching
 | 
			
		||||
strings. The <b>pcre_exec()</b> function that is called by <b>pcregrep</b> to do
 | 
			
		||||
the matching has two parameters that can limit the resources that it uses.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
The <b>--match-limit</b> option provides a means of limiting resource usage
 | 
			
		||||
when processing patterns that are not going to match, but which have a very
 | 
			
		||||
large number of possibilities in their search trees. The classic example is a
 | 
			
		||||
pattern that uses nested unlimited repeats. Internally, PCRE uses a function
 | 
			
		||||
called <b>match()</b> which it calls repeatedly (sometimes recursively). The
 | 
			
		||||
limit set by <b>--match-limit</b> is imposed on the number of times this
 | 
			
		||||
function is called during a match, which has the effect of limiting the amount
 | 
			
		||||
of backtracking that can take place.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
The <b>--recursion-limit</b> option is similar to <b>--match-limit</b>, but
 | 
			
		||||
instead of limiting the total number of times that <b>match()</b> is called, it
 | 
			
		||||
limits the depth of recursive calls, which in turn limits the amount of memory
 | 
			
		||||
that can be used. The recursion depth is a smaller number than the total number
 | 
			
		||||
of calls, because not all calls to <b>match()</b> are recursive. This limit is
 | 
			
		||||
of use only if it is set smaller than <b>--match-limit</b>.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
There are no short forms for these options. The default settings are specified
 | 
			
		||||
when the PCRE library is compiled; unless changed at build time, the default is 10 million.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-M</b>, <b>--multiline</b>
 | 
			
		||||
Allow patterns to match more than one line. When this option is given, patterns
 | 
			
		||||
may usefully contain literal newline characters and internal occurrences of ^
 | 
			
		||||
and $ characters. The output for a successful match may consist of more than
 | 
			
		||||
one line, the last of which is the one in which the match ended. If the matched
 | 
			
		||||
string ends with a newline sequence the output ends at the end of that line.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
When this option is set, the PCRE library is called in "multiline" mode.
 | 
			
		||||
There is a limit to the number of lines that can be matched, imposed by the way
 | 
			
		||||
that <b>pcregrep</b> buffers the input file as it scans it. However,
 | 
			
		||||
<b>pcregrep</b> ensures that at least 8K characters or the rest of the document
 | 
			
		||||
(whichever is the shorter) are available for forward matching, and similarly
 | 
			
		||||
the previous 8K characters (or all the previous characters, if fewer than 8K)
 | 
			
		||||
are guaranteed to be available for lookbehind assertions. This option does not
 | 
			
		||||
work when input is read line by line (see <b>--line-buffered</b>).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
 | 
			
		||||
The PCRE library supports five different conventions for indicating
 | 
			
		||||
the ends of lines. They are the single-character sequences CR (carriage return)
 | 
			
		||||
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
 | 
			
		||||
which recognizes any of the preceding three types, and an "any" convention, in
 | 
			
		||||
which any Unicode line ending sequence is assumed to end a line. The Unicode
 | 
			
		||||
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
 | 
			
		||||
(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
 | 
			
		||||
PS (paragraph separator, U+2029).
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
When the PCRE library is built, a default line-ending sequence is specified.
 | 
			
		||||
This is normally the standard sequence for the operating system. Unless
 | 
			
		||||
otherwise specified by this option, <b>pcregrep</b> uses the library's default.
 | 
			
		||||
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
 | 
			
		||||
makes it possible to use <b>pcregrep</b> to scan files that have come from other
 | 
			
		||||
environments without having to modify their line endings. If the data that is
 | 
			
		||||
being scanned does not agree with the convention set by this option,
 | 
			
		||||
<b>pcregrep</b> may behave in strange ways. Note that this option does not
 | 
			
		||||
apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
 | 
			
		||||
<b>--include-from</b> options, which are expected to use the operating system's
 | 
			
		||||
standard newline sequence.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-n</b>, <b>--line-number</b>
 | 
			
		||||
Precede each output line by its line number in the file, followed by a colon
 | 
			
		||||
for matching lines or a hyphen for context lines. If the filename is also being
 | 
			
		||||
output, it precedes the line number. This option is forced if
 | 
			
		||||
<b>--line-offsets</b> is used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--no-jit</b>
 | 
			
		||||
If the PCRE library is built with support for just-in-time compiling (which
 | 
			
		||||
speeds up matching), <b>pcregrep</b> automatically makes use of this, unless it
 | 
			
		||||
was explicitly disabled at build time. This option can be used to disable the
 | 
			
		||||
use of JIT at run time. It is provided for testing and working round problems.
 | 
			
		||||
It should never be needed in normal use.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-o</b>, <b>--only-matching</b>
 | 
			
		||||
Show only the part of the line that matched a pattern instead of the whole
 | 
			
		||||
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
 | 
			
		||||
<b>-C</b> options are ignored. If there is more than one match in a line, each
 | 
			
		||||
of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
 | 
			
		||||
sense of the match to find non-matching lines), no output is generated, but the
 | 
			
		||||
return code is set appropriately. If the matched portion of the line is empty,
 | 
			
		||||
nothing is output unless the file name or line number are being printed, in
 | 
			
		||||
which case they are shown on an otherwise empty line. This option is mutually
 | 
			
		||||
exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
 | 
			
		||||
Show only the part of the line that matched the capturing parentheses of the
 | 
			
		||||
given number. Up to 32 capturing parentheses are supported, and -o0 is
 | 
			
		||||
equivalent to <b>-o</b> without a number. Because these options can be given
 | 
			
		||||
without an argument (see above), if an argument is present, it must be given in
 | 
			
		||||
the same shell item, for example, -o3 or --only-matching=2. The comments given
 | 
			
		||||
for the non-argument case above also apply to this case. If the specified
 | 
			
		||||
capturing parentheses do not exist in the pattern, or were not set in the
 | 
			
		||||
match, nothing is output unless the file name or line number are being printed.
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
If this option is given multiple times, multiple substrings are output, in the
 | 
			
		||||
order the options are given. For example, -o3 -o1 -o3 causes the substrings
 | 
			
		||||
matched by capturing parentheses 3 and 1 and then 3 again to be output. By
 | 
			
		||||
default, there is no separator (but see the next option).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--om-separator</b>=<i>text</i>
 | 
			
		||||
Specify a separating string for multiple occurrences of <b>-o</b>. The default
 | 
			
		||||
is an empty string. Separating strings are never coloured.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-q</b>, <b>--quiet</b>
 | 
			
		||||
Work quietly, that is, display nothing except error messages. The exit
 | 
			
		||||
status indicates whether or not any matches were found.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-r</b>, <b>--recursive</b>
 | 
			
		||||
If any given path is a directory, recursively scan the files it contains,
 | 
			
		||||
taking note of any <b>--include</b> and <b>--exclude</b> settings. By default, a
 | 
			
		||||
directory is read as a normal file; in some operating systems this gives an
 | 
			
		||||
immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
 | 
			
		||||
option to "recurse".
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>--recursion-limit</b>=<i>number</i>
 | 
			
		||||
See <b>--match-limit</b> above.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-s</b>, <b>--no-messages</b>
 | 
			
		||||
Suppress error messages about non-existent or unreadable files. Such files are
 | 
			
		||||
quietly skipped. However, the return code is still 2, even if matches were
 | 
			
		||||
found in other files.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-u</b>, <b>--utf-8</b>
 | 
			
		||||
Operate in UTF-8 mode. This option is available only if PCRE has been compiled
 | 
			
		||||
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
 | 
			
		||||
<b>--include</b> options) and all subject lines that are scanned must be valid
 | 
			
		||||
strings of UTF-8 characters.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-V</b>, <b>--version</b>
 | 
			
		||||
Write the version numbers of <b>pcregrep</b> and the PCRE library to the
 | 
			
		||||
standard output and then exit. Anything else on the command line is
 | 
			
		||||
ignored.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-v</b>, <b>--invert-match</b>
 | 
			
		||||
Invert the sense of the match, so that lines which do <i>not</i> match any of
 | 
			
		||||
the patterns are the ones that are found.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
 | 
			
		||||
Force the patterns to match only whole words. This is equivalent to having \b
 | 
			
		||||
at the start and end of the pattern. This option applies only to the patterns
 | 
			
		||||
that are matched against the contents of files; it does not apply to patterns
 | 
			
		||||
specified by any of the <b>--include</b> or <b>--exclude</b> options.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
 | 
			
		||||
Force the patterns to be anchored (each must start matching at the beginning of
 | 
			
		||||
a line) and in addition, require them to match entire lines. This is equivalent
 | 
			
		||||
to having ^ and $ characters at the start and end of each alternative branch in
 | 
			
		||||
every pattern. This option applies only to the patterns that are matched
 | 
			
		||||
against the contents of files; it does not apply to patterns specified by any
 | 
			
		||||
of the <b>--include</b> or <b>--exclude</b> options.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
 | 
			
		||||
order, for a locale. The first one that is set is used. This can be overridden
 | 
			
		||||
by the <b>--locale</b> option. If no locale is set, the PCRE library's default
 | 
			
		||||
(usually the "C" locale) is used.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The <b>-N</b> (<b>--newline</b>) option allows <b>pcregrep</b> to scan files with
 | 
			
		||||
different newline conventions from the default. Any parts of the input files
 | 
			
		||||
that are written to the standard output are copied identically, with whatever
 | 
			
		||||
newline sequences they have in the input. However, the setting of this option
 | 
			
		||||
does not affect the interpretation of files specified by the <b>-f</b>,
 | 
			
		||||
<b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
 | 
			
		||||
the operating system's standard newline sequence, nor does it affect the way in
 | 
			
		||||
which <b>pcregrep</b> writes informational messages to the standard error and
 | 
			
		||||
output streams. For these it uses the string "\n" to indicate newlines,
 | 
			
		||||
relying on the C I/O library to convert this to an appropriate sequence.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Many of the short and long forms of <b>pcregrep</b>'s options are the same
 | 
			
		||||
as in the GNU <b>grep</b> program. Any long option of the form
 | 
			
		||||
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
 | 
			
		||||
(PCRE terminology). However, the <b>--file-list</b>, <b>--file-offsets</b>,
 | 
			
		||||
<b>--include-dir</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>,
 | 
			
		||||
<b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
 | 
			
		||||
<b>--recursion-limit</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
 | 
			
		||||
<b>pcregrep</b>, as is the use of the <b>--only-matching</b> option with a
 | 
			
		||||
capturing parentheses number.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Although most of the common options work the same way, a few are different in
 | 
			
		||||
<b>pcregrep</b>. For example, the <b>--include</b> option's argument is a glob
 | 
			
		||||
for GNU <b>grep</b>, but a regular expression for <b>pcregrep</b>. If both the
 | 
			
		||||
<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
 | 
			
		||||
without counts, but <b>pcregrep</b> gives the counts.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
There are four different ways in which an option with data can be specified.
 | 
			
		||||
If a short form option is used, the data may follow immediately, or (with one
 | 
			
		||||
exception) in the next command line item. For example:
 | 
			
		||||
<pre>
 | 
			
		||||
  -f/some/file
 | 
			
		||||
  -f /some/file
 | 
			
		||||
</pre>
 | 
			
		||||
The exception is the <b>-o</b> option, which may appear with or without data.
 | 
			
		||||
Because of this, if data is present, it must follow immediately in the same
 | 
			
		||||
item, for example -o3.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If a long form option is used, the data may appear in the same command line
 | 
			
		||||
item, separated by an equals character, or (with two exceptions) it may appear
 | 
			
		||||
in the next command line item. For example:
 | 
			
		||||
<pre>
 | 
			
		||||
  --file=/some/file
 | 
			
		||||
  --file /some/file
 | 
			
		||||
</pre>
 | 
			
		||||
Note, however, that if you want to supply a file name beginning with ~ as data
 | 
			
		||||
in a shell command, and have the shell expand ~ to a home directory, you must
 | 
			
		||||
separate the file name from the option, because the shell does not treat ~
 | 
			
		||||
specially unless it is at the start of an item.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
 | 
			
		||||
<b>--only-matching</b> options, for which the data is optional. If one of these
 | 
			
		||||
options does have data, it must be given in the first form, using an equals
 | 
			
		||||
character. Otherwise <b>pcregrep</b> will assume that it has no data.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC10" href="#TOC1">MATCHING ERRORS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
It is possible to supply a regular expression that takes a very long time to
 | 
			
		||||
fail to match certain lines. Such patterns normally involve nested indefinite
 | 
			
		||||
repeats, for example: (a+)*\d when matched against a line of a's with no final
 | 
			
		||||
digit. The PCRE matching function has a resource limit that causes it to abort
 | 
			
		||||
in these circumstances. If this happens, <b>pcregrep</b> outputs an error
 | 
			
		||||
message and the line that caused the problem to the standard error stream. If
 | 
			
		||||
there are more than 20 such errors, <b>pcregrep</b> gives up.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <b>--match-limit</b> option of <b>pcregrep</b> can be used to set the overall
 | 
			
		||||
resource limit; there is a second option called <b>--recursion-limit</b> that
 | 
			
		||||
sets a limit on the amount of memory (usually stack) that is used (see the
 | 
			
		||||
discussion of these options above).
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC11" href="#TOC1">DIAGNOSTICS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
 | 
			
		||||
for syntax errors, overlong lines, non-existent or inaccessible files (even if
 | 
			
		||||
matches were found in other files) or too many matching errors. Using the
 | 
			
		||||
<b>-s</b> option to suppress error messages about inaccessible files does not
 | 
			
		||||
affect the return code.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcrepattern</b>(3), <b>pcresyntax</b>(3), <b>pcretest</b>(1).
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 03 April 2014
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2014 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										452
									
								
								tools/pcre/doc/html/pcrejit.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										452
									
								
								tools/pcre/doc/html/pcrejit.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,452 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcrejit specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcrejit man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">PCRE JUST-IN-TIME COMPILER SUPPORT</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">8-BIT, 16-BIT AND 32-BIT SUPPORT</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">AVAILABILITY OF JIT SUPPORT</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">SIMPLE USE OF JIT</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">RETURN VALUES FROM JIT EXECUTION</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">SAVING AND RESTORING COMPILED PATTERNS</a>
 | 
			
		||||
<li><a name="TOC8" href="#SEC8">CONTROLLING THE JIT STACK</a>
 | 
			
		||||
<li><a name="TOC9" href="#SEC9">JIT STACK FAQ</a>
 | 
			
		||||
<li><a name="TOC10" href="#SEC10">EXAMPLE CODE</a>
 | 
			
		||||
<li><a name="TOC11" href="#SEC11">JIT FAST PATH API</a>
 | 
			
		||||
<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
 | 
			
		||||
<li><a name="TOC13" href="#SEC13">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC14" href="#SEC14">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">PCRE JUST-IN-TIME COMPILER SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Just-in-time compiling is a heavyweight optimization that can greatly speed up
 | 
			
		||||
pattern matching. However, it comes at the cost of extra processing before the
 | 
			
		||||
match is performed. Therefore, it is of most benefit when the same pattern is
 | 
			
		||||
going to be matched many times. This does not necessarily mean many calls of a
 | 
			
		||||
matching function; if the pattern is not anchored, matching attempts may take
 | 
			
		||||
place many times at various positions in the subject, even for a single call.
 | 
			
		||||
Therefore, if the subject string is very long, it may still pay to use JIT for
 | 
			
		||||
one-off matches.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
JIT support applies only to the traditional Perl-compatible matching function.
 | 
			
		||||
It does not apply when the DFA matching function is being used. The code for
 | 
			
		||||
this support was written by Zoltan Herczeg.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">8-BIT, 16-BIT AND 32-BIT SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
JIT support is available for all of the 8-bit, 16-bit and 32-bit PCRE
 | 
			
		||||
libraries. To keep this documentation simple, only the 8-bit interface is
 | 
			
		||||
described in what follows. If you are using the 16-bit library, substitute the
 | 
			
		||||
16-bit functions and 16-bit structures (for example, <i>pcre16_jit_stack</i>
 | 
			
		||||
instead of <i>pcre_jit_stack</i>). If you are using the 32-bit library,
 | 
			
		||||
substitute the 32-bit functions and 32-bit structures (for example,
 | 
			
		||||
<i>pcre32_jit_stack</i> instead of <i>pcre_jit_stack</i>).
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">AVAILABILITY OF JIT SUPPORT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
JIT support is an optional feature of PCRE. The "configure" option --enable-jit
 | 
			
		||||
(or equivalent CMake option) must be set when PCRE is built if you want to use
 | 
			
		||||
JIT. The support is limited to the following hardware platforms:
 | 
			
		||||
<pre>
 | 
			
		||||
  ARM v5, v7, and Thumb2
 | 
			
		||||
  Intel x86 32-bit and 64-bit
 | 
			
		||||
  MIPS 32-bit
 | 
			
		||||
  Power PC 32-bit and 64-bit
 | 
			
		||||
  SPARC 32-bit (experimental)
 | 
			
		||||
</pre>
 | 
			
		||||
If --enable-jit is set on an unsupported platform, compilation fails.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
A program that is linked with PCRE 8.20 or later can tell if JIT support is
 | 
			
		||||
available by calling <b>pcre_config()</b> with the PCRE_CONFIG_JIT option. The
 | 
			
		||||
result is 1 when JIT is available, and 0 otherwise. However, a simple program
 | 
			
		||||
does not need to check this in order to use JIT. The normal API is implemented
 | 
			
		||||
in a way that falls back to the interpretive code if JIT is not available. For
 | 
			
		||||
programs that need the best possible performance, there is also a "fast path"
 | 
			
		||||
API that is JIT-specific.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If your program may sometimes be linked with versions of PCRE that are older
 | 
			
		||||
than 8.20, but you want to use JIT when it is available, you can test
 | 
			
		||||
the values of PCRE_MAJOR and PCRE_MINOR, or the existence of a JIT macro such
 | 
			
		||||
as PCRE_CONFIG_JIT, for compile-time control of your code.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">SIMPLE USE OF JIT</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
You have to do two things to make use of the JIT support in the simplest way:
 | 
			
		||||
<pre>
 | 
			
		||||
  (1) Call <b>pcre_study()</b> with the PCRE_STUDY_JIT_COMPILE option for
 | 
			
		||||
      each compiled pattern, and pass the resulting <b>pcre_extra</b> block to
 | 
			
		||||
      <b>pcre_exec()</b>.
 | 
			
		||||
 | 
			
		||||
  (2) Use <b>pcre_free_study()</b> to free the <b>pcre_extra</b> block when it is
 | 
			
		||||
      no longer needed, instead of just freeing it yourself. This ensures that
 | 
			
		||||
      any JIT data is also freed.
 | 
			
		||||
</pre>
 | 
			
		||||
For a program that may be linked with pre-8.20 versions of PCRE, you can insert
 | 
			
		||||
<pre>
 | 
			
		||||
  #ifndef PCRE_STUDY_JIT_COMPILE
 | 
			
		||||
  #define PCRE_STUDY_JIT_COMPILE 0
 | 
			
		||||
  #endif
 | 
			
		||||
</pre>
 | 
			
		||||
so that no option is passed to <b>pcre_study()</b>, and then use something like
 | 
			
		||||
this to free the study data:
 | 
			
		||||
<pre>
 | 
			
		||||
  #ifdef PCRE_CONFIG_JIT
 | 
			
		||||
      pcre_free_study(study_ptr);
 | 
			
		||||
  #else
 | 
			
		||||
      pcre_free(study_ptr);
 | 
			
		||||
  #endif
 | 
			
		||||
</pre>
 | 
			
		||||
PCRE_STUDY_JIT_COMPILE requests the JIT compiler to generate code for complete
 | 
			
		||||
matches. If you want to run partial matches using the PCRE_PARTIAL_HARD or
 | 
			
		||||
PCRE_PARTIAL_SOFT options of <b>pcre_exec()</b>, you should set one or both of
 | 
			
		||||
the following options in addition to, or instead of, PCRE_STUDY_JIT_COMPILE
 | 
			
		||||
when you call <b>pcre_study()</b>:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
 | 
			
		||||
  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
 | 
			
		||||
</pre>
 | 
			
		||||
The JIT compiler generates different optimized code for each of the three
 | 
			
		||||
modes (normal, soft partial, hard partial). When <b>pcre_exec()</b> is called,
 | 
			
		||||
the appropriate code is run if it is available. Otherwise, the pattern is
 | 
			
		||||
matched using interpretive code.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
In some circumstances you may need to call additional functions. These are
 | 
			
		||||
described in the section entitled
 | 
			
		||||
<a href="#stackcontrol">"Controlling the JIT stack"</a>
 | 
			
		||||
below.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If JIT support is not available, PCRE_STUDY_JIT_COMPILE etc. are ignored, and
 | 
			
		||||
no JIT data is created. Otherwise, the compiled pattern is passed to the JIT
 | 
			
		||||
compiler, which turns it into machine code that executes much faster than the
 | 
			
		||||
normal interpretive code. When <b>pcre_exec()</b> is passed a <b>pcre_extra</b>
 | 
			
		||||
block containing a pointer to JIT code of the appropriate mode (normal or
 | 
			
		||||
hard/soft partial), it obeys that code instead of running the interpreter. The
 | 
			
		||||
result is identical, but the compiled JIT code runs much faster.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There are some <b>pcre_exec()</b> options that are not supported for JIT
 | 
			
		||||
execution. There are also some pattern items that JIT cannot handle. Details
 | 
			
		||||
are given below. In both cases, execution automatically falls back to the
 | 
			
		||||
interpretive code. If you want to know whether JIT was actually used for a
 | 
			
		||||
particular match, you should arrange for a JIT callback function to be set up
 | 
			
		||||
as described in the section entitled
 | 
			
		||||
<a href="#stackcontrol">"Controlling the JIT stack"</a>
 | 
			
		||||
below, even if you do not need to supply a non-default JIT stack. Such a
 | 
			
		||||
callback function is called whenever JIT code is about to be obeyed. If the
 | 
			
		||||
execution options are not right for JIT execution, the callback function is not
 | 
			
		||||
obeyed.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If the JIT compiler finds an unsupported item, no JIT data is generated. You
 | 
			
		||||
can find out if JIT execution is available after studying a pattern by calling
 | 
			
		||||
<b>pcre_fullinfo()</b> with the PCRE_INFO_JIT option. A result of 1 means that
 | 
			
		||||
JIT compilation was successful. A result of 0 means that JIT support is not
 | 
			
		||||
available, or the pattern was not studied with PCRE_STUDY_JIT_COMPILE etc., or
 | 
			
		||||
the JIT compiler was not able to handle the pattern.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Once a pattern has been studied, with or without JIT, it can be used as many
 | 
			
		||||
times as you like for matching different subject strings.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The only <b>pcre_exec()</b> options that are supported for JIT execution are
 | 
			
		||||
PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK, PCRE_NO_UTF32_CHECK, PCRE_NOTBOL,
 | 
			
		||||
PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and
 | 
			
		||||
PCRE_PARTIAL_SOFT.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The only unsupported pattern items are \C (match a single data unit) when
 | 
			
		||||
running in a UTF mode, and a callout immediately before an assertion condition
 | 
			
		||||
in a conditional group.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT EXECUTION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
When a pattern is matched using JIT execution, the return values are the same
 | 
			
		||||
as those given by the interpretive <b>pcre_exec()</b> code, with the addition of
 | 
			
		||||
one new error code: PCRE_ERROR_JIT_STACKLIMIT. This means that the memory used
 | 
			
		||||
for the JIT stack was insufficient. See
 | 
			
		||||
<a href="#stackcontrol">"Controlling the JIT stack"</a>
 | 
			
		||||
below for a discussion of JIT stack usage. For compatibility with the
 | 
			
		||||
interpretive <b>pcre_exec()</b> code, no more than two-thirds of the
 | 
			
		||||
<i>ovector</i> argument is used for passing back captured substrings.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The error code PCRE_ERROR_MATCHLIMIT is returned by the JIT code if searching a
 | 
			
		||||
very large pattern tree goes on for too long, as it is in the same circumstance
 | 
			
		||||
when JIT is not used, but the details of exactly what is counted are not the
 | 
			
		||||
same. The PCRE_ERROR_RECURSIONLIMIT error code is never returned by JIT
 | 
			
		||||
execution.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">SAVING AND RESTORING COMPILED PATTERNS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The code that is generated by the JIT compiler is architecture-specific, and is
 | 
			
		||||
also position dependent. For those reasons it cannot be saved (in a file or
 | 
			
		||||
database) and restored later like the bytecode and other data of a compiled
 | 
			
		||||
pattern. Saving and restoring compiled patterns is not something many people
 | 
			
		||||
do. More detail about this facility is given in the
 | 
			
		||||
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
 | 
			
		||||
documentation. It should be possible to run <b>pcre_study()</b> on a saved and
 | 
			
		||||
restored pattern, and thereby recreate the JIT data, but because JIT
 | 
			
		||||
compilation uses significant resources, it is probably not worth doing this;
 | 
			
		||||
you might as well recompile the original pattern.
 | 
			
		||||
<a name="stackcontrol"></a></P>
 | 
			
		||||
<br><a name="SEC8" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
When the compiled JIT code runs, it needs a block of memory to use as a stack.
 | 
			
		||||
By default, it uses 32K on the machine stack. However, some large or
 | 
			
		||||
complicated patterns need more than this. The error PCRE_ERROR_JIT_STACKLIMIT
 | 
			
		||||
is given when there is not enough stack. Three functions are provided for
 | 
			
		||||
managing blocks of memory for use as JIT stacks. There is further discussion
 | 
			
		||||
about the use of JIT stacks in the section entitled
 | 
			
		||||
<a href="#stackfaq">"JIT stack FAQ"</a>
 | 
			
		||||
below.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <b>pcre_jit_stack_alloc()</b> function creates a JIT stack. Its arguments
 | 
			
		||||
are a starting size and a maximum size, and it returns a pointer to an opaque
 | 
			
		||||
structure of type <b>pcre_jit_stack</b>, or NULL if there is an error. The
 | 
			
		||||
<b>pcre_jit_stack_free()</b> function can be used to free a stack that is no
 | 
			
		||||
longer needed. (For the technically minded: the address space is allocated by
 | 
			
		||||
mmap or VirtualAlloc.)
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
JIT uses far less memory for recursion than the interpretive code,
 | 
			
		||||
and a maximum stack size of 512K to 1M should be more than enough for any
 | 
			
		||||
pattern.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The <b>pcre_assign_jit_stack()</b> function specifies which stack JIT code
 | 
			
		||||
should use. Its arguments are as follows:
 | 
			
		||||
<pre>
 | 
			
		||||
  pcre_extra         *extra
 | 
			
		||||
  pcre_jit_callback  callback
 | 
			
		||||
  void               *data
 | 
			
		||||
</pre>
 | 
			
		||||
The <i>extra</i> argument must be the result of studying a pattern with
 | 
			
		||||
PCRE_STUDY_JIT_COMPILE etc. There are three cases for the values of the other
 | 
			
		||||
two options:
 | 
			
		||||
<pre>
 | 
			
		||||
  (1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
 | 
			
		||||
      on the machine stack is used.
 | 
			
		||||
 | 
			
		||||
  (2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
 | 
			
		||||
      a valid JIT stack, the result of calling <b>pcre_jit_stack_alloc()</b>.
 | 
			
		||||
 | 
			
		||||
  (3) If <i>callback</i> is not NULL, it must point to a function that is
 | 
			
		||||
      called with <i>data</i> as an argument at the start of matching, in
 | 
			
		||||
      order to set up a JIT stack. If the return from the callback
 | 
			
		||||
      function is NULL, the internal 32K stack is used; otherwise the
 | 
			
		||||
      return value must be a valid JIT stack, the result of calling
 | 
			
		||||
      <b>pcre_jit_stack_alloc()</b>.
 | 
			
		||||
</pre>
 | 
			
		||||
A callback function is obeyed whenever JIT code is about to be run; it is not
 | 
			
		||||
obeyed when <b>pcre_exec()</b> is called with options that are incompatible for
 | 
			
		||||
JIT execution. A callback function can therefore be used to determine whether a
 | 
			
		||||
match operation was executed by JIT or by the interpreter.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
You may safely use the same JIT stack for more than one pattern (either by
 | 
			
		||||
assigning directly or by callback), as long as the patterns are all matched
 | 
			
		||||
sequentially in the same thread. In a multithread application, if you do not
 | 
			
		||||
specify a JIT stack, or if you assign or pass back NULL from a callback, that
 | 
			
		||||
is thread-safe, because each thread has its own machine stack. However, if you
 | 
			
		||||
assign or pass back a non-NULL JIT stack, this must be a different stack for
 | 
			
		||||
each thread so that the application is thread-safe.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Strictly speaking, even more is allowed. You can assign the same non-NULL stack
 | 
			
		||||
to any number of patterns as long as they are not used for matching by multiple
 | 
			
		||||
threads at the same time. For example, you can assign the same stack to all
 | 
			
		||||
compiled patterns, and use a global mutex in the callback to wait until the
 | 
			
		||||
stack is available for use. However, this is an inefficient solution, and not
 | 
			
		||||
recommended.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
This is a suggestion for how a multithreaded program that needs to set up
 | 
			
		||||
non-default JIT stacks might operate:
 | 
			
		||||
<pre>
 | 
			
		||||
  During thread initialization
 | 
			
		||||
    thread_local_var = pcre_jit_stack_alloc(...)
 | 
			
		||||
 | 
			
		||||
  During thread exit
 | 
			
		||||
    pcre_jit_stack_free(thread_local_var)
 | 
			
		||||
 | 
			
		||||
  Use a one-line callback function
 | 
			
		||||
    return thread_local_var
 | 
			
		||||
</pre>
 | 
			
		||||
All the functions described in this section do nothing if JIT is not available,
 | 
			
		||||
and <b>pcre_assign_jit_stack()</b> does nothing unless the <b>extra</b> argument
 | 
			
		||||
is non-NULL and points to a <b>pcre_extra</b> block that is the result of a
 | 
			
		||||
successful study with PCRE_STUDY_JIT_COMPILE etc.
 | 
			
		||||
<a name="stackfaq"></a></P>
 | 
			
		||||
<br><a name="SEC9" href="#TOC1">JIT STACK FAQ</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
(1) Why do we need JIT stacks?
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
PCRE (and JIT) is a recursive, depth-first engine, so it needs a stack where
 | 
			
		||||
the local data of the current node is pushed before checking its child nodes.
 | 
			
		||||
Allocating real machine stack on some platforms is difficult. For example, the
 | 
			
		||||
stack chain needs to be updated every time if we extend the stack on PowerPC.
 | 
			
		||||
Although it is possible, its updating time overhead decreases performance. So
 | 
			
		||||
we do the recursion in memory.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
(2) Why don't we simply allocate blocks of memory with <b>malloc()</b>?
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
Modern operating systems have a nice feature: they can reserve an address space
 | 
			
		||||
instead of allocating memory. We can safely allocate memory pages inside this
 | 
			
		||||
address space, so the stack could grow without moving memory data (this is
 | 
			
		||||
important because of pointers). Thus we can allocate 1M address space, and use
 | 
			
		||||
only a single memory page (usually 4K) if that is enough. However, we can still
 | 
			
		||||
grow up to 1M anytime if needed.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
(3) Who "owns" a JIT stack?
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
The owner of the stack is the user program, not the JIT studied pattern or
 | 
			
		||||
anything else. The user program must ensure that if a stack is used by
 | 
			
		||||
<b>pcre_exec()</b>, (that is, it is assigned to the pattern currently running),
 | 
			
		||||
that stack must not be used by any other threads (to avoid overwriting the same
 | 
			
		||||
memory area). The best practice for multithreaded programs is to allocate a
 | 
			
		||||
stack for each thread, and return this stack through the JIT callback function.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
(4) When should a JIT stack be freed?
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
You can free a JIT stack at any time, as long as it will not be used by
 | 
			
		||||
<b>pcre_exec()</b> again. When you assign the stack to a pattern, only a pointer
 | 
			
		||||
is set. There is no reference counting or any other magic. You can free the
 | 
			
		||||
patterns and stacks in any order, anytime. Just <i>do not</i> call
 | 
			
		||||
<b>pcre_exec()</b> with a pattern pointing to an already freed stack, as that
 | 
			
		||||
will cause SEGFAULT. (Also, do not free a stack currently used by
 | 
			
		||||
<b>pcre_exec()</b> in another thread). You can also replace the stack for a
 | 
			
		||||
pattern at any time. You can even free the previous stack before assigning a
 | 
			
		||||
replacement.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
(5) Should I allocate/free a stack every time before/after calling
 | 
			
		||||
<b>pcre_exec()</b>?
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
No, because this is too costly in terms of resources. However, you could
 | 
			
		||||
implement some clever idea which releases the stack if it is not used in let's
 | 
			
		||||
say two minutes. The JIT callback can help to achieve this without keeping a
 | 
			
		||||
list of the currently JIT studied patterns.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
(6) OK, the stack is for long term memory allocation. But what happens if a
 | 
			
		||||
pattern causes stack overflow with a stack of 1M? Is that 1M kept until the
 | 
			
		||||
stack is freed?
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
Especially on embedded systems, it might be a good idea to release memory
 | 
			
		||||
sometimes without freeing the stack. There is no API for this at the moment.
 | 
			
		||||
Probably a function call which returns with the currently allocated memory for
 | 
			
		||||
any stack and another which allows releasing memory (shrinking the stack) would
 | 
			
		||||
be a good idea if someone needs this.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
(7) This is too much of a headache. Isn't there any better solution for JIT
 | 
			
		||||
stack handling?
 | 
			
		||||
<br>
 | 
			
		||||
<br>
 | 
			
		||||
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
 | 
			
		||||
out this complicated API.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC10" href="#TOC1">EXAMPLE CODE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
This is a single-threaded example that specifies a JIT stack without using a
 | 
			
		||||
callback.
 | 
			
		||||
<pre>
 | 
			
		||||
  int rc;
 | 
			
		||||
  int ovector[30];
 | 
			
		||||
  pcre *re;
 | 
			
		||||
  pcre_extra *extra;
 | 
			
		||||
  pcre_jit_stack *jit_stack;
 | 
			
		||||
 | 
			
		||||
  re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
 | 
			
		||||
  /* Check for errors */
 | 
			
		||||
  extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
 | 
			
		||||
  jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
 | 
			
		||||
  /* Check for error (NULL) */
 | 
			
		||||
  pcre_assign_jit_stack(extra, NULL, jit_stack);
 | 
			
		||||
  rc = pcre_exec(re, extra, subject, length, 0, 0, ovector, 30);
 | 
			
		||||
  /* Check results */
 | 
			
		||||
  pcre_free(re);
 | 
			
		||||
  pcre_free_study(extra);
 | 
			
		||||
  pcre_jit_stack_free(jit_stack);
 | 
			
		||||
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC11" href="#TOC1">JIT FAST PATH API</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Because the API described above falls back to interpreted execution when JIT is
 | 
			
		||||
not available, it is convenient for programs that are written for general use
 | 
			
		||||
in many environments. However, calling JIT via <b>pcre_exec()</b> does have a
 | 
			
		||||
performance impact. Programs that are written for use where JIT is known to be
 | 
			
		||||
available, and which need the best possible performance, can instead use a
 | 
			
		||||
"fast path" API to call JIT execution directly instead of calling
 | 
			
		||||
<b>pcre_exec()</b> (obviously only for patterns that have been successfully
 | 
			
		||||
studied by JIT).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The fast path function is called <b>pcre_jit_exec()</b>, and it takes exactly
 | 
			
		||||
the same arguments as <b>pcre_exec()</b>, plus one additional argument that
 | 
			
		||||
must point to a JIT stack. The JIT stack arrangements described above do not
 | 
			
		||||
apply. The return values are the same as for <b>pcre_exec()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
When you call <b>pcre_exec()</b>, as well as testing for invalid options, a
 | 
			
		||||
number of other sanity checks are performed on the arguments. For example, if
 | 
			
		||||
the subject pointer is NULL, or its length is negative, an immediate error is
 | 
			
		||||
given. Also, unless PCRE_NO_UTF[8|16|32]_CHECK is set, a UTF subject string is tested
 | 
			
		||||
for validity. In the interests of speed, these checks do not happen on the JIT
 | 
			
		||||
fast path, and if invalid data is passed, the result is undefined.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Bypassing the sanity checks and the <b>pcre_exec()</b> wrapping can give
 | 
			
		||||
speedups of more than 10%.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
<b>pcreapi</b>(3)
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel (FAQ by Zoltan Herczeg)
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 17 March 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2013 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										90
									
								
								tools/pcre/doc/html/pcrelimits.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								tools/pcre/doc/html/pcrelimits.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,90 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcrelimits specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcrelimits man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
SIZE AND OTHER LIMITATIONS
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
There are some size limitations in PCRE but it is hoped that they will never in
 | 
			
		||||
practice be relevant.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The maximum length of a compiled pattern is approximately 64K data units (bytes
 | 
			
		||||
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
 | 
			
		||||
the 32-bit library) if PCRE is compiled with the default internal linkage size,
 | 
			
		||||
which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit
 | 
			
		||||
library. If you want to process regular expressions that are truly enormous,
 | 
			
		||||
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
 | 
			
		||||
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
 | 
			
		||||
the source distribution and the
 | 
			
		||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
 | 
			
		||||
documentation for details. In these cases the limit is substantially larger.
 | 
			
		||||
However, the speed of execution is slower.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
All values in repeating quantifiers must be less than 65536.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is no limit to the number of parenthesized subpatterns, but there can be
 | 
			
		||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
 | 
			
		||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
 | 
			
		||||
order to limit the amount of system stack used at compile time. The limit can
 | 
			
		||||
be specified when PCRE is built; the default is 250.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There is a limit to the number of forward references to subsequent subpatterns
 | 
			
		||||
of around 200,000. Repeated forward references with fixed upper limits, for
 | 
			
		||||
example, (?2){0,100} when subpattern number 2 is to the right, are included in
 | 
			
		||||
the count. There is no limit to the number of backward references.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The maximum length of a name for a named subpattern is 32 characters, and the
 | 
			
		||||
maximum number of named subpatterns is 10000.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
 | 
			
		||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The maximum length of a subject string is the largest positive number that an
 | 
			
		||||
integer variable can hold. However, when using the traditional matching
 | 
			
		||||
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
 | 
			
		||||
This means that the available stack space may limit the size of a subject
 | 
			
		||||
string that can be processed by certain patterns. For a discussion of stack
 | 
			
		||||
issues, see the
 | 
			
		||||
<a href="pcrestack.html"><b>pcrestack</b></a>
 | 
			
		||||
documentation.
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
AUTHOR
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
REVISION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 05 November 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2013 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										242
									
								
								tools/pcre/doc/html/pcrematching.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										242
									
								
								tools/pcre/doc/html/pcrematching.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,242 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcrematching specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcrematching man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">PCRE MATCHING ALGORITHMS</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">REGULAR EXPRESSIONS AS TREES</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">THE STANDARD MATCHING ALGORITHM</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">THE ALTERNATIVE MATCHING ALGORITHM</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC8" href="#SEC8">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">PCRE MATCHING ALGORITHMS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
This document describes the two different algorithms that are available in PCRE
 | 
			
		||||
for matching a compiled regular expression against a given subject string. The
 | 
			
		||||
"standard" algorithm is the one provided by the <b>pcre_exec()</b>,
 | 
			
		||||
<b>pcre16_exec()</b> and <b>pcre32_exec()</b> functions. These work in the same
 | 
			
		||||
way as Perl's matching function, and provide a Perl-compatible matching operation.
 | 
			
		||||
The just-in-time (JIT) optimization that is described in the
 | 
			
		||||
<a href="pcrejit.html"><b>pcrejit</b></a>
 | 
			
		||||
documentation is compatible with these functions.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
An alternative algorithm is provided by the <b>pcre_dfa_exec()</b>,
 | 
			
		||||
<b>pcre16_dfa_exec()</b> and <b>pcre32_dfa_exec()</b> functions; they operate in
 | 
			
		||||
a different way, and are not Perl-compatible. This alternative has advantages
 | 
			
		||||
and disadvantages compared with the standard algorithm, and these are described
 | 
			
		||||
below.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
When there is only one possible way in which a given subject string can match a
 | 
			
		||||
pattern, the two algorithms give the same answer. A difference arises, however,
 | 
			
		||||
when there are multiple possibilities. For example, if the pattern
 | 
			
		||||
<pre>
 | 
			
		||||
  ^&lt;.*&gt;
 | 
			
		||||
</pre>
 | 
			
		||||
is matched against the string
 | 
			
		||||
<pre>
 | 
			
		||||
  &lt;something&gt; &lt;something else&gt; &lt;something further&gt;
 | 
			
		||||
</pre>
 | 
			
		||||
there are three possible answers. The standard algorithm finds only one of
 | 
			
		||||
them, whereas the alternative algorithm finds all three.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">REGULAR EXPRESSIONS AS TREES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The set of strings that are matched by a regular expression can be represented
 | 
			
		||||
as a tree structure. An unlimited repetition in the pattern makes the tree of
 | 
			
		||||
infinite size, but it is still a tree. Matching the pattern to a given subject
 | 
			
		||||
string (from a given starting point) can be thought of as a search of the tree.
 | 
			
		||||
There are two ways to search a tree: depth-first and breadth-first, and these
 | 
			
		||||
correspond to the two matching algorithms provided by PCRE.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">THE STANDARD MATCHING ALGORITHM</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
In the terminology of Jeffrey Friedl's book "Mastering Regular
 | 
			
		||||
Expressions", the standard algorithm is an "NFA algorithm". It conducts a
 | 
			
		||||
depth-first search of the pattern tree. That is, it proceeds along a single
 | 
			
		||||
path through the tree, checking that the subject matches what is required. When
 | 
			
		||||
there is a mismatch, the algorithm tries any alternatives at the current point,
 | 
			
		||||
and if they all fail, it backs up to the previous branch point in the tree, and
 | 
			
		||||
tries the next alternative branch at that level. This often involves backing up
 | 
			
		||||
(moving to the left) in the subject string as well. The order in which
 | 
			
		||||
repetition branches are tried is controlled by the greedy or ungreedy nature of
 | 
			
		||||
the quantifier.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If a leaf node is reached, a matching string has been found, and at that point
 | 
			
		||||
the algorithm stops. Thus, if there is more than one possible match, this
 | 
			
		||||
algorithm returns the first one that it finds. Whether this is the shortest,
 | 
			
		||||
the longest, or some intermediate length depends on the way the greedy and
 | 
			
		||||
ungreedy repetition quantifiers are specified in the pattern.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Because it ends up with a single path through the tree, it is relatively
 | 
			
		||||
straightforward for this algorithm to keep track of the substrings that are
 | 
			
		||||
matched by portions of the pattern in parentheses. This provides support for
 | 
			
		||||
capturing parentheses and back references.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">THE ALTERNATIVE MATCHING ALGORITHM</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
This algorithm conducts a breadth-first search of the tree. Starting from the
 | 
			
		||||
first matching point in the subject, it scans the subject string from left to
 | 
			
		||||
right, once, character by character, and as it does this, it remembers all the
 | 
			
		||||
paths through the tree that represent valid matches. In Friedl's terminology,
 | 
			
		||||
this is a kind of "DFA algorithm", though it is not implemented as a
 | 
			
		||||
traditional finite state machine (it keeps multiple states active
 | 
			
		||||
simultaneously).
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Although the general principle of this matching algorithm is that it scans the
 | 
			
		||||
subject string only once, without backtracking, there is one exception: when a
 | 
			
		||||
lookaround assertion is encountered, the characters following or preceding the
 | 
			
		||||
current point have to be independently inspected.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
The scan continues until either the end of the subject is reached, or there are
 | 
			
		||||
no more unterminated paths. At this point, terminated paths represent the
 | 
			
		||||
different matching possibilities (if there are none, the match has failed).
 | 
			
		||||
Thus, if there is more than one possible match, this algorithm finds all of
 | 
			
		||||
them, and in particular, it finds the longest. The matches are returned in
 | 
			
		||||
decreasing order of length. There is an option to stop the algorithm after the
 | 
			
		||||
first match (which is necessarily the shortest) is found.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Note that all the matches that are found start at the same point in the
 | 
			
		||||
subject. If the pattern
 | 
			
		||||
<pre>
 | 
			
		||||
  cat(er(pillar)?)?
 | 
			
		||||
</pre>
 | 
			
		||||
is matched against the string "the caterpillar catchment", the result will be
 | 
			
		||||
the three strings "caterpillar", "cater", and "cat" that start at the fifth
 | 
			
		||||
character of the subject. The algorithm does not automatically move on to find
 | 
			
		||||
matches that start at later positions.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
PCRE's "auto-possessification" optimization usually applies to character
 | 
			
		||||
repeats at the end of a pattern (as well as internally). For example, the
 | 
			
		||||
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
 | 
			
		||||
even considering the possibility of backtracking into the repeated digits. For
 | 
			
		||||
DFA matching, this means that only one possible match is found. If you really
 | 
			
		||||
do want multiple matches in such cases, either use an ungreedy repeat
 | 
			
		||||
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
There are a number of features of PCRE regular expressions that are not
 | 
			
		||||
supported by the alternative matching algorithm. They are as follows:
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
1. Because the algorithm finds all possible matches, the greedy or ungreedy
 | 
			
		||||
nature of repetition quantifiers is not relevant. Greedy and ungreedy
 | 
			
		||||
quantifiers are treated in exactly the same way. However, possessive
 | 
			
		||||
quantifiers can make a difference when what follows could also match what is
 | 
			
		||||
quantified, for example in a pattern like this:
 | 
			
		||||
<pre>
 | 
			
		||||
  ^a++\w!
 | 
			
		||||
</pre>
 | 
			
		||||
This pattern matches "aaab!" but not "aaa!", which would be matched by a
 | 
			
		||||
non-possessive quantifier. Similarly, if an atomic group is present, it is
 | 
			
		||||
matched as if it were a standalone pattern at the current point, and the
 | 
			
		||||
longest match is then "locked in" for the rest of the overall pattern.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
2. When dealing with multiple paths through the tree simultaneously, it is not
 | 
			
		||||
straightforward to keep track of captured substrings for the different matching
 | 
			
		||||
possibilities, and PCRE's implementation of this algorithm does not attempt to
 | 
			
		||||
do this. This means that no captured substrings are available.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
3. Because no substrings are captured, back references within the pattern are
 | 
			
		||||
not supported, and cause errors if encountered.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
4. For the same reason, conditional expressions that use a backreference as the
 | 
			
		||||
condition or test for a specific group recursion are not supported.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
5. Because many paths through the tree may be active, the \K escape sequence,
 | 
			
		||||
which resets the start of the match when encountered (but may be on some paths
 | 
			
		||||
and not on others), is not supported. It causes an error if encountered.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
6. Callouts are supported, but the value of the <i>capture_top</i> field is
 | 
			
		||||
always 1, and the value of the <i>capture_last</i> field is always -1.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
7. The \C escape sequence, which (in the standard algorithm) always matches a
 | 
			
		||||
single data unit, even in UTF-8, UTF-16 or UTF-32 modes, is not supported in
 | 
			
		||||
these modes, because the alternative algorithm moves through the subject string
 | 
			
		||||
one character (not data unit) at a time, for all active paths through the tree.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
 | 
			
		||||
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Using the alternative matching algorithm provides the following advantages:
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
1. All possible matches (at a single point in the subject) are automatically
 | 
			
		||||
found, and in particular, the longest match is found. To find more than one
 | 
			
		||||
match using the standard algorithm, you have to do kludgy things with
 | 
			
		||||
callouts.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
2. Because the alternative algorithm scans the subject string just once, and
 | 
			
		||||
never needs to backtrack (except for lookbehinds), it is possible to pass very
 | 
			
		||||
long subject strings to the matching function in several pieces, checking for
 | 
			
		||||
partial matching each time. Although it is possible to do multi-segment
 | 
			
		||||
matching using the standard algorithm by retaining partially matched
 | 
			
		||||
substrings, it is more complicated. The
 | 
			
		||||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
 | 
			
		||||
documentation gives details of partial matching and discusses multi-segment
 | 
			
		||||
matching.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The alternative algorithm suffers from a number of disadvantages:
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
1. It is substantially slower than the standard algorithm. This is partly
 | 
			
		||||
because it has to search for all possible matches, but is also because it is
 | 
			
		||||
less susceptible to optimization.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
2. Capturing parentheses and back references are not supported.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
3. Although atomic groups are supported, their use does not provide the
 | 
			
		||||
performance advantage that it does for the standard algorithm.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 12 November 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2012 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										509
									
								
								tools/pcre/doc/html/pcrepartial.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										509
									
								
								tools/pcre/doc/html/pcrepartial.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,509 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcrepartial specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcrepartial man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<ul>
 | 
			
		||||
<li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE</a>
 | 
			
		||||
<li><a name="TOC2" href="#SEC2">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a>
 | 
			
		||||
<li><a name="TOC3" href="#SEC3">PARTIAL MATCHING USING pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a>
 | 
			
		||||
<li><a name="TOC4" href="#SEC4">PARTIAL MATCHING AND WORD BOUNDARIES</a>
 | 
			
		||||
<li><a name="TOC5" href="#SEC5">FORMERLY RESTRICTED PATTERNS</a>
 | 
			
		||||
<li><a name="TOC6" href="#SEC6">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a>
 | 
			
		||||
<li><a name="TOC7" href="#SEC7">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a>
 | 
			
		||||
<li><a name="TOC8" href="#SEC8">MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre[16|32]_exec()</a>
 | 
			
		||||
<li><a name="TOC9" href="#SEC9">ISSUES WITH MULTI-SEGMENT MATCHING</a>
 | 
			
		||||
<li><a name="TOC10" href="#SEC10">AUTHOR</a>
 | 
			
		||||
<li><a name="TOC11" href="#SEC11">REVISION</a>
 | 
			
		||||
</ul>
 | 
			
		||||
<br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
In normal use of PCRE, if the subject string that is passed to a matching
 | 
			
		||||
function matches as far as it goes, but is too short to match the entire
 | 
			
		||||
pattern, PCRE_ERROR_NOMATCH is returned. There are circumstances where it might
 | 
			
		||||
be helpful to distinguish this case from other cases in which there is no
 | 
			
		||||
match.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Consider, for example, an application where a human is required to type in data
 | 
			
		||||
for a field with specific formatting requirements. An example might be a date
 | 
			
		||||
in the form <i>ddmmmyy</i>, defined by this pattern:
 | 
			
		||||
<pre>
 | 
			
		||||
  ^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$
 | 
			
		||||
</pre>
 | 
			
		||||
If the application sees the user's keystrokes one by one, and can check that
 | 
			
		||||
what has been typed so far is potentially valid, it is able to raise an error
 | 
			
		||||
as soon as a mistake is made, by beeping and not reflecting the character that
 | 
			
		||||
has been typed, for example. This immediate feedback is likely to be a better
 | 
			
		||||
user interface than a check that is delayed until the entire string has been
 | 
			
		||||
entered. Partial matching can also be useful when the subject string is very
 | 
			
		||||
long and is not all available at once.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
PCRE supports partial matching by means of the PCRE_PARTIAL_SOFT and
 | 
			
		||||
PCRE_PARTIAL_HARD options, which can be set when calling any of the matching
 | 
			
		||||
functions. For backwards compatibility, PCRE_PARTIAL is a synonym for
 | 
			
		||||
PCRE_PARTIAL_SOFT. The essential difference between the two options is whether
 | 
			
		||||
or not a partial match is preferred to an alternative complete match, though
 | 
			
		||||
the details differ between the two types of matching function. If both options
 | 
			
		||||
are set, PCRE_PARTIAL_HARD takes precedence.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If you want to use partial matching with just-in-time optimized code, you must
 | 
			
		||||
call <b>pcre_study()</b>, <b>pcre16_study()</b> or <b>pcre32_study()</b> with one
 | 
			
		||||
or both of these options:
 | 
			
		||||
<pre>
 | 
			
		||||
  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
 | 
			
		||||
  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
 | 
			
		||||
</pre>
 | 
			
		||||
PCRE_STUDY_JIT_COMPILE should also be set if you are going to run non-partial
 | 
			
		||||
matches on the same pattern. If the appropriate JIT study mode has not been set
 | 
			
		||||
for a match, the interpretive matching code is used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Setting a partial matching option disables two of PCRE's standard
 | 
			
		||||
optimizations. PCRE remembers the last literal data unit in a pattern, and
 | 
			
		||||
abandons matching immediately if it is not present in the subject string. This
 | 
			
		||||
optimization cannot be used for a subject string that might match only
 | 
			
		||||
partially. If the pattern was studied, PCRE knows the minimum length of a
 | 
			
		||||
matching string, and does not bother to run the matching function on shorter
 | 
			
		||||
strings. This optimization is also disabled for partial matching.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
A partial match occurs during a call to <b>pcre_exec()</b> or
 | 
			
		||||
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached
 | 
			
		||||
successfully, but matching cannot continue because more characters are needed.
 | 
			
		||||
However, at least one character in the subject must have been inspected. This
 | 
			
		||||
character need not form part of the final matched string; lookbehind assertions
 | 
			
		||||
and the \K escape sequence provide ways of inspecting characters before the
 | 
			
		||||
start of a matched substring. The requirement for inspecting at least one
 | 
			
		||||
character exists because an empty string can always be matched; without such a
 | 
			
		||||
restriction there would always be a partial match of an empty string at the end
 | 
			
		||||
of the subject.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If there are at least two slots in the offsets vector when a partial match is
 | 
			
		||||
returned, the first slot is set to the offset of the earliest character that
 | 
			
		||||
was inspected. For convenience, the second offset points to the end of the
 | 
			
		||||
subject so that a substring can easily be identified. If there are at least
 | 
			
		||||
three slots in the offsets vector, the third slot is set to the offset of the
 | 
			
		||||
character where matching started.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
For the majority of patterns, the contents of the first and third slots will be
 | 
			
		||||
the same. However, for patterns that contain lookbehind assertions, or begin
 | 
			
		||||
with \b or \B, characters before the one where matching started may have been
 | 
			
		||||
inspected while carrying out the match. For example, consider this pattern:
 | 
			
		||||
<pre>
 | 
			
		||||
  /(?&lt;=abc)123/
 | 
			
		||||
</pre>
 | 
			
		||||
This pattern matches "123", but only if it is preceded by "abc". If the subject
 | 
			
		||||
string is "xyzabc12", the first two offsets after a partial match are for the
 | 
			
		||||
substring "abc12", because all these characters were inspected. However, the
 | 
			
		||||
third offset is set to 6, because that is the offset where matching began.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
What happens when a partial match is identified depends on which of the two
 | 
			
		||||
partial matching options is set.
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
PCRE_PARTIAL_SOFT WITH pcre_exec() OR pcre[16|32]_exec()
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
If PCRE_PARTIAL_SOFT is set when <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b>
 | 
			
		||||
identifies a partial match, the partial match is remembered, but matching
 | 
			
		||||
continues as normal, and other alternatives in the pattern are tried. If no
 | 
			
		||||
complete match can be found, PCRE_ERROR_PARTIAL is returned instead of
 | 
			
		||||
PCRE_ERROR_NOMATCH.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
This option is "soft" because it prefers a complete match over a partial match.
 | 
			
		||||
All the various matching items in a pattern behave as if the subject string is
 | 
			
		||||
potentially complete. For example, \z, \Z, and $ match at the end of the
 | 
			
		||||
subject, as normal, and for \b and \B the end of the subject is treated as a
 | 
			
		||||
non-alphanumeric.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If there is more than one partial match, the first one that was found provides
 | 
			
		||||
the data that is returned. Consider this pattern:
 | 
			
		||||
<pre>
 | 
			
		||||
  /123\w+X|dogY/
 | 
			
		||||
</pre>
 | 
			
		||||
If this is matched against the subject string "abc123dog", both
 | 
			
		||||
alternatives fail to match, but the end of the subject is reached during
 | 
			
		||||
matching, so PCRE_ERROR_PARTIAL is returned. The offsets are set to 3 and 9,
 | 
			
		||||
identifying "123dog" as the first partial match that was found. (In this
 | 
			
		||||
example, there are two partial matches, because "dog" on its own partially
 | 
			
		||||
matches the second alternative.)
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
PCRE_PARTIAL_HARD WITH pcre_exec() OR pcre[16|32]_exec()
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
If PCRE_PARTIAL_HARD is set for <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b>,
 | 
			
		||||
PCRE_ERROR_PARTIAL is returned as soon as a partial match is found, without
 | 
			
		||||
continuing to search for possible complete matches. This option is "hard"
 | 
			
		||||
because it prefers an earlier partial match over a later complete match. For
 | 
			
		||||
this reason, the assumption is made that the end of the supplied subject string
 | 
			
		||||
may not be the true end of the available data, and so, if \z, \Z, \b, \B,
 | 
			
		||||
or $ are encountered at the end of the subject, the result is
 | 
			
		||||
PCRE_ERROR_PARTIAL, provided that at least one character in the subject has
 | 
			
		||||
been inspected.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Setting PCRE_PARTIAL_HARD also affects the way UTF-8 and UTF-16
 | 
			
		||||
subject strings are checked for validity. Normally, an invalid sequence
 | 
			
		||||
causes the error PCRE_ERROR_BADUTF8 or PCRE_ERROR_BADUTF16. However, in the
 | 
			
		||||
special case of a truncated character at the end of the subject,
 | 
			
		||||
PCRE_ERROR_SHORTUTF8 or PCRE_ERROR_SHORTUTF16 is returned when
 | 
			
		||||
PCRE_PARTIAL_HARD is set.
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
Comparing hard and soft partial matching
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
The difference between the two partial matching options can be illustrated by a
 | 
			
		||||
pattern such as:
 | 
			
		||||
<pre>
 | 
			
		||||
  /dog(sbody)?/
 | 
			
		||||
</pre>
 | 
			
		||||
This matches either "dog" or "dogsbody", greedily (that is, it prefers the
 | 
			
		||||
longer string if possible). If it is matched against the string "dog" with
 | 
			
		||||
PCRE_PARTIAL_SOFT, it yields a complete match for "dog". However, if
 | 
			
		||||
PCRE_PARTIAL_HARD is set, the result is PCRE_ERROR_PARTIAL. On the other hand,
 | 
			
		||||
if the pattern is made ungreedy the result is different:
 | 
			
		||||
<pre>
 | 
			
		||||
  /dog(sbody)??/
 | 
			
		||||
</pre>
 | 
			
		||||
In this case the result is always a complete match because that is found first,
 | 
			
		||||
and matching never continues after finding a complete match. It might be easier
 | 
			
		||||
to follow this explanation by thinking of the two patterns like this:
 | 
			
		||||
<pre>
 | 
			
		||||
  /dog(sbody)?/    is the same as  /dogsbody|dog/
 | 
			
		||||
  /dog(sbody)??/   is the same as  /dog|dogsbody/
 | 
			
		||||
</pre>
 | 
			
		||||
The second pattern will never match "dogsbody", because it will always find the
 | 
			
		||||
shorter match first.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC3" href="#TOC1">PARTIAL MATCHING USING pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
The DFA functions move along the subject string character by character, without
 | 
			
		||||
backtracking, searching for all possible matches simultaneously. If the end of
 | 
			
		||||
the subject is reached before the end of the pattern, there is the possibility
 | 
			
		||||
of a partial match, again provided that at least one character has been
 | 
			
		||||
inspected.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
When PCRE_PARTIAL_SOFT is set, PCRE_ERROR_PARTIAL is returned only if there
 | 
			
		||||
have been no complete matches. Otherwise, the complete matches are returned.
 | 
			
		||||
However, if PCRE_PARTIAL_HARD is set, a partial match takes precedence over any
 | 
			
		||||
complete matches. The portion of the string that was inspected when the longest
 | 
			
		||||
partial match was found is set as the first matching string, provided there are
 | 
			
		||||
at least two slots in the offsets vector.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Because the DFA functions always search for all possible matches, and there is
 | 
			
		||||
no difference between greedy and ungreedy repetition, their behaviour is
 | 
			
		||||
different from the standard functions when PCRE_PARTIAL_HARD is set. Consider
 | 
			
		||||
the string "dog" matched against the ungreedy pattern shown above:
 | 
			
		||||
<pre>
 | 
			
		||||
  /dog(sbody)??/
 | 
			
		||||
</pre>
 | 
			
		||||
Whereas the standard functions stop as soon as they find the complete match for
 | 
			
		||||
"dog", the DFA functions also find the partial match for "dogsbody", and so
 | 
			
		||||
return that when PCRE_PARTIAL_HARD is set.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC4" href="#TOC1">PARTIAL MATCHING AND WORD BOUNDARIES</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
If a pattern ends with one of the sequences \b or \B, which test for word
 | 
			
		||||
boundaries, partial matching with PCRE_PARTIAL_SOFT can give counter-intuitive
 | 
			
		||||
results. Consider this pattern:
 | 
			
		||||
<pre>
 | 
			
		||||
  /\bcat\b/
 | 
			
		||||
</pre>
 | 
			
		||||
This matches "cat", provided there is a word boundary at either end. If the
 | 
			
		||||
subject string is "the cat", the comparison of the final "t" with a following
 | 
			
		||||
character cannot take place, so a partial match is found. However, normal
 | 
			
		||||
matching carries on, and \b matches at the end of the subject when the last
 | 
			
		||||
character is a letter, so a complete match is found. The result, therefore, is
 | 
			
		||||
<i>not</i> PCRE_ERROR_PARTIAL. Using PCRE_PARTIAL_HARD in this case does yield
 | 
			
		||||
PCRE_ERROR_PARTIAL, because then the partial match takes precedence.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC5" href="#TOC1">FORMERLY RESTRICTED PATTERNS</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
For releases of PCRE prior to 8.00, because of the way certain internal
 | 
			
		||||
optimizations were implemented in the <b>pcre_exec()</b> function, the
 | 
			
		||||
PCRE_PARTIAL option (predecessor of PCRE_PARTIAL_SOFT) could not be used with
 | 
			
		||||
all patterns. From release 8.00 onwards, the restrictions no longer apply, and
 | 
			
		||||
partial matching can be requested for any pattern.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Items that were formerly restricted were repeated single characters and
 | 
			
		||||
repeated metasequences. If PCRE_PARTIAL was set for a pattern that did not
 | 
			
		||||
conform to the restrictions, <b>pcre_exec()</b> returned the error code
 | 
			
		||||
PCRE_ERROR_BADPARTIAL (-13). This error code is no longer in use. The
 | 
			
		||||
PCRE_INFO_OKPARTIAL call to <b>pcre_fullinfo()</b> to find out if a compiled
 | 
			
		||||
pattern can be used for partial matching now always returns 1.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC6" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
If the escape sequence \P is present in a <b>pcretest</b> data line, the
 | 
			
		||||
PCRE_PARTIAL_SOFT option is used for the match. Here is a run of <b>pcretest</b>
 | 
			
		||||
that uses the date example quoted above:
 | 
			
		||||
<pre>
 | 
			
		||||
    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
 | 
			
		||||
  data> 25jun04\P
 | 
			
		||||
   0: 25jun04
 | 
			
		||||
   1: jun
 | 
			
		||||
  data> 25dec3\P
 | 
			
		||||
  Partial match: 25dec3
 | 
			
		||||
  data> 3ju\P
 | 
			
		||||
  Partial match: 3ju
 | 
			
		||||
  data> 3juj\P
 | 
			
		||||
  No match
 | 
			
		||||
  data> j\P
 | 
			
		||||
  No match
 | 
			
		||||
</pre>
 | 
			
		||||
The first data string is matched completely, so <b>pcretest</b> shows the
 | 
			
		||||
matched substrings. The remaining four strings do not match the complete
 | 
			
		||||
pattern, but the first two are partial matches. Similar output is obtained
 | 
			
		||||
if DFA matching is used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If the escape sequence \P is present more than once in a <b>pcretest</b> data
 | 
			
		||||
line, the PCRE_PARTIAL_HARD option is set for the match.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC7" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
When a partial match has been found using a DFA matching function, it is
 | 
			
		||||
possible to continue the match by providing additional subject data and calling
 | 
			
		||||
the function again with the same compiled regular expression, this time setting
 | 
			
		||||
the PCRE_DFA_RESTART option. You must pass the same working space as before,
 | 
			
		||||
because this is where details of the previous partial match are stored. Here is
 | 
			
		||||
an example using <b>pcretest</b>, using the \R escape sequence to set the
 | 
			
		||||
PCRE_DFA_RESTART option (\D specifies the use of the DFA matching function):
 | 
			
		||||
<pre>
 | 
			
		||||
    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
 | 
			
		||||
  data> 23ja\P\D
 | 
			
		||||
  Partial match: 23ja
 | 
			
		||||
  data> n05\R\D
 | 
			
		||||
   0: n05
 | 
			
		||||
</pre>
 | 
			
		||||
The first call has "23ja" as the subject, and requests partial matching; the
 | 
			
		||||
second call has "n05" as the subject for the continued (restarted) match.
 | 
			
		||||
Notice that when the match is complete, only the last part is shown; PCRE does
 | 
			
		||||
not retain the previously partially-matched string. It is up to the calling
 | 
			
		||||
program to do that if it needs to.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
That means that, for an unanchored pattern, if a continued match fails, it is
 | 
			
		||||
not possible to try again at a new starting point. All this facility is capable
 | 
			
		||||
of doing is continuing with the previous match attempt. In the previous
 | 
			
		||||
example, if the second set of data is "ug23" the result is no match, even
 | 
			
		||||
though there would be a match for "aug23" if the entire string were given at
 | 
			
		||||
once. Depending on the application, this may or may not be what you want.
 | 
			
		||||
The only way to allow for starting again at the next character is to retain the
 | 
			
		||||
matched part of the subject and try a new complete match.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
 | 
			
		||||
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
 | 
			
		||||
facility can be used to pass very long subject strings to the DFA matching
 | 
			
		||||
functions.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC8" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre[16|32]_exec()</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
From release 8.00, the standard matching functions can also be used to do
 | 
			
		||||
multi-segment matching. Unlike the DFA functions, it is not possible to
 | 
			
		||||
restart the previous match with a new segment of data. Instead, new data must
 | 
			
		||||
be added to the previous subject string, and the entire match re-run, starting
 | 
			
		||||
from the point where the partial match occurred. Earlier data can be discarded.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
It is best to use PCRE_PARTIAL_HARD in this situation, because it does not
 | 
			
		||||
treat the end of a segment as the end of the subject when matching \z, \Z,
 | 
			
		||||
\b, \B, and $. Consider an unanchored pattern that matches dates:
 | 
			
		||||
<pre>
 | 
			
		||||
    re> /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
 | 
			
		||||
  data> The date is 23ja\P\P
 | 
			
		||||
  Partial match: 23ja
 | 
			
		||||
</pre>
 | 
			
		||||
At this stage, an application could discard the text preceding "23ja", add on
 | 
			
		||||
text from the next segment, and call the matching function again. Unlike the
 | 
			
		||||
DFA matching functions, the entire matching string must always be available,
 | 
			
		||||
and the complete matching process occurs for each call, so more memory and more
 | 
			
		||||
processing time are needed.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
<b>Note:</b> If the pattern contains lookbehind assertions, or \K, or starts
 | 
			
		||||
with \b or \B, the string that is returned for a partial match includes
 | 
			
		||||
characters that precede the start of what would be returned for a complete
 | 
			
		||||
match, because it contains all the characters that were inspected during the
 | 
			
		||||
partial match.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC9" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Certain types of pattern may give problems with multi-segment matching,
 | 
			
		||||
whichever matching function is used.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
1. If the pattern contains a test for the beginning of a line, you need to pass
 | 
			
		||||
the PCRE_NOTBOL option when the subject string for any call does not start at the
 | 
			
		||||
beginning of a line. There is also a PCRE_NOTEOL option, but in practice when
 | 
			
		||||
doing multi-segment matching you should be using PCRE_PARTIAL_HARD, which
 | 
			
		||||
includes the effect of PCRE_NOTEOL.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
2. Lookbehind assertions that have already been obeyed are catered for in the
 | 
			
		||||
offsets that are returned for a partial match. However a lookbehind assertion
 | 
			
		||||
later in the pattern could require even earlier characters to be inspected. You
 | 
			
		||||
can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the
 | 
			
		||||
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the
 | 
			
		||||
length of the longest lookbehind in the pattern. This length is given in
 | 
			
		||||
characters, not bytes. If you always retain at least that many characters
 | 
			
		||||
before the partially matched string, all should be well. (Of course, near the
 | 
			
		||||
start of the subject, fewer characters may be present; in that case all
 | 
			
		||||
characters should be retained.)
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
From release 8.33, there is a more accurate way of deciding which characters to
 | 
			
		||||
retain. Instead of subtracting the length of the longest lookbehind from the
 | 
			
		||||
earliest inspected character (<i>offsets[0]</i>), the match start position
 | 
			
		||||
(<i>offsets[2]</i>) should be used, and the next match attempt started at the
 | 
			
		||||
<i>offsets[2]</i> character by setting the <i>startoffset</i> argument of
 | 
			
		||||
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
For example, if the pattern "(?<=123)abc" is partially
 | 
			
		||||
matched against the string "xx123a", the three offset values returned are 2, 6,
 | 
			
		||||
and 5. This indicates that the matching process that gave a partial match
 | 
			
		||||
started at offset 5, but the characters "123a" were all inspected. The maximum
 | 
			
		||||
lookbehind for that pattern is 3, so taking that away from 5 shows that we need
 | 
			
		||||
only keep "123a", and the next match attempt can be started at offset 3 (that
 | 
			
		||||
is, at "a") when further characters have been added. When the match start is
 | 
			
		||||
not the earliest inspected character, <b>pcretest</b> shows it explicitly:
 | 
			
		||||
<pre>
 | 
			
		||||
    re> "(?<=123)abc"
 | 
			
		||||
  data> xx123a\P\P
 | 
			
		||||
  Partial match at offset 5: 123a
 | 
			
		||||
</PRE>
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
3. Because a partial match must always contain at least one character, what
 | 
			
		||||
might be considered a partial match of an empty string actually gives a "no
 | 
			
		||||
match" result. For example:
 | 
			
		||||
<pre>
 | 
			
		||||
    re> /c(?<=abc)x/
 | 
			
		||||
  data> ab\P
 | 
			
		||||
  No match
 | 
			
		||||
</pre>
 | 
			
		||||
If the next segment begins "cx", a match should be found, but this will only
 | 
			
		||||
happen if characters from the previous segment are retained. For this reason, a
 | 
			
		||||
"no match" result should be interpreted as "partial match of an empty string"
 | 
			
		||||
when the pattern contains lookbehinds.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
4. Matching a subject string that is split into multiple segments may not
 | 
			
		||||
always produce exactly the same result as matching over one single long string,
 | 
			
		||||
especially when PCRE_PARTIAL_SOFT is used. The section "Partial Matching and
 | 
			
		||||
Word Boundaries" above describes an issue that arises if the pattern ends with
 | 
			
		||||
\b or \B. Another kind of difference may occur when there are multiple
 | 
			
		||||
matching possibilities, because (for PCRE_PARTIAL_SOFT) a partial match result
 | 
			
		||||
is given only when there are no completed matches. This means that as soon as
 | 
			
		||||
the shortest match has been found, continuation to a new subject segment is no
 | 
			
		||||
longer possible. Consider again this <b>pcretest</b> example:
 | 
			
		||||
<pre>
 | 
			
		||||
    re> /dog(sbody)?/
 | 
			
		||||
  data> dogsb\P
 | 
			
		||||
   0: dog
 | 
			
		||||
  data> do\P\D
 | 
			
		||||
  Partial match: do
 | 
			
		||||
  data> gsb\R\P\D
 | 
			
		||||
   0: g
 | 
			
		||||
  data> dogsbody\D
 | 
			
		||||
   0: dogsbody
 | 
			
		||||
   1: dog
 | 
			
		||||
</pre>
 | 
			
		||||
The first data line passes the string "dogsb" to a standard matching function,
 | 
			
		||||
setting the PCRE_PARTIAL_SOFT option. Although the string is a partial match
 | 
			
		||||
for "dogsbody", the result is not PCRE_ERROR_PARTIAL, because the shorter
 | 
			
		||||
string "dog" is a complete match. Similarly, when the subject is presented to
 | 
			
		||||
a DFA matching function in several parts ("do" and "gsb" being the first two)
 | 
			
		||||
the match stops when "dog" has been found, and it is not possible to continue.
 | 
			
		||||
On the other hand, if "dogsbody" is presented as a single string, a DFA
 | 
			
		||||
matching function finds both matches.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Because of these problems, it is best to use PCRE_PARTIAL_HARD when matching
 | 
			
		||||
multi-segment data. The example above then behaves differently:
 | 
			
		||||
<pre>
 | 
			
		||||
    re> /dog(sbody)?/
 | 
			
		||||
  data> dogsb\P\P
 | 
			
		||||
  Partial match: dogsb
 | 
			
		||||
  data> do\P\D
 | 
			
		||||
  Partial match: do
 | 
			
		||||
  data> gsb\R\P\P\D
 | 
			
		||||
  Partial match: gsb
 | 
			
		||||
</pre>
 | 
			
		||||
5. Patterns that contain alternatives at the top level which do not all start
 | 
			
		||||
with the same pattern item may not work as expected when PCRE_DFA_RESTART is
 | 
			
		||||
used. For example, consider this pattern:
 | 
			
		||||
<pre>
 | 
			
		||||
  1234|3789
 | 
			
		||||
</pre>
 | 
			
		||||
If the first part of the subject is "ABC123", a partial match of the first
 | 
			
		||||
alternative is found at offset 3. There is no partial match for the second
 | 
			
		||||
alternative, because such a match does not start at the same point in the
 | 
			
		||||
subject string. Attempting to continue with the string "7890" does not yield a
 | 
			
		||||
match because only those alternatives that match at one point in the subject
 | 
			
		||||
are remembered. The problem arises because the start of the second alternative
 | 
			
		||||
matches within the first alternative. There is no problem with anchored
 | 
			
		||||
patterns or patterns such as:
 | 
			
		||||
<pre>
 | 
			
		||||
  1234|ABCD
 | 
			
		||||
</pre>
 | 
			
		||||
where no string can be a partial match for both alternatives. This is not a
 | 
			
		||||
problem if a standard matching function is used, because the entire match has
 | 
			
		||||
to be rerun each time:
 | 
			
		||||
<pre>
 | 
			
		||||
    re> /1234|3789/
 | 
			
		||||
  data> ABC123\P\P
 | 
			
		||||
  Partial match: 123
 | 
			
		||||
  data> 1237890
 | 
			
		||||
   0: 3789
 | 
			
		||||
</pre>
 | 
			
		||||
Of course, instead of using PCRE_DFA_RESTART, the same technique of re-running
 | 
			
		||||
the entire match can also be used with the DFA matching functions. Another
 | 
			
		||||
possibility is to work with two buffers. If a partial match at offset <i>n</i>
 | 
			
		||||
in the first buffer is followed by "no match" when PCRE_DFA_RESTART is used on
 | 
			
		||||
the second buffer, you can then try a new match starting at offset <i>n+1</i> in
 | 
			
		||||
the first buffer.
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC10" href="#TOC1">AUTHOR</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 02 July 2013
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2013 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
							
								
								
									
										3235
									
								
								tools/pcre/doc/html/pcrepattern.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3235
									
								
								tools/pcre/doc/html/pcrepattern.html
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										195
									
								
								tools/pcre/doc/html/pcreperform.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										195
									
								
								tools/pcre/doc/html/pcreperform.html
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,195 @@
 | 
			
		||||
<html>
 | 
			
		||||
<head>
 | 
			
		||||
<title>pcreperform specification</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
 | 
			
		||||
<h1>pcreperform man page</h1>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
<p>
 | 
			
		||||
This page is part of the PCRE HTML documentation. It was generated automatically
 | 
			
		||||
from the original man page. If there is any nonsense in it, please consult the
 | 
			
		||||
man page, in case the conversion went wrong.
 | 
			
		||||
<br>
 | 
			
		||||
<br><b>
 | 
			
		||||
PCRE PERFORMANCE
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Two aspects of performance are discussed below: memory usage and processing
 | 
			
		||||
time. The way you express your pattern as a regular expression can affect both
 | 
			
		||||
of them.
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
COMPILED PATTERN MEMORY USAGE
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Patterns are compiled by PCRE into a reasonably efficient interpretive code, so
 | 
			
		||||
that most simple patterns do not use much memory. However, there is one case
 | 
			
		||||
where the memory usage of a compiled pattern can be unexpectedly large. If a
 | 
			
		||||
parenthesized subpattern has a quantifier with a minimum greater than 1 and/or
 | 
			
		||||
a limited maximum, the whole subpattern is repeated in the compiled code. For
 | 
			
		||||
example, the pattern
 | 
			
		||||
<pre>
 | 
			
		||||
  (abc|def){2,4}
 | 
			
		||||
</pre>
 | 
			
		||||
is compiled as if it were
 | 
			
		||||
<pre>
 | 
			
		||||
  (abc|def)(abc|def)((abc|def)(abc|def)?)?
 | 
			
		||||
</pre>
 | 
			
		||||
(Technical aside: It is done this way so that backtrack points within each of
 | 
			
		||||
the repetitions can be independently maintained.)
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
For regular expressions whose quantifiers use only small numbers, this is not
 | 
			
		||||
usually a problem. However, if the numbers are large, and particularly if such
 | 
			
		||||
repetitions are nested, the memory usage can become an embarrassment. For
 | 
			
		||||
example, the very simple pattern
 | 
			
		||||
<pre>
 | 
			
		||||
  ((ab){1,1000}c){1,3}
 | 
			
		||||
</pre>
 | 
			
		||||
uses 51K bytes when compiled using the 8-bit library. When PCRE is compiled
 | 
			
		||||
with its default internal pointer size of two bytes, the size limit on a
 | 
			
		||||
compiled pattern is 64K data units, and this is reached with the above pattern
 | 
			
		||||
if the outer repetition is increased from 3 to 4. PCRE can be compiled to use
 | 
			
		||||
larger internal pointers and thus handle larger compiled patterns, but it is
 | 
			
		||||
better to try to rewrite your pattern to use less memory if you can.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
One way of reducing the memory usage for such patterns is to make use of PCRE's
 | 
			
		||||
<a href="pcrepattern.html#subpatternsassubroutines">"subroutine"</a>
 | 
			
		||||
facility. Re-writing the above pattern as
 | 
			
		||||
<pre>
 | 
			
		||||
  ((ab)(?2){0,999}c)(?1){0,2}
 | 
			
		||||
</pre>
 | 
			
		||||
reduces the memory requirements to 18K, and indeed it remains under 20K even
 | 
			
		||||
with the outer repetition increased to 100. However, this pattern is not
 | 
			
		||||
exactly equivalent, because the "subroutine" calls are treated as
 | 
			
		||||
<a href="pcrepattern.html#atomicgroup">atomic groups</a>
 | 
			
		||||
into which there can be no backtracking if there is a subsequent matching
 | 
			
		||||
failure. Therefore, PCRE cannot do this kind of rewriting automatically.
 | 
			
		||||
Furthermore, there is a noticeable loss of speed when executing the modified
 | 
			
		||||
pattern. Nevertheless, if the atomic grouping is not a problem and the loss of
 | 
			
		||||
speed is acceptable, this kind of rewriting will allow you to process patterns
 | 
			
		||||
that PCRE cannot otherwise handle.
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
STACK USAGE AT RUN TIME
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
When <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> is used for matching, certain
 | 
			
		||||
kinds of pattern can cause it to use large amounts of the process stack. In
 | 
			
		||||
some environments the default process stack is quite small, and if it runs out
 | 
			
		||||
the result is often SIGSEGV. This issue is probably the most frequently raised
 | 
			
		||||
problem with PCRE. Rewriting your pattern can often help. The
 | 
			
		||||
<a href="pcrestack.html"><b>pcrestack</b></a>
 | 
			
		||||
documentation discusses this issue in detail.
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
PROCESSING TIME
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Certain items in regular expression patterns are processed more efficiently
 | 
			
		||||
than others. It is more efficient to use a character class like [aeiou] than a
 | 
			
		||||
set of single-character alternatives such as (a|e|i|o|u). In general, the
 | 
			
		||||
simplest construction that provides the required behaviour is usually the most
 | 
			
		||||
efficient. Jeffrey Friedl's book contains a lot of useful general discussion
 | 
			
		||||
about optimizing regular expressions for efficient performance. This document
 | 
			
		||||
contains a few observations about PCRE.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Using Unicode character properties (the \p, \P, and \X escapes) is slow,
 | 
			
		||||
because PCRE has to use a multi-stage table lookup whenever it needs a
 | 
			
		||||
character's property. If you can find an alternative pattern that does not use
 | 
			
		||||
character properties, it will probably be faster.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
By default, the escape sequences \b, \d, \s, and \w, and the POSIX
 | 
			
		||||
character classes such as [:alpha:] do not use Unicode properties, partly for
 | 
			
		||||
backwards compatibility, and partly for performance reasons. However, you can
 | 
			
		||||
set PCRE_UCP if you want Unicode character properties to be used. This can
 | 
			
		||||
double the matching time for items such as \d, when matched with
 | 
			
		||||
a traditional matching function; the performance loss is less with
 | 
			
		||||
a DFA matching function, and in both cases there is not much difference for
 | 
			
		||||
\b.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
When a pattern begins with .* not in parentheses, or in parentheses that are
 | 
			
		||||
not the subject of a backreference, and the PCRE_DOTALL option is set, the
 | 
			
		||||
pattern is implicitly anchored by PCRE, since it can match only at the start of
 | 
			
		||||
a subject string. However, if PCRE_DOTALL is not set, PCRE cannot make this
 | 
			
		||||
optimization, because the . metacharacter does not then match a newline, and if
 | 
			
		||||
the subject string contains newlines, the pattern may match from the character
 | 
			
		||||
immediately following one of them instead of from the very start. For example,
 | 
			
		||||
the pattern
 | 
			
		||||
<pre>
 | 
			
		||||
  .*second
 | 
			
		||||
</pre>
 | 
			
		||||
matches the subject "first\nand second" (where \n stands for a newline
 | 
			
		||||
character), with the match starting at the seventh character. In order to do
 | 
			
		||||
this, PCRE has to retry the match starting after every newline in the subject.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
If you are using such a pattern with subject strings that do not contain
 | 
			
		||||
newlines, the best performance is obtained by setting PCRE_DOTALL, or starting
 | 
			
		||||
the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE
 | 
			
		||||
from having to scan along the subject looking for a newline to restart at.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
Beware of patterns that contain nested indefinite repeats. These can take a
 | 
			
		||||
long time to run when applied to a string that does not match. Consider the
 | 
			
		||||
pattern fragment
 | 
			
		||||
<pre>
 | 
			
		||||
  ^(a+)*
 | 
			
		||||
</pre>
 | 
			
		||||
This can match "aaaa" in 16 different ways, and this number increases very
 | 
			
		||||
rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4
 | 
			
		||||
times, and for each of those cases other than 0 or 4, the + repeats can match
 | 
			
		||||
different numbers of times.) When the remainder of the pattern is such that the
 | 
			
		||||
entire match is going to fail, PCRE has in principle to try every possible
 | 
			
		||||
variation, and this can take an extremely long time, even for relatively short
 | 
			
		||||
strings.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
An optimization catches some of the more simple cases such as
 | 
			
		||||
<pre>
 | 
			
		||||
  (a+)*b
 | 
			
		||||
</pre>
 | 
			
		||||
where a literal character follows. Before embarking on the standard matching
 | 
			
		||||
procedure, PCRE checks that there is a "b" later in the subject string, and if
 | 
			
		||||
there is not, it fails the match immediately. However, when there is no
 | 
			
		||||
following literal this optimization cannot be used. You can see the difference
 | 
			
		||||
by comparing the behaviour of
 | 
			
		||||
<pre>
 | 
			
		||||
  (a+)*\d
 | 
			
		||||
</pre>
 | 
			
		||||
with the pattern above. The earlier pattern, (a+)*b, gives a failure almost instantly when
 | 
			
		||||
applied to a whole line of "a" characters, whereas (a+)*\d takes an
 | 
			
		||||
appreciable time with strings longer than about 20 characters.
 | 
			
		||||
</P>
 | 
			
		||||
<P>
 | 
			
		||||
In many cases, the solution to this kind of performance issue is to use an
 | 
			
		||||
atomic group or a possessive quantifier.
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
AUTHOR
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Philip Hazel
 | 
			
		||||
<br>
 | 
			
		||||
University Computing Service
 | 
			
		||||
<br>
 | 
			
		||||
Cambridge CB2 3QH, England.
 | 
			
		||||
<br>
 | 
			
		||||
</P>
 | 
			
		||||
<br><b>
 | 
			
		||||
REVISION
 | 
			
		||||
</b><br>
 | 
			
		||||
<P>
 | 
			
		||||
Last updated: 25 August 2012
 | 
			
		||||
<br>
 | 
			
		||||
Copyright © 1997-2012 University of Cambridge.
 | 
			
		||||
<br>
 | 
			
		||||
<p>
 | 
			
		||||
Return to the <a href="index.html">PCRE index page</a>.
 | 
			
		||||
</p>
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user