Regex: Update PCRE to v8.35.

I was über lazy at first, so took libs from SM.
But actually it's quite easy to compile, so let's update to latest version \o/.
This commit is contained in:
Arkshine 2014-07-05 13:53:30 +02:00
parent d1153b8049
commit d4de0e6f1e
241 changed files with 51074 additions and 15011 deletions

View File

@ -108,9 +108,9 @@ while (<STDIN>)
# Handling .sp is subtle. If it is inside a literal section, do nothing if
# the next line is a non literal text line; similarly, if not inside a
# literal section, do nothing if a literal follows. The point being that
# the <pre> and </pre> that delimit literal sections will do the spacing.
# Always skip if no previous output.
# literal section, do nothing if a literal follows, unless we are inside
# a .nf/.ne section. The point being that the <pre> and </pre> that delimit
# literal sections will do the spacing. Always skip if no previous output.
elsif (/^\.sp/)
{
@ -123,7 +123,7 @@ while (<STDIN>)
}
else
{
print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
}
redo; # Now process the lookahead line we just read
}

View File

@ -8,7 +8,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2014 University of Cambridge
All rights reserved
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2012 Zoltan Herczeg
Copyright(c) 2010-2014 Zoltan Herczeg
All rights reserved.
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2012 Zoltan Herczeg
Copyright(c) 2009-2014 Zoltan Herczeg
All rights reserved.

View File

@ -60,6 +60,11 @@
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
# 2012-09-08 ChPe added PCRE32 support
# 2012-10-23 PH added support for VALGRIND and GCOV
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
# so it has been removed.
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
PROJECT(PCRE C CXX)
@ -128,6 +133,9 @@ SET(PCRE_EBCDIC_NL25 OFF CACHE BOOL
SET(PCRE_LINK_SIZE "2" CACHE STRING
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
SET(PCRE_PARENS_NEST_LIMIT "250" CACHE STRING
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
@ -164,9 +172,6 @@ SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
SET(PCRE_SUPPORT_VALGRIND OFF CACHE BOOL
"Enable Valgrind support.")
SET(PCRE_SUPPORT_COVERAGE OFF CACHE BOOL
"Enable code coverage support using gcov.")
OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON)
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
OPTION(PCRE_BUILD_TESTS "Build the tests" ON)
@ -181,6 +186,12 @@ IF (MINGW)
OFF)
ENDIF(MINGW)
IF(MSVC)
OPTION(INSTALL_MSVC_PDB
"ON=Install .pdb files built by MSVC, if generated"
OFF)
ENDIF(MSVC)
# bzip2 lib
IF(BZIP2_FOUND)
OPTION (PCRE_SUPPORT_LIBBZ2 "Enable support for linking pcregrep with libbz2." ON)
@ -296,13 +307,6 @@ IF(PCRE_SUPPORT_VALGRIND)
SET(SUPPORT_VALGRIND 1)
ENDIF(PCRE_SUPPORT_VALGRIND)
IF(PCRE_SUPPORT_COVERAGE)
SET(SUPPORT_GCOV 1)
IF(NOT CMAKE_COMPILER_IS_GNUCC)
MESSAGE(FATAL_ERROR "Code coverage reports can only be generated when using GCC")
ENDIF(NOT CMAKE_COMPILER_IS_GNUCC)
ENDIF(PCRE_SUPPORT_COVERAGE)
# This next one used to contain
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
# but I was advised to add the NCURSES test as well, along with
@ -552,6 +556,17 @@ SET(PCREPOSIX_SOURCES
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
ENDIF(MINGW AND NOT PCRE_STATIC)
IF(MSVC AND NOT PCRE_STATIC)
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
SET(PCRE_SOURCES
${PCRE_SOURCES} pcre.rc)
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
SET(PCREPOSIX_SOURCES
${PCREPOSIX_SOURCES} pcreposix.rc)
ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
ENDIF(MSVC AND NOT PCRE_STATIC)
SET(PCRECPP_HEADERS
pcrecpp.h
pcre_scanner.h
@ -570,7 +585,7 @@ SET(PCRECPP_SOURCES
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
IF(MSVC)
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE)
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS)
ENDIF(MSVC)
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
@ -772,7 +787,7 @@ MESSAGE(\" \")
# This is a generated file.
srcdir=${PROJECT_SOURCE_DIR}
pcretest=${PCRETEST_EXE}
source ${PROJECT_SOURCE_DIR}/RunTest
. ${PROJECT_SOURCE_DIR}/RunTest
if test \"$?\" != \"0\"; then exit 1; fi
# End
")
@ -788,7 +803,7 @@ if test \"$?\" != \"0\"; then exit 1; fi
srcdir=${PROJECT_SOURCE_DIR}
pcregrep=${PCREGREP_EXE}
pcretest=${PCRETEST_EXE}
source ${PROJECT_SOURCE_DIR}/RunGrepTest
. ${PROJECT_SOURCE_DIR}/RunGrepTest
if test \"$?\" != \"0\"; then exit 1; fi
# End
")
@ -877,6 +892,17 @@ INSTALL(FILES ${man1} DESTINATION man/man1)
INSTALL(FILES ${man3} DESTINATION man/man3)
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
IF(MSVC AND INSTALL_MSVC_PDB)
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre.pdb
${PROJECT_BINARY_DIR}/pcreposix.pdb
DESTINATION bin
CONFIGURATIONS RelWithDebInfo)
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcred.pdb
${PROJECT_BINARY_DIR}/pcreposixd.pdb
DESTINATION bin
CONFIGURATIONS Debug)
ENDIF(MSVC AND INSTALL_MSVC_PDB)
# help, only for nice output
IF(BUILD_SHARED_LIBS)
SET(BUILD_STATIC_LIBS OFF)
@ -917,6 +943,7 @@ IF(PCRE_SHOW_REPORT)
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE_PARENS_NEST_LIMIT}")
MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
@ -953,6 +980,11 @@ IF(PCRE_SHOW_REPORT)
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
ENDIF(MINGW AND NOT PCRE_STATIC)
IF(MSVC)
MESSAGE(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
ENDIF(MSVC)
MESSAGE(STATUS "")
ENDIF(PCRE_SHOW_REPORT)

View File

@ -1,6 +1,501 @@
ChangeLog for PCRE
------------------
Version 8.35 04-April-2014
--------------------------
1. A new flag is set, when property checks are present in an XCLASS.
When this flag is not set, PCRE can perform certain optimizations
such as studying these XCLASS-es.
2. The auto-possessification of character sets were improved: a normal
and an extended character set can be compared now. Furthermore
the JIT compiler optimizes more character set checks.
3. Got rid of some compiler warnings for potentially uninitialized variables
that show up only when compiled with -O2.
4. A pattern such as (?=ab\K) that uses \K in an assertion can set the start
of a match later then the end of the match. The pcretest program was not
handling the case sensibly - it was outputting from the start to the next
binary zero. It now reports this situation in a message, and outputs the
text from the end to the start.
5. Fast forward search is improved in JIT. Instead of the first three
characters, any three characters with fixed position can be searched.
Search order: first, last, middle.
6. Improve character range checks in JIT. Characters are read by an inprecise
function now, which returns with an unknown value if the character code is
above a certain treshold (e.g: 256). The only limitation is that the value
must be bigger than the treshold as well. This function is useful, when
the characters above the treshold are handled in the same way.
7. The macros whose names start with RAWUCHAR are placeholders for a future
mode in which only the bottom 21 bits of 32-bit data items are used. To
make this more memorable for those maintaining the code, the names have
been changed to start with UCHAR21, and an extensive comment has been added
to their definition.
8. Add missing (new) files sljitNativeTILEGX.c and sljitNativeTILEGX-encoder.c
to the export list in Makefile.am (they were accidentally omitted from the
8.34 tarball).
9. The informational output from pcretest used the phrase "starting byte set"
which is inappropriate for the 16-bit and 32-bit libraries. As the output
for "first char" and "need char" really means "non-UTF-char", I've changed
"byte" to "char", and slightly reworded the output. The documentation about
these values has also been (I hope) clarified.
10. Another JIT related optimization: use table jumps for selecting the correct
backtracking path, when more than four alternatives are present inside a
bracket.
11. Empty match is not possible, when the minimum length is greater than zero,
and there is no \K in the pattern. JIT should avoid empty match checks in
such cases.
12. In a caseless character class with UCP support, when a character with more
than one alternative case was not the first character of a range, not all
the alternative cases were added to the class. For example, s and \x{17f}
are both alternative cases for S: the class [RST] was handled correctly,
but [R-T] was not.
13. The configure.ac file always checked for pthread support when JIT was
enabled. This is not used in Windows, so I have put this test inside a
check for the presence of windows.h (which was already tested for).
14. Improve pattern prefix search by a simplified Boyer-Moore algorithm in JIT.
The algorithm provides a way to skip certain starting offsets, and usually
faster than linear prefix searches.
15. Change 13 for 8.20 updated RunTest to check for the 'fr' locale as well
as for 'fr_FR' and 'french'. For some reason, however, it then used the
Windows-specific input and output files, which have 'french' screwed in.
So this could never have worked. One of the problems with locales is that
they aren't always the same. I have now updated RunTest so that it checks
the output of the locale test (test 3) against three different output
files, and it allows the test to pass if any one of them matches. With luck
this should make the test pass on some versions of Solaris where it was
failing. Because of the uncertainty, the script did not used to stop if
test 3 failed; it now does. If further versions of a French locale ever
come to light, they can now easily be added.
16. If --with-pcregrep-bufsize was given a non-integer value such as "50K",
there was a message during ./configure, but it did not stop. This now
provokes an error. The invalid example in README has been corrected.
If a value less than the minimum is given, the minimum value has always
been used, but now a warning is given.
17. If --enable-bsr-anycrlf was set, the special 16/32-bit test failed. This
was a bug in the test system, which is now fixed. Also, the list of various
configurations that are tested for each release did not have one with both
16/32 bits and --enable-bar-anycrlf. It now does.
18. pcretest was missing "-C bsr" for displaying the \R default setting.
19. Little endian PowerPC systems are supported now by the JIT compiler.
20. The fast forward newline mechanism could enter to an infinite loop on
certain invalid UTF-8 input. Although we don't support these cases
this issue can be fixed by a performance optimization.
21. Change 33 of 8.34 is not sufficient to ensure stack safety because it does
not take account if existing stack usage. There is now a new global
variable called pcre_stack_guard that can be set to point to an external
function to check stack availability. It is called at the start of
processing every parenthesized group.
22. A typo in the code meant that in ungreedy mode the max/min qualifier
behaved like a min-possessive qualifier, and, for example, /a{1,3}b/U did
not match "ab".
23. When UTF was disabled, the JIT program reported some incorrect compile
errors. These messages are silenced now.
24. Experimental support for ARM-64 and MIPS-64 has been added to the JIT
compiler.
25. Change all the temporary files used in RunGrepTest to be different to those
used by RunTest so that the tests can be run simultaneously, for example by
"make -j check".
Version 8.34 15-December-2013
-----------------------------
1. Add pcre[16|32]_jit_free_unused_memory to forcibly free unused JIT
executable memory. Patch inspired by Carsten Klein.
2. ./configure --enable-coverage defined SUPPORT_GCOV in config.h, although
this macro is never tested and has no effect, because the work to support
coverage involves only compiling and linking options and special targets in
the Makefile. The comment in config.h implied that defining the macro would
enable coverage support, which is totally false. There was also support for
setting this macro in the CMake files (my fault, I just copied it from
configure). SUPPORT_GCOV has now been removed.
3. Make a small performance improvement in strlen16() and strlen32() in
pcretest.
4. Change 36 for 8.33 left some unreachable statements in pcre_exec.c,
detected by the Solaris compiler (gcc doesn't seem to be able to diagnose
these cases). There was also one in pcretest.c.
5. Cleaned up a "may be uninitialized" compiler warning in pcre_exec.c.
6. In UTF mode, the code for checking whether a group could match an empty
string (which is used for indefinitely repeated groups to allow for
breaking an infinite loop) was broken when the group contained a repeated
negated single-character class with a character that occupied more than one
data item and had a minimum repetition of zero (for example, [^\x{100}]* in
UTF-8 mode). The effect was undefined: the group might or might not be
deemed as matching an empty string, or the program might have crashed.
7. The code for checking whether a group could match an empty string was not
recognizing that \h, \H, \v, \V, and \R must match a character.
8. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
an empty string. If it can, pcretest shows this in its information output.
9. Fixed two related bugs that applied to Unicode extended grapheme clusters
that were repeated with a maximizing qualifier (e.g. \X* or \X{2,5}) when
matched by pcre_exec() without using JIT:
(a) If the rest of the pattern did not match after a maximal run of
grapheme clusters, the code for backing up to try with fewer of them
did not always back up over a full grapheme when characters that do not
have the modifier quality were involved, e.g. Hangul syllables.
(b) If the match point in a subject started with modifier character, and
there was no match, the code could incorrectly back up beyond the match
point, and potentially beyond the first character in the subject,
leading to a segfault or an incorrect match result.
10. A conditional group with an assertion condition could lead to PCRE
recording an incorrect first data item for a match if no other first data
item was recorded. For example, the pattern (?(?=ab)ab) recorded "a" as a
first data item, and therefore matched "ca" after "c" instead of at the
start.
11. Change 40 for 8.33 (allowing pcregrep to find empty strings) showed up a
bug that caused the command "echo a | ./pcregrep -M '|a'" to loop.
12. The source of pcregrep now includes z/OS-specific code so that it can be
compiled for z/OS as part of the special z/OS distribution.
13. Added the -T and -TM options to pcretest.
14. The code in pcre_compile.c for creating the table of named capturing groups
has been refactored. Instead of creating the table dynamically during the
actual compiling pass, the information is remembered during the pre-compile
pass (on the stack unless there are more than 20 named groups, in which
case malloc() is used) and the whole table is created before the actual
compile happens. This has simplified the code (it is now nearly 150 lines
shorter) and prepared the way for better handling of references to groups
with duplicate names.
15. A back reference to a named subpattern when there is more than one of the
same name now checks them in the order in which they appear in the pattern.
The first one that is set is used for the reference. Previously only the
first one was inspected. This change makes PCRE more compatible with Perl.
16. Unicode character properties were updated from Unicode 6.3.0.
17. The compile-time code for auto-possessification has been refactored, based
on a patch by Zoltan Herczeg. It now happens after instead of during
compilation. The code is cleaner, and more cases are handled. The option
PCRE_NO_AUTO_POSSESS is added for testing purposes, and the -O and /O
options in pcretest are provided to set it. It can also be set by
(*NO_AUTO_POSSESS) at the start of a pattern.
18. The character VT has been added to the default ("C" locale) set of
characters that match \s and are generally treated as white space,
following this same change in Perl 5.18. There is now no difference between
"Perl space" and "POSIX space". Whether VT is treated as white space in
other locales depends on the locale.
19. The code for checking named groups as conditions, either for being set or
for being recursed, has been refactored (this is related to 14 and 15
above). Processing unduplicated named groups should now be as fast at
numerical groups, and processing duplicated groups should be faster than
before.
20. Two patches to the CMake build system, by Alexander Barkov:
(1) Replace the "source" command by "." in CMakeLists.txt because
"source" is a bash-ism.
(2) Add missing HAVE_STDINT_H and HAVE_INTTYPES_H to config-cmake.h.in;
without these the CMake build does not work on Solaris.
21. Perl has changed its handling of \8 and \9. If there is no previously
encountered capturing group of those numbers, they are treated as the
literal characters 8 and 9 instead of a binary zero followed by the
literals. PCRE now does the same.
22. Following Perl, added \o{} to specify codepoints in octal, making it
possible to specify values greater than 0777 and also making them
unambiguous.
23. Perl now gives an error for missing closing braces after \x{... instead of
treating the string as literal. PCRE now does the same.
24. RunTest used to grumble if an inappropriate test was selected explicitly,
but just skip it when running all tests. This make it awkward to run ranges
of tests when one of them was inappropriate. Now it just skips any
inappropriate tests, as it always did when running all tests.
25. If PCRE_AUTO_CALLOUT and PCRE_UCP were set for a pattern that contained
character types such as \d or \w, too many callouts were inserted, and the
data that they returned was rubbish.
26. In UCP mode, \s was not matching two of the characters that Perl matches,
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
were matched by \h. The code has now been refactored so that the lists of
the horizontal and vertical whitespace characters used for \h and \v (which
are defined only in one place) are now also used for \s.
27. Add JIT support for the 64 bit TileGX architecture.
Patch by Jiong Wang (Tilera Corporation).
28. Possessive quantifiers for classes (both explicit and automatically
generated) now use special opcodes instead of wrapping in ONCE brackets.
29. Whereas an item such as A{4}+ ignored the possessivenes of the quantifier
(because it's meaningless), this was not happening when PCRE_CASELESS was
set. Not wrong, but inefficient.
30. Updated perltest.pl to add /u (force Unicode mode) when /W (use Unicode
properties for \w, \d, etc) is present in a test regex. Otherwise if the
test contains no characters greater than 255, Perl doesn't realise it
should be using Unicode semantics.
31. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
does in Unicode mode.
32. Added the "forbid" facility to pcretest so that putting tests into the
wrong test files can sometimes be quickly detected.
33. There is now a limit (default 250) on the depth of nesting of parentheses.
This limit is imposed to control the amount of system stack used at compile
time. It can be changed at build time by --with-parens-nest-limit=xxx or
the equivalent in CMake.
34. Character classes such as [A-\d] or [a-[:digit:]] now cause compile-time
errors. Perl warns for these when in warning mode, but PCRE has no facility
for giving warnings.
35. Change 34 for 8.13 allowed quantifiers on assertions, because Perl does.
However, this was not working for (?!) because it is optimized to (*FAIL),
for which PCRE does not allow quantifiers. The optimization is now disabled
when a quantifier follows (?!). I can't see any use for this, but it makes
things uniform.
36. Perl no longer allows group names to start with digits, so I have made this
change also in PCRE. It simplifies the code a bit.
37. In extended mode, Perl ignores spaces before a + that indicates a
possessive quantifier. PCRE allowed a space before the quantifier, but not
before the possessive +. It now does.
38. The use of \K (reset reported match start) within a repeated possessive
group such as (a\Kb)*+ was not working.
40. Document that the same character tables must be used at compile time and
run time, and that the facility to pass tables to pcre_exec() and
pcre_dfa_exec() is for use only with saved/restored patterns.
41. Applied Jeff Trawick's patch CMakeLists.txt, which "provides two new
features for Builds with MSVC:
1. Support pcre.rc and/or pcreposix.rc (as is already done for MinGW
builds). The .rc files can be used to set FileDescription and many other
attributes.
2. Add an option (-DINSTALL_MSVC_PDB) to enable installation of .pdb files.
This allows higher-level build scripts which want .pdb files to avoid
hard-coding the exact files needed."
42. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
mean "start of word" and "end of word", respectively, as a transition aid.
43. A minimizing repeat of a class containing codepoints greater than 255 in
non-UTF 16-bit or 32-bit modes caused an internal error when PCRE was
compiled to use the heap for recursion.
44. Got rid of some compiler warnings for unused variables when UTF but not UCP
is configured.
Version 8.33 28-May-2013
------------------------
1. Added 'U' to some constants that are compared to unsigned integers, to
avoid compiler signed/unsigned warnings. Added (int) casts to unsigned
variables that are added to signed variables, to ensure the result is
signed and can be negated.
2. Applied patch by Daniel Richard G for quashing MSVC warnings to the
CMake config files.
3. Revise the creation of config.h.generic so that all boolean macros are
#undefined, whereas non-boolean macros are #ifndef/#endif-ed. This makes
overriding via -D on the command line possible.
4. Changing the definition of the variable "op" in pcre_exec.c from pcre_uchar
to unsigned int is reported to make a quite noticeable speed difference in
a specific Windows environment. Testing on Linux did also appear to show
some benefit (and it is clearly not harmful). Also fixed the definition of
Xop which should be unsigned.
5. Related to (4), changing the definition of the intermediate variable cc
in repeated character loops from pcre_uchar to pcre_uint32 also gave speed
improvements.
6. Fix forward search in JIT when link size is 3 or greater. Also removed some
unnecessary spaces.
7. Adjust autogen.sh and configure.ac to lose warnings given by automake 1.12
and later.
8. Fix two buffer over read issues in 16 and 32 bit modes. Affects JIT only.
9. Optimizing fast_forward_start_bits in JIT.
10. Adding support for callouts in JIT, and fixing some issues revealed
during this work. Namely:
(a) Unoptimized capturing brackets incorrectly reset on backtrack.
(b) Minimum length was not checked before the matching is started.
11. The value of capture_last that is passed to callouts was incorrect in some
cases when there was a capture on one path that was subsequently abandoned
after a backtrack. Also, the capture_last value is now reset after a
recursion, since all captures are also reset in this case.
12. The interpreter no longer returns the "too many substrings" error in the
case when an overflowing capture is in a branch that is subsequently
abandoned after a backtrack.
13. In the pathological case when an offset vector of size 2 is used, pcretest
now prints out the matched string after a yield of 0 or 1.
14. Inlining subpatterns in recursions, when certain conditions are fulfilled.
Only supported by the JIT compiler at the moment.
15. JIT compiler now supports 32 bit Macs thanks to Lawrence Velazquez.
16. Partial matches now set offsets[2] to the "bumpalong" value, that is, the
offset of the starting point of the matching process, provided the offsets
vector is large enough.
17. The \A escape now records a lookbehind value of 1, though its execution
does not actually inspect the previous character. This is to ensure that,
in partial multi-segment matching, at least one character from the old
segment is retained when a new segment is processed. Otherwise, if there
are no lookbehinds in the pattern, \A might match incorrectly at the start
of a new segment.
18. Added some #ifdef __VMS code into pcretest.c to help VMS implementations.
19. Redefined some pcre_uchar variables in pcre_exec.c as pcre_uint32; this
gives some modest performance improvement in 8-bit mode.
20. Added the PCRE-specific property \p{Xuc} for matching characters that can
be expressed in certain programming languages using Universal Character
Names.
21. Unicode validation has been updated in the light of Unicode Corrigendum #9,
which points out that "non characters" are not "characters that may not
appear in Unicode strings" but rather "characters that are reserved for
internal use and have only local meaning".
22. When a pattern was compiled with automatic callouts (PCRE_AUTO_CALLOUT) and
there was a conditional group that depended on an assertion, if the
assertion was false, the callout that immediately followed the alternation
in the condition was skipped when pcre_exec() was used for matching.
23. Allow an explicit callout to be inserted before an assertion that is the
condition for a conditional group, for compatibility with automatic
callouts, which always insert a callout at this point.
24. In 8.31, (*COMMIT) was confined to within a recursive subpattern. Perl also
confines (*SKIP) and (*PRUNE) in the same way, and this has now been done.
25. (*PRUNE) is now supported by the JIT compiler.
26. Fix infinite loop when /(?<=(*SKIP)ac)a/ is matched against aa.
27. Fix the case where there are two or more SKIPs with arguments that may be
ignored.
28. (*SKIP) is now supported by the JIT compiler.
29. (*THEN) is now supported by the JIT compiler.
30. Update RunTest with additional test selector options.
31. The way PCRE handles backtracking verbs has been changed in two ways.
(1) Previously, in something like (*COMMIT)(*SKIP), COMMIT would override
SKIP. Now, PCRE acts on whichever backtracking verb is reached first by
backtracking. In some cases this makes it more Perl-compatible, but Perl's
rather obscure rules do not always do the same thing.
(2) Previously, backtracking verbs were confined within assertions. This is
no longer the case for positive assertions, except for (*ACCEPT). Again,
this sometimes improves Perl compatibility, and sometimes does not.
32. A number of tests that were in test 2 because Perl did things differently
have been moved to test 1, because either Perl or PCRE has changed, and
these tests are now compatible.
32. Backtracking control verbs are now handled in the same way in JIT and
interpreter.
33. An opening parenthesis in a MARK/PRUNE/SKIP/THEN name in a pattern that
contained a forward subroutine reference caused a compile error.
34. Auto-detect and optimize limited repetitions in JIT.
35. Implement PCRE_NEVER_UTF to lock out the use of UTF, in particular,
blocking (*UTF) etc.
36. In the interpreter, maximizing pattern repetitions for characters and
character types now use tail recursion, which reduces stack usage.
37. The value of the max lookbehind was not correctly preserved if a compiled
and saved regex was reloaded on a host of different endianness.
38. Implemented (*LIMIT_MATCH) and (*LIMIT_RECURSION). As part of the extension
of the compiled pattern block, expand the flags field from 16 to 32 bits
because it was almost full.
39. Try madvise first before posix_madvise.
40. Change 7 for PCRE 7.9 made it impossible for pcregrep to find empty lines
with a pattern such as ^$. It has taken 4 years for anybody to notice! The
original change locked out all matches of empty strings. This has been
changed so that one match of an empty string per line is recognized.
Subsequent searches on the same line (for colouring or for --only-matching,
for example) do not recognize empty strings.
41. Applied a user patch to fix a number of spelling mistakes in comments.
42. Data lines longer than 65536 caused pcretest to crash.
43. Clarified the data type for length and startoffset arguments for pcre_exec
and pcre_dfa_exec in the function-specific man pages, where they were
explicitly stated to be in bytes, never having been updated. I also added
some clarification to the pcreapi man page.
44. A call to pcre_dfa_exec() with an output vector size less than 2 caused
a segmentation fault.
Version 8.32 30-November-2012
-----------------------------
@ -1508,7 +2003,8 @@ Version 7.9 11-Apr-09
7. A pattern that could match an empty string could cause pcregrep to loop; it
doesn't make sense to accept an empty string match in pcregrep, so I have
locked it out (using PCRE's PCRE_NOTEMPTY option). By experiment, this
seems to be how GNU grep behaves.
seems to be how GNU grep behaves. [But see later change 40 for release
8.33.]
8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
start or after a newline", because the conditional assertion was not being
@ -1751,7 +2247,7 @@ Version 7.7 07-May-08
containing () gave an internal compiling error instead of "reference to
non-existent subpattern". Fortunately, when the pattern did exist, the
compiled code was correct. (When scanning forwards to check for the
existencd of the subpattern, it was treating the data ']' as terminating
existence of the subpattern, it was treating the data ']' as terminating
the class, so got the count wrong. When actually compiling, the reference
was subsequently set up correctly.)

View File

@ -29,9 +29,9 @@ while (scalar(@ARGV) > 0)
^\.TH\s\S|
^\.SH\s\S|
^\.SS\s\S|
^\.TP(?:\s\d+)?\s*$|
^\.ti\s\S|
^\.TP(?:\s?\d+)?\s*$|
^\.SM\s*$|
^\.br\s*$|
^\.rs\s*$|
^\.sp\s*$|
^\.nf\s*$|

View File

@ -54,12 +54,12 @@ Support for 16-bit and 32-bit data strings
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
in any combination of 8-bit, 16-bit or 32-bit modes, creating different
libraries. In the description that follows, the word "short" is
used for a 16-bit data quantity, and the word "unit" is used for a quantity
that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
integer in 32-bit mode. However, so as not to over-complicate the text, the
names of PCRE functions are given in 8-bit form only.
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
different libraries. In the description that follows, the word "short" is used
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
However, so as not to over-complicate the text, the names of PCRE functions are
given in 8-bit form only.
Computing the memory requirement: how it was
@ -94,6 +94,11 @@ runs more slowly than before (30% or more, depending on the pattern) because it
is doing a full analysis of the pattern. My hope was that this would not be a
big issue, and in the event, nobody has commented on it.
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
(default 250, settable at build time) so as to put a limit on the amount of
system stack used by pcre_compile(). This is a safety feature for environments
with small stacks where the patterns are provided by users.
Traditional matching function
-----------------------------
@ -120,29 +125,30 @@ facilities are available, and those that are do not always work in quite the
same way. See the user documentation for details.
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
because it may have a number of states active at one time. More work would be
needed at compile time to produce a traditional FSM where only one state is
ever active at once. I believe some other regex matchers work this way.
because it may have a number of states active at one time. More work would be
needed at compile time to produce a traditional FSM where only one state is
ever active at once. I believe some other regex matchers work this way. JIT
support is not available for this kind of matching.
Changeable options
------------------
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may
change in the middle of patterns. From PCRE 8.13, their processing is handled
entirely at compile time by generating different opcodes for the different
settings. The runtime functions do not need to keep track of an options state
any more.
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
others) may change in the middle of patterns. From PCRE 8.13, their processing
is handled entirely at compile time by generating different opcodes for the
different settings. The runtime functions do not need to keep track of an
options state any more.
Format of compiled patterns
---------------------------
The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
items of variable length. The first unit in an item contains an opcode, and
the length of the item is either implicit in the opcode or contained in the
data that follows it.
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
variable length. The first unit in an item contains an opcode, and the length
of the item is either implicit in the opcode or contained in the data that
follows it.
In many cases listed below, LINK_SIZE data values are specified for offsets
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
@ -151,8 +157,10 @@ default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
LINK_SIZE values are available only in 8-bit mode.) Specifing a LINK_SIZE
larger than 2 is necessary only when patterns whose compiled length is greater
than 64K are going to be processed. In this description, we assume the "normal"
compilation options. Data values that are counts (e.g. for quantifiers) are
always just two bytes long (one short in 16-bit mode).
compilation options. Data values that are counts (e.g. quantifiers) are two
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
and 32-bit modes.
Opcodes with no following data
------------------------------
@ -162,7 +170,7 @@ These items are all just one unit long
OP_END end of pattern
OP_ANY match any one character other than newline
OP_ALLANY match any one character, including newline
OP_ANYBYTE match any single byte, even in UTF-8 mode
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
OP_SOD match start of data: \A
OP_SOM, start of match (subject + offset): \G
OP_SET_SOM, set start of match (\K)
@ -180,28 +188,33 @@ These items are all just one unit long
OP_VSPACE \v
OP_NOT_WORDCHAR \W
OP_WORDCHAR \w
OP_EODN match end of data or \n at end: \Z
OP_EODN match end of data or newline at end: \Z
OP_EOD match end of data: \z
OP_DOLL $ (end of data, or before final newline)
OP_DOLLM $ multiline mode (end of data or before newline)
OP_EXTUNI match an extended Unicode character
OP_EXTUNI match an extended Unicode grapheme cluster
OP_ANYNL match any Unicode newline sequence
OP_ASSERT_ACCEPT )
OP_ACCEPT ) These are Perl 5.10's "backtracking control
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
OP_FAIL ) parentheses, it may be preceded by one or more
OP_PRUNE ) OP_CLOSE, followed by a 2-byte number,
OP_SKIP ) indicating which parentheses must be closed.
OP_PRUNE ) OP_CLOSE, each followed by a count that
OP_SKIP ) indicates which parentheses must be closed.
OP_THEN )
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
This ends the assertion, not the entire pattern match.
Backtracking control verbs with (optional) data
-----------------------------------------------
Backtracking control verbs with optional data
---------------------------------------------
(*THEN) without an argument generates the opcode OP_THEN and no following data.
OP_MARK is followed by the mark name, preceded by a one-unit length, and
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
following in the same format.
following in the same format as OP_MARK.
Matching literal characters
@ -212,6 +225,10 @@ casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
the character may be more than one unit long. In UTF-32 mode, characters
are always exactly one unit long.
If there is only one character in a character class, OP_CHAR or OP_CHARI is
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
for something like [^a]).
Repeating single characters
---------------------------
@ -232,10 +249,9 @@ following opcodes, which come in caseful and caseless versions:
Each opcode is followed by the character that is to be repeated. In ASCII mode,
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
UTF-32 mode these are one-unit items.
Those with "MIN" in their names are the minimizing versions. Those with "POS"
in their names are possessive versions. Other repeats make use of these
opcodes:
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
minimizing versions. Those with "POS" in their names are possessive versions.
Other repeats make use of these opcodes:
Caseful Caseless
OP_UPTO OP_UPTOI
@ -243,10 +259,15 @@ opcodes:
OP_POSUPTO OP_POSUPTOI
OP_EXACT OP_EXACTI
Each of these is followed by a two-byte (one short) count (most significant
byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from
0 to the given number. A repeat with a non-zero minimum and a fixed maximum is
coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OPT_POSUPTO).
Each of these is followed by a count and then the repeated character. OP_UPTO
matches from 0 to the given number. A repeat with a non-zero minimum and a
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
OPT_POSUPTO).
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
etc.) are used for repeated, negated, single-character classes such as [^a]*.
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
positive single-character classes.
Repeating character types
@ -277,7 +298,10 @@ Match by Unicode property
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
character by testing its Unicode property (the \p and \P escape sequences).
Each is followed by two units that encode the desired property as a type and a
value.
value. The types are a set of #defines of the form PT_xxx, and the values are
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
PT_SC (Script).
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
@ -287,67 +311,88 @@ value.
Character classes
-----------------
If there is only one character in the class, OP_CHAR or OP_CHARI is used for a
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
something like [^a]).
Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for
repeated, negated, single-character classes. The normal single-character
opcodes (OP_STAR, etc.) are used for repeated positive single-character
classes.
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
negated, single-character classes. The normal single-character opcodes
(OP_STAR, etc.) are used for repeated positive single-character classes.
When there is more than one character in a class and all the characters are
When there is more than one character in a class, and all the code points are
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
negative one. In either case, the opcode is followed by a 32-byte (16-short)
bit map containing a 1 bit for every character that is acceptable. The bits are
counted from the least significant end of each unit. In caseless mode, bits for
both cases are set.
negative one. In either case, the opcode is followed by a 32-byte (16-short,
8-word) bit map containing a 1 bit for every character that is acceptable. The
bits are counted from the least significant end of each unit. In caseless mode,
bits for both cases are set.
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
subject characters with values greater than 255 can be handled correctly. For
OP_CLASS they do not match, whereas for OP_NCLASS they do.
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
mode, subject characters with values greater than 255 can be handled correctly.
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
For classes containing characters with values greater than 255, OP_XCLASS is
used. It optionally uses a bit map (if any characters lie within it), followed
by a list of pairs (for a range) and single characters. In caseless mode, both
cases are explicitly listed. There is a flag character than indicates whether
it is a positive or a negative class.
For classes containing characters with values greater than 255 or that contain
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
are less than 256, followed by a list of pairs (for a range) and single
characters. In caseless mode, both cases are explicitly listed.
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
this is a negative class, and XCL_MAP indicates that a bit map is present.
There follows the bit map, if XCL_MAP is set, and then a sequence of items
coded as follows:
XCL_END marks the end of the list
XCL_SINGLE one character follows
XCL_RANGE two characters follow
XCL_PROP a Unicode property (type, value) follows
XCL_NOTPROP a Unicode property (type, value) follows
If a range starts with a code point less than 256 and ends with one greater
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
This means that if no other items in the class set bits in the map, a map is
not needed.
Back references
---------------
OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short)
containing the reference number.
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
reference number if the reference is to a unique capturing group (either by
number or by name). When named groups are used, there may be more than one
group with the same name. In this case, a reference by name generates OP_DNREF
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
in the group name table of the first entry for the requred name, followed by
the number of groups with the same name.
Repeating character classes and back references
-----------------------------------------------
Single-character classes are handled specially (see above). This section
applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information
follows the base item. The matching code looks at the following opcode to see
if it is one of
applies to other classes and also to back references. In both cases, the repeat
information follows the base item. The matching code looks at the following
opcode to see if it is one of
OP_CRSTAR
OP_CRMINSTAR
OP_CRPOSSTAR
OP_CRPLUS
OP_CRMINPLUS
OP_CRPOSPLUS
OP_CRQUERY
OP_CRMINQUERY
OP_CRPOSQUERY
OP_CRRANGE
OP_CRMINRANGE
OP_CRPOSRANGE
All but the last two are just single-unit items. The others are followed by
four bytes (two shorts) of data, comprising the minimum and maximum repeat
counts. There are no special possessive opcodes for these repeats; a possessive
repeat is compiled into an atomic group.
All but the last three are single-unit items, with no data. The others are
followed by the minimum and maximum repeat counts.
Brackets and alternation
------------------------
A pair of non-capturing (round) brackets is wrapped round each expression at
A pair of non-capturing round brackets is wrapped round each expression at
compile time, so alternation always happens in the context of brackets.
[Note for North Americans: "bracket" to some English speakers, including
@ -364,20 +409,20 @@ A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
next alternative OP_ALT or, if there aren't any branches, to the matching
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
number immediately follows the offset, always as a 2-byte (one short) item.
number is a count that immediately follows the offset.
OP_KET is used for subpatterns that do not repeat indefinitely, and
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
maximally respectively (see below for possessive repetitions). All three are
followed by LINK_SIZE bytes giving (as a positive number) the offset back to
the matching bracket opcode.
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
respectively (see below for possessive repetitions). All three are followed by
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
bracket opcode.
If a subpattern is quantified such that it is permitted to match zero times, it
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
single-unit opcodes that tell the matcher that skipping the following
subpattern entirely is a valid branch. In the case of the first two, not
skipping the pattern is also valid (greedy and non-greedy). The third is used
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
because it may be called as a subroutine from elsewhere in the regex.
A subpattern with an indefinite maximum repetition is replicated in the
@ -397,6 +442,7 @@ final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
that it needs to check for matching an empty string when it hits OP_KETRMIN or
OP_KETRMAX, and if so, to break the loop.
Possessive brackets
-------------------
@ -407,26 +453,34 @@ of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
repetition is zero, the group is preceded by OP_BRAPOSZERO.
Once-only (atomic) groups
-------------------------
These are just like other subpatterns, but they start with the opcode
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
within the atomic group; the latter when there are. The distinction is needed
for when there is a backtrack to before the group - any captures within the
group must be reset, so it is necessary to retain backtracking points inside
the group even after it is complete in order to do this. When there are no
captures in an atomic group, all the backtracking can be discarded when it is
complete. This is more efficient, and also uses less stack.
The check for matching an empty string in an unbounded repeat is handled
entirely at runtime, so there are just these two opcodes for atomic groups.
Assertions
----------
Forward assertions are just like other subpatterns, but starting with one of
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
Forward assertions are also just like other subpatterns, but starting with one
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
is OP_REVERSE, followed by a two byte (one short) count of the number of
characters to move back the pointer in the subject string. In ASCII mode, the
count is a number of units, but in UTF-8/16 mode each character may occupy more
than one unit; in UTF-32 mode each character occupies exactly one unit.
A separate count is present in each alternative of a lookbehind
assertion, allowing them to have different fixed lengths.
Once-only (atomic) subpatterns
------------------------------
These are also just like other subpatterns, but they start with the opcode
OP_ONCE. The check for matching an empty string in an unbounded repeat is
handled entirely at runtime, so there is just this one opcode.
is OP_REVERSE, followed by a count of the number of characters to move back the
pointer in the subject string. In ASCII mode, the count is a number of units,
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
mode each character occupies exactly one unit. A separate count is present in
each alternative of a lookbehind assertion, allowing them to have different
fixed lengths.
Conditional subpatterns
@ -435,28 +489,29 @@ Conditional subpatterns
These are like other subpatterns, but they start with the opcode OP_COND, or
OP_SCOND for one that might match an empty string in an unbounded repeat. If
the condition is a back reference, this is stored at the start of the
subpattern using the opcode OP_CREF followed by two bytes (one short)
containing the reference number. OP_NCREF is used instead if the reference was
generated by name (so that the runtime code knows to check for duplicate
names).
subpattern using the opcode OP_CREF followed by a count containing the
reference number, provided that the reference is to a unique capturing group.
If the reference was by name and there is more than one group with that name,
OP_DNCREF is used instead. It is followed by two counts: the index in the group
names table, and the number of groups with the same name.
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
zero for "the whole pattern". For a DEFINE condition, just the single unit
OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
always starts with one of the assertions.
subpattern using the opcode OP_RREF (with a value of zero for "the whole
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
conditional subpattern always starts with one of the assertions.
Recursion
---------
Recursion either matches the current regex, or some subexpression. The opcode
OP_RECURSE is followed by an value which is the offset to the starting bracket
from the start of the whole pattern. From release 6.5, OP_RECURSE is
automatically wrapped inside OP_ONCE brackets (because otherwise some patterns
broke it). OP_RECURSE is also used for "subroutine" calls, even though they
are not strictly a recursion.
OP_RECURSE is followed by aLINK_SIZE value that is the offset to the starting
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
not strictly a recursion.
Callout
@ -464,10 +519,10 @@ Callout
OP_CALLOUT is followed by one unit of data that holds a callout number in the
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
cases there follows a two-byte (one short) value giving the offset in the
pattern to the start of the following item, and another two-byte (one short)
item giving the length of the next item.
cases there follows a count giving the offset in the pattern string to the
start of the following item, and another count giving the length of this item.
These values make is possible for pcretest to output useful tracing information
using automatic callouts.
Philip Hazel
February 2012
November 2013

View File

@ -1,7 +1,7 @@
Installation Instructions
*************************
Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
Inc.
Copying and distribution of this file, with or without modification,
@ -12,8 +12,8 @@ without warranty of any kind.
Basic Installation
==================
Briefly, the shell commands `./configure; make; make install' should
configure, build, and install this package. The following
Briefly, the shell command `./configure && make && make install'
should configure, build, and install this package. The following
more-detailed instructions are generic; see the `README' file for
instructions specific to this package. Some packages provide this
`INSTALL' file but do not implement all of the features documented
@ -309,9 +309,10 @@ causes the specified `gcc' to be used as the C compiler (unless it is
overridden in the site shell script).
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
an Autoconf bug. Until the bug is fixed you can use this workaround:
an Autoconf limitation. Until the limitation is lifted, you can use
this workaround:
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
`configure' Invocation
======================
@ -367,4 +368,3 @@ operates.
`configure' also accepts some other, not widely useful, options. Run
`configure --help' for more details.

View File

@ -24,7 +24,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2014 University of Cambridge
All rights reserved.
@ -35,7 +35,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2012 Zoltan Herczeg
Copyright(c) 2010-2014 Zoltan Herczeg
All rights reserved.
@ -46,7 +46,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2012 Zoltan Herczeg
Copyright(c) 2009-2014 Zoltan Herczeg
All rights reserved.

View File

@ -14,11 +14,16 @@ dist_doc_DATA = \
NEWS \
README
# Note that pcrecpp.html is not in this list; it is listed separately below.
dist_html_DATA = \
doc/html/NON-AUTOTOOLS-BUILD.txt \
doc/html/README.txt \
doc/html/index.html \
doc/html/pcre-config.html \
doc/html/pcre.html \
doc/html/pcre16.html \
doc/html/pcre-config.html \
doc/html/pcre32.html \
doc/html/pcre_assign_jit_stack.html \
doc/html/pcre_compile.html \
doc/html/pcre_compile2.html \
@ -44,6 +49,7 @@ dist_html_DATA = \
doc/html/pcre_refcount.html \
doc/html/pcre_study.html \
doc/html/pcre_utf16_to_host_byte_order.html \
doc/html/pcre_utf32_to_host_byte_order.html \
doc/html/pcre_version.html \
doc/html/pcreapi.html \
doc/html/pcrebuild.html \
@ -65,10 +71,6 @@ dist_html_DATA = \
doc/html/pcretest.html \
doc/html/pcreunicode.html
# doc/html/pcre32.html \
# doc/html/pcre_utf32_to_host_byte_order.html \
#
pcrecpp_html = doc/html/pcrecpp.html
dist_noinst_DATA = $(pcrecpp_html)
@ -140,14 +142,16 @@ pcre.h.generic: pcre.h.in configure.ac
cp -p pcre.h $@
# It is more complicated for config.h.generic. We need the version that results
# from a default configuration. We can get this by doing a configure in a
# temporary directory. However, some trickery is needed,
# because the source directory may already be configured. If you
# just try running configure in a new directory, it complains. For this reason,
# we move config.status out of the way while doing the default configuration.
# The resulting config.h is munged by perl to put #ifdefs round any #defines
# and to get rid of any gcc-specific visibility settings. Make sure that
# PCRE_EXP_DEFN is unset (in case it has visibility settings).
# from a default configuration so as to get all the default values for PCRE
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
# doing a configure in a temporary directory. However, some trickery is needed,
# because the source directory may already be configured. If you just try
# running configure in a new directory, it complains. For this reason, we move
# config.status out of the way while doing the default configuration. The
# resulting config.h is munged by perl to put #ifdefs round any #defines for
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
# sure that PCRE_EXP_DEFN is unset (in case it has visibility settings).
config.h.generic: configure.ac
rm -rf $@ _generic
mkdir _generic
@ -160,8 +164,10 @@ config.h.generic: configure.ac
-e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
-e 'if(/to make a symbol visible/){next;}' \
-e 'if(/__attribute__ \(\(visibility/){next;}' \
-e 'if(/^#define\s(?!PACKAGE)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;}' \
-e 'else {if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}}' \
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
_generic/config.h >$@
rm -rf _generic
@ -344,15 +350,19 @@ EXTRA_DIST += \
sljit/sljitExecAllocator.c \
sljit/sljitLir.c \
sljit/sljitLir.h \
sljit/sljitNativeARM_Thumb2.c \
sljit/sljitNativeARM_v5.c \
sljit/sljitNativeARM_32.c \
sljit/sljitNativeARM_64.c \
sljit/sljitNativeARM_T2_32.c \
sljit/sljitNativeMIPS_32.c \
sljit/sljitNativeMIPS_64.c \
sljit/sljitNativeMIPS_common.c \
sljit/sljitNativePPC_32.c \
sljit/sljitNativePPC_64.c \
sljit/sljitNativePPC_common.c \
sljit/sljitNativeSPARC_32.c \
sljit/sljitNativeSPARC_common.c \
sljit/sljitNativeTILEGX_64.c \
sljit/sljitNativeTILEGX-encoder.c \
sljit/sljitNativeX86_32.c \
sljit/sljitNativeX86_64.c \
sljit/sljitNativeX86_common.c \
@ -572,6 +582,8 @@ EXTRA_DIST += \
testdata/testoutput1 \
testdata/testoutput2 \
testdata/testoutput3 \
testdata/testoutput3A \
testdata/testoutput3B \
testdata/testoutput4 \
testdata/testoutput5 \
testdata/testoutput6 \
@ -610,8 +622,10 @@ CLEANFILES += \
teststderr \
testtemp* \
testtry \
testNinput
testNinput \
testtrygrep \
teststderrgrep \
testNinputgrep
# PCRE demonstration program. No longer built automatcally. The point is that
# the users should build it themselves. So just distribute the source.
@ -659,11 +673,13 @@ if WITH_PCRE_CPP
pkgconfig_DATA += libpcrecpp.pc
endif
# Note that pcrecpp.3 is not in this list, but is included separately below.
dist_man_MANS = \
doc/pcre-config.1 \
doc/pcre.3 \
doc/pcre16.3 \
doc/pcre32.3 \
doc/pcre-config.1 \
doc/pcre_assign_jit_stack.3 \
doc/pcre_compile.3 \
doc/pcre_compile2.3 \
@ -695,6 +711,7 @@ dist_man_MANS = \
doc/pcrebuild.3 \
doc/pcrecallout.3 \
doc/pcrecompat.3 \
doc/pcredemo.3 \
doc/pcregrep.1 \
doc/pcrejit.3 \
doc/pcrelimits.3 \

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,88 @@
News about PCRE releases
------------------------
Release 8.35 04-April-2014
--------------------------
There have been performance improvements for classes containing non-ASCII
characters and the "auto-possessification" feature has been extended. Other
minor improvements have been implemented and bugs fixed. There is a new callout
feature to enable applications to do detailed stack checks at compile time, to
avoid running out of stack for deeply nested parentheses. The JIT compiler has
been extended with experimental support for ARM-64, MIPS-64, and PPC-LE.
Release 8.34 15-December-2013
-----------------------------
As well as fixing the inevitable bugs, performance has been improved by
refactoring and extending the amount of "auto-possessification" that PCRE does.
Other notable changes:
. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
an empty string. If it can, pcretest shows this in its information output.
. A back reference to a named subpattern when there is more than one of the
same name now checks them in the order in which they appear in the pattern.
The first one that is set is used for the reference. Previously only the
first one was inspected. This change makes PCRE more compatible with Perl.
. Unicode character properties were updated from Unicode 6.3.0.
. The character VT has been added to the set of characters that match \s and
are generally treated as white space, following this same change in Perl
5.18. There is now no difference between "Perl space" and "POSIX space".
. Perl has changed its handling of \8 and \9. If there is no previously
encountered capturing group of those numbers, they are treated as the
literal characters 8 and 9 instead of a binary zero followed by the
literals. PCRE now does the same.
. Following Perl, added \o{} to specify codepoints in octal, making it
possible to specify values greater than 0777 and also making them
unambiguous.
. In UCP mode, \s was not matching two of the characters that Perl matches,
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
were matched by \h.
. Add JIT support for the 64 bit TileGX architecture.
. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
does in Unicode mode.
. Perl no longer allows group names to start with digits, so I have made this
change also in PCRE.
. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
mean "start of word" and "end of word", respectively, as a transition aid.
Release 8.33 28-May-2013
--------------------------
A number of bugs are fixed, and some performance improvements have been made.
There are also some new features, of which these are the most important:
. The behaviour of the backtracking verbs has been rationalized and
documented in more detail.
. JIT now supports callouts and all of the backtracking verbs.
. Unicode validation has been updated in the light of Unicode Corrigendum #9,
which points out that "non characters" are not "characters that may not
appear in Unicode strings" but rather "characters that are reserved for
internal use and have only local meaning".
. (*LIMIT_MATCH=d) and (*LIMIT_RECURSION=d) have been added so that the
creator of a pattern can specify lower (but not higher) limits for the
matching process.
. The PCRE_NEVER_UTF option is available to prevent pattern-writers from using
the (*UTF) feature, as this could be a security issue.
Release 8.32 30-November-2012
-----------------------------
@ -591,7 +673,7 @@ some of the new functionality in Perl 5.005.
Another (I hope this is the last!) change has been made to the API for the
pcre_compile() function. An additional argument has been added to make it
possible to pass over a pointer to character tables built in the current
locale by pcre_maketables(). To use the default tables, this new arguement
locale by pcre_maketables(). To use the default tables, this new argument
should be passed as NULL.
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05

View File

@ -9,11 +9,14 @@ This document contains the following sections:
Building for virtual Pascal
Stack size in Windows environments
Linking programs in Windows environments
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE on Windows with CMake
Use of relative paths with CMake on Windows
Testing with RunTest.bat
Building under Windows CE with Visual Studio 200x
Building under Windows with BCC5.5
Building using Borland C++ Builder 2007 (CB2007) and higher
Building PCRE on OpenVMS
Building PCRE on Stratus OpenVOS
Building PCRE on native z/OS and z/VM
@ -168,8 +171,8 @@ can skip ahead to the CMake section.
pcre16_version.c
pcre16_xclass.c
(7') If you want to build a 16-bit library (as well as, or instead of the 8-bit
or 32-bit libraries) repeat steps 5-6 with the following files:
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
or 16-bit libraries) repeat steps 5-6 with the following files:
pcre32_byte_order.c
pcre32_chartables.c
@ -194,30 +197,31 @@ can skip ahead to the CMake section.
pcre32_version.c
pcre32_xclass.c
(8) If you want to build the POSIX wrapper functions (which apply only to the
(9) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the pcreposix.h file and then compile
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
(on its own) as the pcreposix library.
(9) The pcretest program can be linked with any combination of the 8-bit, 16-bit
and 32-bit libraries (depending on what you selected in config.h). Compile
pcretest.c and pcre_printint.c (again, don't forget -DHAVE_CONFIG_H) and
link them together with the appropriate library/ies. If you compiled an
8-bit library, pcretest also needs the pcreposix wrapper library unless
you compiled it with -DNOPOSIX.
(10) The pcretest program can be linked with any combination of the 8-bit,
16-bit and 32-bit libraries (depending on what you selected in config.h).
Compile pcretest.c and pcre_printint.c (again, don't forget
-DHAVE_CONFIG_H) and link them together with the appropriate library/ies.
If you compiled an 8-bit library, pcretest also needs the pcreposix
wrapper library unless you compiled it with -DNOPOSIX.
(10) Run pcretest on the testinput files in the testdata directory, and check
(11) Run pcretest on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE"
in the README file. If you compiled more than one of the 8-bit, 16-bit and
32-bit libraries, you need to run pcretest with the -16 option to do 16-bit
tests and with the -32 option to do 32-bit tests.
32-bit libraries, you need to run pcretest with the -16 option to do
16-bit tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run if
you have built PCRE without it. See the comments at the start of each
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run
if you have built PCRE without it. See the comments at the start of each
testinput file. If you have a suitable Unix-like shell, the RunTest script
will run the appropriate tests for you.
will run the appropriate tests for you. The command "RunTest list" will
output a list of all the tests.
Note that the supplied files are in Unix format, with just LF characters
as line terminators. You may need to edit them to change this if your
@ -227,11 +231,11 @@ can skip ahead to the CMake section.
locale to "french" rather than "fr_FR", and there some minor output
differences.
(11) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
by the testdata files. However, you might also like to build and run
the JIT test program, pcre_jit_test.c.
the freestanding JIT test program, pcre_jit_test.c.
(12) If you want to use the pcregrep command, compile and link pcregrep.c; it
(13) If you want to use the pcregrep command, compile and link pcregrep.c; it
uses only the basic 8-bit PCRE library (it does not need the pcreposix
library).
@ -428,16 +432,13 @@ CMake build process. In the CMake GUI, the cache can be deleted by selecting
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
A PCRE user comments as follows:
A PCRE user comments as follows: I thought that others may want to know the
current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is:
I thought that others may want to know the current state of
CMAKE_USE_RELATIVE_PATHS support on Windows.
Here it is:
-- AdditionalIncludeDirectories is only partially modified (only the
first path - see below)
first path - see below)
-- Only some of the contained file paths are modified - shown below for
pcre.vcproj
pcre.vcproj
-- It properly modifies
I am sure CMake people can fix that if they want to. Until then one will
@ -449,9 +450,9 @@ deal.
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
RelativePath="pcre.h">
RelativePath="pcre_chartables.c">
RelativePath="pcre_chartables.c.rule">
RelativePath="pcre.h"
RelativePath="pcre_chartables.c"
RelativePath="pcre_chartables.c.rule"
TESTING WITH RUNTEST.BAT
@ -489,20 +490,6 @@ To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
pcre_scanner_unittest.exe.
BUILDING UNDER WINDOWS WITH BCC5.5
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
Some of the core BCC libraries have a version of PCRE from 1998 built in,
which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
version mismatch. I'm including an easy workaround below, if you'd like to
include it in the non-unix instructions:
When linking a project with BCC5.5, pcre.lib must be included before any of
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
line.
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
Vincent Richomme sent a zip archive of files to help with this process. They
@ -510,11 +497,149 @@ can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
site.
BUILDING UNDER WINDOWS WITH BCC5.5
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
Some of the core BCC libraries have a version of PCRE from 1998 built in, which
can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version
mismatch. I'm including an easy workaround below, if you'd like to include it
in the non-unix instructions:
When linking a project with BCC5.5, pcre.lib must be included before any of the
libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line.
BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER
A PCRE user sent these comments about this environment (see also the comment
from another user that follows them):
The XE versions of C++ Builder come with a RegularExpressionsCore class which
contain a version of TPerlRegEx. However, direct use of the C PCRE library may
be desirable.
The default makevp.bat, however, supplied with PCRE builds a version of PCRE
that is not usable with any version of C++ Builder because the compiler ships
with an embedded version of PCRE, version 2.01 from 1998! [See also the note
about BCC5.5 above.] If you want to use PCRE you'll need to rename the
functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just
use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the
embedded version of PCRE does not have the 16 bit function names, there is no
conflict.
Building PCRE using a C++ Builder static library project file (recommended):
1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder
original include path.
2. Download PCRE from pcre.org and extract to a directory.
3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to
pcre.h, and config.h.generic to config.h.
4. Edit pcre.h and pcre_config.c so that they include config.h.
5. Edit config.h like so:
Comment out the following lines:
#define PACKAGE "pcre"
#define PACKAGE_BUGREPORT ""
#define PACKAGE_NAME "PCRE"
#define PACKAGE_STRING "PCRE 8.32"
#define PACKAGE_TARNAME "pcre"
#define PACKAGE_URL ""
#define PACKAGE_VERSION "8.32"
Add the following lines:
#ifndef SUPPORT_UTF
#define SUPPORT_UTF 100 // any value is fine
#endif
#ifndef SUPPORT_UCP
#define SUPPORT_UCP 101 // any value is fine
#endif
#ifndef SUPPORT_UCP
#define SUPPORT_PCRE16 102 // any value is fine
#endif
#ifndef SUPPORT_UTF8
#define SUPPORT_UTF8 103 // any value is fine
#endif
6. Build a C++ Builder project using the IDE. Go to File / New / Other and
choose Static Library. You can name it pcre.cbproj or whatever. Now set your
paths by going to Project / Options. Set the Include path. Do this from the
"Base" option to apply to both Release and Debug builds. Now add the following
files to the project:
pcre.h
pcre16_byte_order.c
pcre16_chartables.c
pcre16_compile.c
pcre16_config.c
pcre16_dfa_exec.c
pcre16_exec.c
pcre16_fullinfo.c
pcre16_get.c
pcre16_globals.c
pcre16_maketables.c
pcre16_newline.c
pcre16_ord2utf16.c
pcre16_printint.c
pcre16_refcount.c
pcre16_string_utils.c
pcre16_study.c
pcre16_tables.c
pcre16_ucd.c
pcre16_utf16_utils.c
pcre16_valid_utf16.c
pcre16_version.c
pcre16_xclass.c
//Optional
pcre_version.c
7. After compiling the .lib file, copy the .lib and header files to a project
you want to use PCRE with. Enjoy.
Optional ... Building PCRE using the makevp.bat file:
1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit
versions.
2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat.
Another PCRE user added this comment:
Another approach I successfully used for some years with BCB 5 and 6 was to
make sure that include and library paths of PCRE are configured before the
default paths of the IDE in the dialogs where one can manage those paths.
Afterwards one can open the project files using a text editor and manually add
the self created library for pcre itself, pcrecpp doesn't ship with the IDE, in
the library nodes where the IDE manages its own libraries to link against in
front of the IDE-own libraries. This way one can use the default PCRE function
names without getting access violations on runtime.
<ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/>
BUILDING PCRE ON OPENVMS
Dan Mooney sent the following comments about building PCRE on OpenVMS. They
relate to an older version of PCRE that used fewer source files, so the exact
commands will need changing. See the current list of source files above.
Stephen Hoffman sent the following, in December 2012:
"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the
OpenVMS port and here
<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip>
is a zip with the OpenVMS files, and with one modified testing-related PCRE
file." This is a port of PCRE 8.32.
Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS.
They relate to an older version of PCRE that used fewer source files, so the
exact commands will need changing. See the current list of source files above.
"It was quite easy to compile and link the library. I don't have a formal
make file but the attached file [reproduced below] contains the OpenVMS DCL
@ -636,4 +761,4 @@ There is also a mirror here:
http://www.vsoft-software.com/downloads.html
==========================
Last Updated: 21 November 2012
Last Updated: 14 May 2013

View File

@ -25,6 +25,12 @@
# when the HTML documentation is built. It works like this so that
# doc/html can be deleted and re-created from scratch.
# README & NON-AUTOTOOLS-BUILD
# These files are copied into the doc/html directory, with .txt
# extensions so that they can by hyperlinked from the HTML
# documentation, because some people just go to the HTML without
# looking for text files.
# First, sort out the documentation. Remove pcredemo.3 first because it won't
# pass the markup check (it is created below, using markup that none of the
@ -122,6 +128,8 @@ if [ $? != 0 ] ; then exit 1; fi
echo "Making HTML documentation"
/bin/rm html/*
cp index.html.src html/index.html
cp ../README html/README.txt
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
for file in *.1 ; do
base=`basename $file .1`
@ -218,7 +226,6 @@ files="\
pcre_string_utils.c \
pcre_study.c \
pcre_tables.c \
pcre_ucp_searchfuncs.c \
pcre_valid_utf8.c \
pcre_version.c \
pcre_xclass.c \
@ -238,8 +245,6 @@ files="\
pcre_stringpiece_unittest.cc \
perltest.pl \
ucp.h \
ucpinternal.h \
ucptable.h \
makevp.bat \
pcre.def \
libpcre.def \

View File

@ -9,8 +9,10 @@ from:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
There is a mailing list for discussion about the development of PCRE at
pcre-dev@exim.org. You can access the archives and subscribe or manage your
subscription here:
pcre-dev@exim.org
https://lists.exim.org/mailman/listinfo/pcre-dev
Please read the NEWS file if you are upgrading from a previous release.
The contents of this README file are:
@ -25,6 +27,8 @@ The contents of this README file are:
Shared libraries
Cross-compiling using autotools
Using HP's ANSI C++ compiler (aCC)
Compiling in Tru64 using native compilers
Using Sun's compilers for Solaris
Using PCRE from MySQL
Making new tarballs
Testing PCRE
@ -35,10 +39,10 @@ The contents of this README file are:
The PCRE APIs
-------------
PCRE is written in C, and it has its own API. There are three sets of functions,
one for the 8-bit library, which processes strings of bytes, one for the
16-bit library, which processes strings of 16-bit values, and one for the 32-bit
library, which processes strings of 32-bit values. The distribution also
PCRE is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. The distribution also
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
C++.
@ -81,11 +85,12 @@ documentation is supplied in two other forms:
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
doc/pcretest.txt in the source distribution. The first of these is a
concatenation of the text forms of all the section 3 man pages except
those that summarize individual functions. The other two are the text
forms of the section 1 man pages for the pcregrep and pcretest commands.
These text forms are provided for ease of scanning with text editors or
similar tools. They are installed in <prefix>/share/doc/pcre, where
<prefix> is the installation prefix (defaulting to /usr/local).
the listing of pcredemo.c and those that summarize individual functions.
The other two are the text forms of the section 1 man pages for the
pcregrep and pcretest commands. These text forms are provided for ease of
scanning with text editors or similar tools. They are installed in
<prefix>/share/doc/pcre, where <prefix> is the installation prefix
(defaulting to /usr/local).
2. A set of files containing all the documentation in HTML form, hyperlinked
in various ways, and rooted in a file called index.html, is distributed in
@ -110,6 +115,11 @@ contributions provided support for compiling PCRE on various flavours of
Windows (I myself do not use Windows). Nowadays there is more Windows support
in the standard distribution, so these contibutions have been archived.
A PCRE user maintains downloadable Windows binaries of the pcregrep and
pcretest programs here:
http://www.rexegg.com/pcregrep-pcretest.html
Building PCRE on non-Unix-like systems
--------------------------------------
@ -260,9 +270,17 @@ library. They are also documented in the pcrebuild man page.
on the "configure" command.
. PCRE has a counter that can be set to limit the amount of resources it uses.
If the limit is exceeded during a match, the match fails. The default is ten
million. You can change the default by setting, for example,
. PCRE has a counter that limits the depth of nesting of parentheses in a
pattern. This limits the amount of system stack that a pattern uses when it
is compiled. The default is 250, but you can change it by setting, for
example,
--with-parens-nest-limit=500
. PCRE has a counter that can be set to limit the amount of resources it uses
when matching a pattern. If the limit is exceeded during a match, the match
fails. The default is ten million. You can change the default by setting, for
example,
--with-match-limit=500000
@ -342,7 +360,8 @@ library. They are also documented in the pcrebuild man page.
report is generated by running "make coverage". If ccache is installed on
your system, it must be disabled when building PCRE for coverage reporting.
You can do this by setting the environment variable CCACHE_DISABLE=1 before
running "make" to build PCRE.
running "make" to build PCRE. There is more information about coverage
reporting in the "pcrebuild" documentation.
. The pcregrep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
@ -354,12 +373,12 @@ library. They are also documented in the pcrebuild man page.
Of course, the relevant libraries must be installed on your system.
. The default size of internal buffer used by pcregrep can be set by, for
example:
. The default size (in bytes) of the internal buffer used by pcregrep can be
set by, for example:
--with-pcregrep-bufsize=50K
--with-pcregrep-bufsize=51200
The default value is 20K.
The value must be a plain integer. The default is 20480.
. It is possible to compile pcretest so that it links with the libreadline
or libedit libraries, by specifying, respectively,
@ -575,6 +594,27 @@ running the "configure" script:
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
Compiling in Tru64 using native compilers
-----------------------------------------
The following error may occur when compiling with native compilers in the Tru64
operating system:
CXX libpcrecpp_la-pcrecpp.lo
cxx: Error: /usr/lib/cmplrs/cxx/V7.1-006/include/cxx/iosfwd, line 58: #error
directive: "cannot include iosfwd -- define __USE_STD_IOSTREAM to
override default - see section 7.1.2 of the C++ Using Guide"
#error "cannot include iosfwd -- define __USE_STD_IOSTREAM to override default
- see section 7.1.2 of the C++ Using Guide"
This may be followed by other errors, complaining that 'namespace "std" has no
member'. The solution to this is to add the line
#define __USE_STD_IOSTREAM 1
to the config.h file.
Using Sun's compilers for Solaris
---------------------------------
@ -624,27 +664,40 @@ NON-AUTOTOOLS-BUILD.
The RunTest script runs the pcretest test program (which is documented in its
own man page) on each of the relevant testinput files in the testdata
directory, and compares the output with the contents of the corresponding
testoutput files. Some tests are relevant only when certain build-time options
were selected. For example, the tests for UTF-8/16/32 support are run only if
--enable-utf was used. RunTest outputs a comment when it skips a test.
testoutput files. RunTest uses a file called testtry to hold the main output
from pcretest. Other files whose names begin with "test" are used as working
files in some tests.
Some tests are relevant only when certain build-time options were selected. For
example, the tests for UTF-8/16/32 support are run only if --enable-utf was
used. RunTest outputs a comment when it skips a test.
Many of the tests that are not skipped are run up to three times. The second
run forces pcre_study() to be called for all patterns except for a few in some
tests that are marked "never study" (see the pcretest program for how this is
done). If JIT support is available, the non-DFA tests are run a third time,
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
This testing can be suppressed by putting "nojit" on the RunTest command line.
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
libraries that are enabled. If you want to run just one set of tests, call
RunTest with either the -8, -16 or -32 option.
RunTest uses a file called testtry to hold the main output from pcretest.
Other files whose names begin with "test" are used as working files in some
tests. To run pcretest on just one or more specific test files, give their
numbers as arguments to RunTest, for example:
If valgrind is installed, you can run the tests under it by putting "valgrind"
on the RunTest command line. To run pcretest on just one or more specific test
files, give their numbers as arguments to RunTest, for example:
RunTest 2 7 11
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
end), or a number preceded by ~ to exclude a test. For example:
Runtest 3-15 ~10
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
except test 13. Whatever order the arguments are in, the tests are always run
in numerical order.
You can also call RunTest with the single argument "list" to cause it to output
a list of tests.
@ -704,21 +757,24 @@ test is run only when JIT support is not available. They test some JIT-specific
features such as information output from pcretest about JIT compilation.
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit mode.
These are tests that generate different output in the two modes. They are for
general cases, UTF-8/16/32 support, and Unicode property support, respectively.
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit
mode. These are tests that generate different output in the two modes. They are
for general cases, UTF-8/16/32 support, and Unicode property support,
respectively.
The twentieth test is run only in 16/32-bit mode. It tests some specific
16/32-bit features of the DFA matching engine.
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when the
link size is set to 2 for the 16-bit library. They test reloading pre-compiled patterns.
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when
the link size is set to 2 for the 16-bit library. They test reloading
pre-compiled patterns.
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are for
general cases, and UTF-16 support, respectively.
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are
for general cases, and UTF-16 support, respectively.
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are
for general cases, and UTF-32 support, respectively.
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are for
general cases, and UTF-32 support, respectively.
Character tables
----------------
@ -784,11 +840,11 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
(A) Source files of the PCRE library functions and their headers:
dftables.c auxiliary program for building pcre_chartables.c
when --enable-rebuild-chartables is specified
when --enable-rebuild-chartables is specified
pcre_chartables.c.dist a default set of character tables that assume ASCII
coding; used, unless --enable-rebuild-chartables is
specified, by copying to pcre[16]_chartables.c
coding; used, unless --enable-rebuild-chartables is
specified, by copying to pcre[16]_chartables.c
pcreposix.c )
pcre[16|32]_byte_order.c )
@ -932,4 +988,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
Last updated: 27 October 2012
Last updated: 17 January 2014

View File

@ -69,427 +69,447 @@ utf8=$?
echo "Testing pcregrep main features"
echo "---------------------------- Test 1 ------------------------------" >testtry
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 1 ------------------------------" >testtrygrep
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 2 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 2 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 3 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 3 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 4 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 4 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 5 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 5 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 6 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 6 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 7 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 7 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 8 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 8 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 9 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 9 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 10 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 10 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 11 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 11 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 12 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 12 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 13 -----------------------------" >>testtry
echo seventeen >testtemp1
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1 ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 13 -----------------------------" >>testtrygrep
echo seventeen >testtemp1grep
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 14 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 14 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 15 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtry >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 16 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtry >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 17 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 18 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 18 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 19 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 19 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 20 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 20 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 21 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 21 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 22 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 22 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 23 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 23 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 24 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 24 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 25 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 25 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 26 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 26 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 27 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 27 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 28 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 28 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 29 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 29 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 30 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 30 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 31 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 31 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 32 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 32 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 33 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 33 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 34 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -s 'fox' ./testdata/grepnonexist) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 34 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 35 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 35 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 36 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 37 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep '^(a+)*\d' ./testdata/grepinput) >>testtry 2>teststderr
echo "RC=$?" >>testtry
echo "======== STDERR ========" >>testtry
cat teststderr >>testtry
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep
echo "RC=$?" >>testtrygrep
echo "======== STDERR ========" >>testtrygrep
cat teststderrgrep >>testtrygrep
echo "---------------------------- Test 38 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 38 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 39 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 39 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 40 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 40 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 41 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 41 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 42 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 42 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 43 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 43 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 44 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 44 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 45 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 45 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 46 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtry >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 47 ------------------------------" >>testtry
echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
elephant" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
elephant" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 48 ------------------------------" >>testtry
echo "---------------------------- Test 48 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -F "AB.VE
elephant" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
elephant" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 49 ------------------------------" >>testtry
echo "---------------------------- Test 49 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
elephant" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
elephant" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 50 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 50 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 51 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 51 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 52 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 52 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 53 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 53 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 54 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 54 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 55 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 55 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 56 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 56 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 57 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 57 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 58 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 58 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 59 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 59 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 60 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 60 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 61 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 61 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 62 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 62 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 63 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 63 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 64 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 64 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 65 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 65 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 66 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 66 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 67 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 67 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 68 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 68 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 69 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 69 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 70 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 71 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 72 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 72 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 73 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 73 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 74 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 74 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 75 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 75 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 76 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 76 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 77 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 77 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 78 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 78 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 79 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 79 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 80 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 80 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 81 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 81 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 82 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 82 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 83 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 84 -----------------------------" >>testtry
echo testdata/grepinput3 >testtemp1
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1 "fox|complete|t7") >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
echo testdata/grepinput3 >testtemp1grep
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 85 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 85 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 86 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 86 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 87 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 87 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 88 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 88 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 89 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 89 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 90 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 90 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 91 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 91 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 92 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 92 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 93 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 93 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 94 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 94 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 95 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 95 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 96 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 97 -----------------------------" >>testtry
echo "grepinput$" >testtemp1
echo "grepinput8" >>testtemp1
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 98 -----------------------------" >>testtry
echo "grepinput$" >testtemp1
echo "grepinput8" >>testtemp1
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 99 -----------------------------" >>testtry
echo "grepinput$" >testtemp1
echo "grepinput8" >testtemp2
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1 --exclude-from=$builddir/testtemp2 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >testtemp2grep
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 100 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 101 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 101 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 102 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 103 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 104 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 105 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 106 -----------------------------" >>testtrygrep
(cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtry
$cf $srcdir/testdata/grepoutput testtrygrep
if [ $? != 0 ] ; then exit 1; fi
@ -498,15 +518,15 @@ if [ $? != 0 ] ; then exit 1; fi
if [ $utf8 -ne 0 ] ; then
echo "Testing pcregrep UTF-8 features"
echo "---------------------------- Test U1 ------------------------------" >testtry
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test U1 ------------------------------" >testtrygrep
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U2 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test U2 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep
$cf $srcdir/testdata/grepoutput8 testtry
$cf $srcdir/testdata/grepoutput8 testtrygrep
if [ $? != 0 ] ; then exit 1; fi
else
@ -522,28 +542,28 @@ fi
# starts with a hyphen. These tests are run in the build directory.
echo "Testing pcregrep newline settings"
printf "abc\rdef\r\nghi\njkl" >testNinput
printf "abc\rdef\r\nghi\njkl" >testNinputgrep
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtry
$valgrind $pcregrep -n -N CR "^(abc|def|ghi|jkl)" testNinput >>testtry
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
$valgrind $pcregrep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtry
$valgrind $pcregrep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinput >>testtry
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
$valgrind $pcregrep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtry
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
pattern=`printf 'def\rjkl'`
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinput >>testtry
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtry
$valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinput >>testtry
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
$valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtry
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinput >>testtry
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtry
$valgrind $pcregrep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinput >>testtry
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
$valgrind $pcregrep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
$cf $srcdir/testdata/grepoutputN testtry
$cf $srcdir/testdata/grepoutputN testtrygrep
if [ $? != 0 ] ; then exit 1; fi
exit 0

View File

@ -1,5 +1,6 @@
#! /bin/sh
###############################################################################
# Run the PCRE tests using the pcretest program. The appropriate tests are
# selected, depending on which build-time options were used.
#
@ -13,21 +14,38 @@
# UTF-8 with the UTF-8 check turned off; for this, studying must also be
# disabled with /SS.
#
# When JIT support is available, all the tests are also run with -s+ to test
# (again, almost) everything with studying and the JIT option. There are also
# two tests for JIT-specific features, one to be run when JIT support is
# available, and one when it is not.
# When JIT support is available, all appropriate tests are also run with -s+ to
# test (again, almost) everything with studying and the JIT option, unless
# "nojit" is given on the command line. There are also two tests for
# JIT-specific features, one to be run when JIT support is available (unless
# "nojit" is specified), and one when it is not.
#
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
# possible to select which to test by the arguments -8, -16 or -32.
# possible to select which to test by giving "-8", "-16" or "-32" on the
# command line.
#
# Other arguments for this script can be individual test numbers, or the word
# "valgrind", "valgrind-log" or "sim" followed by an argument to run cross-
# compiled executables under a simulator, for example:
# As well as "nojit", "-8", "-16", and "-32", arguments for this script are
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
# except test 10. Whatever order the arguments are in, the tests are always run
# in numerical order.
#
# The special argument "3S" runs test 3, stopping if it fails. Test 3 is the
# locale test, and failure usually means there's an issue with the locale
# rather than a bug in PCRE, so normally subsequent tests are run. "3S" is
# useful when you want to debug or update the test.
#
# Inappropriate tests are automatically skipped (with a comment to say so): for
# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
# support is compiled, test 13 is skipped.
#
# Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
# followed by an argument to run cross-compiled executables under a simulator,
# for example:
#
# RunTest 3 sim "qemu-arm -s 8388608"
#
#
# There are two special cases where only one argument is allowed:
#
# If the first and only argument is "ebcdic", the script runs the special
@ -36,7 +54,7 @@
#
# If the script is obeyed as "RunTest list", a list of available tests is
# output, but none of them are run.
###############################################################################
# Define test titles in variables so that they can be output as a list. Some
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
@ -53,8 +71,8 @@ title8="Test 8: DFA matching main functionality"
title9="Test 9: DFA matching with UTF"
title10="Test 10: DFA matching with Unicode properties"
title11="Test 11: Internal offsets and code size tests"
title12="Test 12: JIT-specific features (JIT available)"
title13="Test 13: JIT-specific features (JIT not available)"
title12="Test 12: JIT-specific features (when JIT is available)"
title13="Test 13: JIT-specific features (when JIT is not available)"
title14="Test 14: Specials for the basic 8-bit library"
title15="Test 15: Specials for the 8-bit library with UTF-8 support"
title16="Test 16: Specials for the 8-bit library with Unicode propery support"
@ -69,6 +87,8 @@ title24="Test 24: Specials for the 16-bit library with UTF-16 support"
title25="Test 25: Specials for the 32-bit library"
title26="Test 26: Specials for the 32-bit library with UTF-32 support"
maxtest=26
if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title1
echo $title2 "(not UTF)"
@ -151,17 +171,19 @@ fi
# Default values
valgrind=
sim=
arg8=
arg16=
arg32=
nojit=
sim=
skip=
valgrind=
# This is in case the caller has set aliases (as I do - PH)
unset cp ls mv rm
# Select which tests to run; for those that are explicitly requested, check
# that the necessary optional facilities are available.
# Process options and select which tests to run; for those that are explicitly
# requested, check that the necessary optional facilities are available.
do1=no
do2=no
@ -221,10 +243,34 @@ while [ $# -gt 0 ] ; do
-8) arg8=yes;;
-16) arg16=yes;;
-32) arg32=yes;;
nojit) nojit=yes;;
sim) shift; sim=$1;;
valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
sim) shift; sim=$1;;
*) echo "Unknown test number '$1'"; exit 1;;
~*)
if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
else
echo "Unknown option or test selector '$1'"; exit 1
fi
;;
*-*)
if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
tf=`expr "$1" : '\([0-9]*\)'`
tt=`expr "$1" : '.*-\([0-9]*\)'`
if [ "$tt" = "" ] ; then tt=$maxtest; fi
if expr \( "$tf" "<" 1 \) \| \( "$tt" ">" "$maxtest" \) >/dev/null; then
echo "Invalid test range '$1'"; exit 1
fi
while expr "$tf" "<=" "$tt" >/dev/null; do
eval do${tf}=yes
tf=`expr $tf + 1`
done
else
echo "Invalid test range '$1'"; exit 1
fi
;;
*) echo "Unknown option or test selector '$1'"; exit 1;;
esac
shift
done
@ -309,79 +355,12 @@ ucp=$?
jitopt=
$sim ./pcretest -C jit >/dev/null
jit=$?
if [ $jit -ne 0 ] ; then
if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
jitopt=-s+
fi
if [ $utf -eq 0 ] ; then
if [ $do4 = yes ] ; then
echo "Can't run test 4 because UTF support is not configured"
exit 1
fi
if [ $do5 = yes ] ; then
echo "Can't run test 5 because UTF support is not configured"
exit 1
fi
if [ $do9 = yes ] ; then
echo "Can't run test 8 because UTF support is not configured"
exit 1
fi
if [ $do15 = yes ] ; then
echo "Can't run test 15 because UTF support is not configured"
exit 1
fi
if [ $do18 = yes ] ; then
echo "Can't run test 18 because UTF support is not configured"
fi
if [ $do22 = yes ] ; then
echo "Can't run test 22 because UTF support is not configured"
fi
fi
if [ $ucp -eq 0 ] ; then
if [ $do6 = yes ] ; then
echo "Can't run test 6 because Unicode property support is not configured"
exit 1
fi
if [ $do7 = yes ] ; then
echo "Can't run test 7 because Unicode property support is not configured"
exit 1
fi
if [ $do10 = yes ] ; then
echo "Can't run test 10 because Unicode property support is not configured"
exit 1
fi
if [ $do16 = yes ] ; then
echo "Can't run test 16 because Unicode property support is not configured"
exit 1
fi
if [ $do19 = yes ] ; then
echo "Can't run test 19 because Unicode property support is not configured"
exit 1
fi
fi
if [ $link_size -ne 2 ] ; then
if [ $do11 = yes ] ; then
echo "Can't run test 11 because the link size ($link_size) is not 2"
exit 1
fi
fi
if [ $jit -eq 0 ] ; then
if [ $do12 = "yes" ] ; then
echo "Can't run test 12 because JIT support is not configured"
exit 1
fi
else
if [ $do13 = "yes" ] ; then
echo "Can't run test 13 because JIT support is configured"
exit 1
fi
fi
# If no specific tests were requested, select all. Those that are not
# relevant will be skipped.
# relevant will be automatically skipped.
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
@ -418,6 +397,11 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
do26=yes
fi
# Handle any explicit skips at this stage, so that an argument list may consist
# only of explicit skips.
for i in $skip; do eval do$i=no; done
# Show which release and which test data
echo ""
@ -479,8 +463,9 @@ fi
# Locale-specific tests, provided that either the "fr_FR" or the "french"
# locale is available. The former is the Unix-like standard; the latter is
# for Windows. Another possibility is "fr", which needs to be run against
# the Windows-specific input and output files.
# for Windows. Another possibility is "fr". Unfortunately, different versions
# of the French locale give different outputs for some items. This test passes
# if the output matches any one of the alternative output files.
if [ $do3 = yes ] ; then
locale -a | grep '^fr_FR$' >/dev/null
@ -488,20 +473,28 @@ if [ $do3 = yes ] ; then
locale=fr_FR
infile=$testdata/testinput3
outfile=$testdata/testoutput3
outfile2=$testdata/testoutput3A
outfile3=$testdata/testoutput3B
else
infile=test3input
outfile=test3output
outfile2=test3outputA
outfile3=test3outputB
locale -a | grep '^french$' >/dev/null
if [ $? -eq 0 ] ; then
locale=french
sed 's/fr_FR/french/' $testdata/testinput3 >test3input
sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA
sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB
else
locale -a | grep '^fr$' >/dev/null
if [ $? -eq 0 ] ; then
locale=fr
sed 's/fr_FR/fr/' $testdata/wintestinput3 >test3input
sed 's/fr_FR/fr/' $testdata/wintestoutput3 >test3output
sed 's/fr_FR/fr/' $testdata/intestinput3 >test3input
sed 's/fr_FR/fr/' $testdata/intestoutput3 >test3output
sed 's/fr_FR/fr/' $testdata/intestoutput3A >test3outputA
sed 's/fr_FR/fr/' $testdata/intestoutput3B >test3outputB
else
locale=
fi
@ -513,18 +506,20 @@ if [ $do3 = yes ] ; then
for opt in "" "-s" $jitopt; do
$sim $valgrind ./pcretest -q $bmode $opt $infile testtry
if [ $? = 0 ] ; then
$cf $outfile testtry
if [ $? != 0 ] ; then
echo " "
echo "Locale test did not run entirely successfully."
echo "This usually means that there is a problem with the locale"
echo "settings rather than a bug in PCRE."
break;
else
if $cf $outfile testtry >teststdout || \
$cf $outfile2 testtry >teststdout || \
$cf $outfile3 testtry >teststdout
then
if [ "$opt" = "-s" ] ; then echo " OK with study"
elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
else echo " OK"
fi
else
echo "** Locale test did not run successfully. The output did not match"
echo " $outfile, $outfile2 or $outfile3."
echo " This may mean that there is a problem with the locale settings rather"
echo " than a bug in PCRE."
exit 1
fi
else exit 1
fi
@ -700,7 +695,7 @@ fi
if [ $do12 = yes ] ; then
echo $title12
if [ $jit -eq 0 ] ; then
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
echo " Skipped because JIT is not available or not usable"
else
$sim $valgrind ./pcretest -q $bmode $testdata/testinput12 testtry
@ -1010,6 +1005,6 @@ fi
done
# Clean up local working files
rm -f test3input test3output testNinput testsaved* teststderr teststdout testtry
rm -f test3input test3output test3outputA testNinput testsaved* teststderr teststdout testtry
# End

View File

@ -15,7 +15,7 @@
@rem tests 4 5 9 15 and 18 require utf support
@rem tests 6 7 10 16 and 19 require ucp support
@rem 11 requires ucp and link size 2
@rem 12 requires presense of jit support
@rem 12 requires presence of jit support
@rem 13 requires absence of jit support
@rem Sheri P also added override tests for study and jit testing
@rem Zoltan Herczeg added libpcre16 support

769
tools/pcre/aclocal.m4 vendored

File diff suppressed because it is too large Load Diff

270
tools/pcre/ar-lib Normal file
View File

@ -0,0 +1,270 @@
#! /bin/sh
# Wrapper for Microsoft lib.exe
me=ar-lib
scriptversion=2012-03-01.08; # UTC
# Copyright (C) 2010-2013 Free Software Foundation, Inc.
# Written by Peter Rosin <peda@lysator.liu.se>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# func_error message
func_error ()
{
echo "$me: $1" 1>&2
exit 1
}
file_conv=
# func_file_conv build_file
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts.
func_file_conv ()
{
file=$1
case $file in
/ | /[!/]*) # absolute file, and not a UNC file
if test -z "$file_conv"; then
# lazily determine how to convert abs files
case `uname -s` in
MINGW*)
file_conv=mingw
;;
CYGWIN*)
file_conv=cygwin
;;
*)
file_conv=wine
;;
esac
fi
case $file_conv in
mingw)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
cygwin)
file=`cygpath -m "$file" || echo "$file"`
;;
wine)
file=`winepath -w "$file" || echo "$file"`
;;
esac
;;
esac
}
# func_at_file at_file operation archive
# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE
# for each of them.
# When interpreting the content of the @FILE, do NOT use func_file_conv,
# since the user would need to supply preconverted file names to
# binutils ar, at least for MinGW.
func_at_file ()
{
operation=$2
archive=$3
at_file_contents=`cat "$1"`
eval set x "$at_file_contents"
shift
for member
do
$AR -NOLOGO $operation:"$member" "$archive" || exit $?
done
}
case $1 in
'')
func_error "no command. Try '$0 --help' for more information."
;;
-h | --h*)
cat <<EOF
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
Members may be specified in a file named with @FILE.
EOF
exit $?
;;
-v | --v*)
echo "$me, version $scriptversion"
exit $?
;;
esac
if test $# -lt 3; then
func_error "you must specify a program, an action and an archive"
fi
AR=$1
shift
while :
do
if test $# -lt 2; then
func_error "you must specify a program, an action and an archive"
fi
case $1 in
-lib | -LIB \
| -ltcg | -LTCG \
| -machine* | -MACHINE* \
| -subsystem* | -SUBSYSTEM* \
| -verbose | -VERBOSE \
| -wx* | -WX* )
AR="$AR $1"
shift
;;
*)
action=$1
shift
break
;;
esac
done
orig_archive=$1
shift
func_file_conv "$orig_archive"
archive=$file
# strip leading dash in $action
action=${action#-}
delete=
extract=
list=
quick=
replace=
index=
create=
while test -n "$action"
do
case $action in
d*) delete=yes ;;
x*) extract=yes ;;
t*) list=yes ;;
q*) quick=yes ;;
r*) replace=yes ;;
s*) index=yes ;;
S*) ;; # the index is always updated implicitly
c*) create=yes ;;
u*) ;; # TODO: don't ignore the update modifier
v*) ;; # TODO: don't ignore the verbose modifier
*)
func_error "unknown action specified"
;;
esac
action=${action#?}
done
case $delete$extract$list$quick$replace,$index in
yes,* | ,yes)
;;
yesyes*)
func_error "more than one action specified"
;;
*)
func_error "no action specified"
;;
esac
if test -n "$delete"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
for member
do
case $1 in
@*)
func_at_file "${1#@}" -REMOVE "$archive"
;;
*)
func_file_conv "$1"
$AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
;;
esac
done
elif test -n "$extract"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
if test $# -gt 0; then
for member
do
case $1 in
@*)
func_at_file "${1#@}" -EXTRACT "$archive"
;;
*)
func_file_conv "$1"
$AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
;;
esac
done
else
$AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
do
$AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
done
fi
elif test -n "$quick$replace"; then
if test ! -f "$orig_archive"; then
if test -z "$create"; then
echo "$me: creating $orig_archive"
fi
orig_archive=
else
orig_archive=$archive
fi
for member
do
case $1 in
@*)
func_file_conv "${1#@}"
set x "$@" "@$file"
;;
*)
func_file_conv "$1"
set x "$@" "$file"
;;
esac
shift
shift
done
if test -n "$orig_archive"; then
$AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
else
$AR -NOLOGO -OUT:"$archive" "$@" || exit $?
fi
elif test -n "$list"; then
if test ! -f "$orig_archive"; then
func_error "archive not found"
fi
$AR -NOLOGO -LIST "$archive" || exit $?
fi

View File

@ -1,10 +1,9 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
scriptversion=2012-03-05.13; # UTC
scriptversion=2012-10-14.11; # UTC
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009, 2010, 2012 Free
# Software Foundation, Inc.
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
@ -113,6 +112,11 @@ func_cl_dashl ()
lib=$dir/$lib.lib
break
fi
if test -f "$dir/lib$lib.a"; then
found=yes
lib=$dir/lib$lib.a
break
fi
done
IFS=$save_IFS

View File

@ -5,6 +5,8 @@
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine HAVE_WINDOWS_H 1
#cmakedefine HAVE_STDINT_H 1
#cmakedefine HAVE_INTTYPES_H 1
#cmakedefine HAVE_TYPE_TRAITS_H 1
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
@ -44,6 +46,7 @@
#define NEWLINE @NEWLINE@
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
#define LINK_SIZE @PCRE_LINK_SIZE@
#define PARENS_NEST_LIMIT @PCRE_PARENS_NEST_LIMIT@
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@

View File

@ -1,14 +1,12 @@
#! /bin/sh
# Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011, 2012 Free Software Foundation, Inc.
# Copyright 1992-2013 Free Software Foundation, Inc.
timestamp='2012-08-14'
timestamp='2013-11-29'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
@ -22,19 +20,17 @@ timestamp='2012-08-14'
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Originally written by Per Bothner. Please send patches (context
# diff format) to <config-patches@gnu.org> and include a ChangeLog
# entry.
# the same distribution terms that you use for the rest of that
# program. This Exception is an additional permission under section 7
# of the GNU General Public License, version 3 ("GPLv3").
#
# This script attempts to guess a canonical system name similar to
# config.sub. If it succeeds, it prints the system name on stdout, and
# exits with 0. Otherwise, it exits with 1.
# Originally written by Per Bothner.
#
# You can get the latest version of this script from:
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
#
# Please send patches with a ChangeLog entry to config-patches@gnu.org.
me=`echo "$0" | sed -e 's,.*/,,'`
@ -54,9 +50,7 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
Copyright 1992-2013 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@ -139,22 +133,20 @@ UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
case "${UNAME_SYSTEM}" in
Linux|GNU/*)
Linux|GNU|GNU/*)
# If the system lacks a compiler, then just pick glibc.
# We could probably try harder.
LIBC=gnu
eval $set_cc_for_build
cat <<-EOF > $dummy.c
#include <features.h>
#ifdef __UCLIBC__
# ifdef __UCLIBC_CONFIG_VERSION__
LIBC=uclibc __UCLIBC_CONFIG_VERSION__
# else
#if defined(__UCLIBC__)
LIBC=uclibc
# endif
#else
# ifdef __dietlibc__
#elif defined(__dietlibc__)
LIBC=dietlibc
# else
#else
LIBC=gnu
# endif
#endif
EOF
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
@ -329,7 +321,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
echo arm-acorn-riscix${UNAME_RELEASE}
exit ;;
arm:riscos:*:*|arm:RISCOS:*:*)
arm*:riscos:*:*|arm*:RISCOS:*:*)
echo arm-unknown-riscos
exit ;;
SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
@ -912,6 +904,9 @@ EOF
if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
arc:Linux:*:* | arceb:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
arm*:Linux:*:*)
eval $set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
@ -974,6 +969,9 @@ EOF
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
;;
or1k:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
or32:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
@ -997,8 +995,14 @@ EOF
ppc:Linux:*:*)
echo powerpc-unknown-linux-${LIBC}
exit ;;
ppc64le:Linux:*:*)
echo powerpc64le-unknown-linux-${LIBC}
exit ;;
ppcle:Linux:*:*)
echo powerpcle-unknown-linux-${LIBC}
exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux
echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
exit ;;
sh64*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
@ -1252,19 +1256,31 @@ EOF
exit ;;
*:Darwin:*:*)
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
case $UNAME_PROCESSOR in
i386)
eval $set_cc_for_build
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
UNAME_PROCESSOR="x86_64"
fi
fi ;;
unknown) UNAME_PROCESSOR=powerpc ;;
esac
eval $set_cc_for_build
if test "$UNAME_PROCESSOR" = unknown ; then
UNAME_PROCESSOR=powerpc
fi
if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
case $UNAME_PROCESSOR in
i386) UNAME_PROCESSOR=x86_64 ;;
powerpc) UNAME_PROCESSOR=powerpc64 ;;
esac
fi
fi
elif test "$UNAME_PROCESSOR" = i386 ; then
# Avoid executing cc on OS X 10.9, as it ships with a stub
# that puts up a graphical alert prompting to install
# developer tools. Any system running Mac OS X 10.7 or
# later (Darwin 11 and later) is required to have a 64-bit
# processor. This is not true of the ARM version of Darwin
# that Apple uses in portable devices.
UNAME_PROCESSOR=x86_64
fi
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
exit ;;
*:procnto*:*:* | *:QNX:[0123456789]*:*)

View File

@ -5,20 +5,28 @@
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the facilities, config.h.in is converted by
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
are going to build PCRE "by hand" without using "configure" or CMake, you
should copy the distributed config.h.generic to config.h, and then edit the
macro definitions to be the way you need them. You must then add
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
at the start of every source.
In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE "by
hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
so that config.h is included at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
them both to 0; an emulation function will be used. */
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
macros are listed as a commented #undef in config.h.generic. Macros such as
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any
@ -44,27 +52,19 @@ them both to 0; an emulation function will be used. */
/* #undef EBCDIC_NL25 */
/* Define to 1 if you have the `bcopy' function. */
#ifndef HAVE_BCOPY
#define HAVE_BCOPY 1
#endif
/* #undef HAVE_BCOPY */
/* Define to 1 if you have the <bits/type_traits.h> header file. */
/* #undef HAVE_BITS_TYPE_TRAITS_H */
/* Define to 1 if you have the <bzlib.h> header file. */
#ifndef HAVE_BZLIB_H
#define HAVE_BZLIB_H 1
#endif
/* #undef HAVE_BZLIB_H */
/* Define to 1 if you have the <dirent.h> header file. */
#ifndef HAVE_DIRENT_H
#define HAVE_DIRENT_H 1
#endif
/* #undef HAVE_DIRENT_H */
/* Define to 1 if you have the <dlfcn.h> header file. */
#ifndef HAVE_DLFCN_H
#define HAVE_DLFCN_H 1
#endif
/* #undef HAVE_DLFCN_H */
/* Define to 1 if you have the <editline/readline.h> header file. */
/* #undef HAVE_EDITLINE_READLINE_H */
@ -73,29 +73,19 @@ them both to 0; an emulation function will be used. */
/* #undef HAVE_EDIT_READLINE_READLINE_H */
/* Define to 1 if you have the <inttypes.h> header file. */
#ifndef HAVE_INTTYPES_H
#define HAVE_INTTYPES_H 1
#endif
/* #undef HAVE_INTTYPES_H */
/* Define to 1 if you have the <limits.h> header file. */
#ifndef HAVE_LIMITS_H
#define HAVE_LIMITS_H 1
#endif
/* #undef HAVE_LIMITS_H */
/* Define to 1 if the system has the type `long long'. */
#ifndef HAVE_LONG_LONG
#define HAVE_LONG_LONG 1
#endif
/* #undef HAVE_LONG_LONG */
/* Define to 1 if you have the `memmove' function. */
#ifndef HAVE_MEMMOVE
#define HAVE_MEMMOVE 1
#endif
/* #undef HAVE_MEMMOVE */
/* Define to 1 if you have the <memory.h> header file. */
#ifndef HAVE_MEMORY_H
#define HAVE_MEMORY_H 1
#endif
/* #undef HAVE_MEMORY_H */
/* Define if you have POSIX threads libraries and header files. */
/* #undef HAVE_PTHREAD */
@ -110,34 +100,22 @@ them both to 0; an emulation function will be used. */
/* #undef HAVE_READLINE_READLINE_H */
/* Define to 1 if you have the <stdint.h> header file. */
#ifndef HAVE_STDINT_H
#define HAVE_STDINT_H 1
#endif
/* #undef HAVE_STDINT_H */
/* Define to 1 if you have the <stdlib.h> header file. */
#ifndef HAVE_STDLIB_H
#define HAVE_STDLIB_H 1
#endif
/* #undef HAVE_STDLIB_H */
/* Define to 1 if you have the `strerror' function. */
#ifndef HAVE_STRERROR
#define HAVE_STRERROR 1
#endif
/* #undef HAVE_STRERROR */
/* Define to 1 if you have the <string> header file. */
#ifndef HAVE_STRING
#define HAVE_STRING 1
#endif
/* #undef HAVE_STRING */
/* Define to 1 if you have the <strings.h> header file. */
#ifndef HAVE_STRINGS_H
#define HAVE_STRINGS_H 1
#endif
/* #undef HAVE_STRINGS_H */
/* Define to 1 if you have the <string.h> header file. */
#ifndef HAVE_STRING_H
#define HAVE_STRING_H 1
#endif
/* #undef HAVE_STRING_H */
/* Define to 1 if you have `strtoimax'. */
/* #undef HAVE_STRTOIMAX */
@ -146,46 +124,31 @@ them both to 0; an emulation function will be used. */
/* #undef HAVE_STRTOLL */
/* Define to 1 if you have `strtoq'. */
#ifndef HAVE_STRTOQ
#define HAVE_STRTOQ 1
#endif
/* #undef HAVE_STRTOQ */
/* Define to 1 if you have the <sys/stat.h> header file. */
#ifndef HAVE_SYS_STAT_H
#define HAVE_SYS_STAT_H 1
#endif
/* #undef HAVE_SYS_STAT_H */
/* Define to 1 if you have the <sys/types.h> header file. */
#ifndef HAVE_SYS_TYPES_H
#define HAVE_SYS_TYPES_H 1
#endif
/* #undef HAVE_SYS_TYPES_H */
/* Define to 1 if you have the <type_traits.h> header file. */
/* #undef HAVE_TYPE_TRAITS_H */
/* Define to 1 if you have the <unistd.h> header file. */
#ifndef HAVE_UNISTD_H
#define HAVE_UNISTD_H 1
#endif
/* #undef HAVE_UNISTD_H */
/* Define to 1 if the system has the type `unsigned long long'. */
#ifndef HAVE_UNSIGNED_LONG_LONG
#define HAVE_UNSIGNED_LONG_LONG 1
#endif
/* #undef HAVE_UNSIGNED_LONG_LONG */
/* Define to 1 or 0, depending whether the compiler supports simple visibility
declarations. */
#ifndef HAVE_VISIBILITY
#define HAVE_VISIBILITY 1
#endif
/* Define to 1 if the compiler supports simple visibility declarations. */
/* #undef HAVE_VISIBILITY */
/* Define to 1 if you have the <windows.h> header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the <zlib.h> header file. */
#ifndef HAVE_ZLIB_H
#define HAVE_ZLIB_H 1
#endif
/* #undef HAVE_ZLIB_H */
/* Define to 1 if you have `_strtoi64'. */
/* #undef HAVE__STRTOI64 */
@ -201,6 +164,7 @@ them both to 0; an emulation function will be used. */
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
/* This is ignored unless you are using libtool. */
#ifndef LT_OBJDIR
#define LT_OBJDIR ".libs/"
#endif
@ -253,9 +217,6 @@ them both to 0; an emulation function will be used. */
#define NEWLINE 10
#endif
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
/* #undef NO_MINUS_C_MINUS_O */
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to any value to get a version that doesn't use
@ -275,7 +236,7 @@ them both to 0; an emulation function will be used. */
#define PACKAGE_NAME "PCRE"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE 8.32"
#define PACKAGE_STRING "PCRE 8.35"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre"
@ -284,7 +245,14 @@ them both to 0; an emulation function will be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "8.32"
#define PACKAGE_VERSION "8.35"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern. */
#ifndef PARENS_NEST_LIMIT
#define PARENS_NEST_LIMIT 250
#endif
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
pcregrep to hold parts of the file it is searching. This is also the
@ -325,13 +293,7 @@ them both to 0; an emulation function will be used. */
/* #undef PTHREAD_CREATE_JOINABLE */
/* Define to 1 if you have the ANSI C header files. */
#ifndef STDC_HEADERS
#define STDC_HEADERS 1
#endif
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
are able to generate code coverage reports. */
/* #undef SUPPORT_GCOV */
/* #undef STDC_HEADERS */
/* Define to any value to enable support for Just-In-Time compiling. */
/* #undef SUPPORT_JIT */
@ -357,9 +319,7 @@ them both to 0; an emulation function will be used. */
/* #undef SUPPORT_PCRE32 */
/* Define to any value to enable the 8 bit PCRE library. */
#ifndef SUPPORT_PCRE8
#define SUPPORT_PCRE8 /**/
#endif
/* #undef SUPPORT_PCRE8 */
/* Define to any value to enable JIT support in pcregrep. */
/* #undef SUPPORT_PCREGREP_JIT */
@ -373,13 +333,11 @@ them both to 0; an emulation function will be used. */
ASCII/UTF-8/16/32, but not both at once. */
/* #undef SUPPORT_UTF */
/* Valgrind support to find invalid memory reads. */
/* Define to any value for valgrind support to find invalid memory reads. */
/* #undef SUPPORT_VALGRIND */
/* Version number of package */
#ifndef VERSION
#define VERSION "8.32"
#endif
#define VERSION "8.35"
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */

View File

@ -5,20 +5,28 @@
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the facilities, config.h.in is converted by
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
are going to build PCRE "by hand" without using "configure" or CMake, you
should copy the distributed config.h.generic to config.h, and then edit the
macro definitions to be the way you need them. You must then add
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
at the start of every source.
In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE "by
hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
so that config.h is included at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
them both to 0; an emulation function will be used. */
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
macros are listed as a commented #undef in config.h.generic. Macros such as
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any
@ -133,8 +141,7 @@ them both to 0; an emulation function will be used. */
/* Define to 1 if the system has the type `unsigned long long'. */
#undef HAVE_UNSIGNED_LONG_LONG
/* Define to 1 or 0, depending whether the compiler supports simple visibility
declarations. */
/* Define to 1 if the compiler supports simple visibility declarations. */
#undef HAVE_VISIBILITY
/* Define to 1 if you have the <windows.h> header file. */
@ -195,9 +202,6 @@ them both to 0; an emulation function will be used. */
or -2 (ANYCRLF). */
#undef NEWLINE
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
#undef NO_MINUS_C_MINUS_O
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to any value to get a version that doesn't use
@ -228,6 +232,11 @@ them both to 0; an emulation function will be used. */
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern. */
#undef PARENS_NEST_LIMIT
/* to make a symbol visible */
#undef PCRECPP_EXP_DECL
@ -284,10 +293,6 @@ them both to 0; an emulation function will be used. */
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
are able to generate code coverage reports. */
#undef SUPPORT_GCOV
/* Define to any value to enable support for Just-In-Time compiling. */
#undef SUPPORT_JIT
@ -326,7 +331,7 @@ them both to 0; an emulation function will be used. */
ASCII/UTF-8/16/32, but not both at once. */
#undef SUPPORT_UTF
/* Valgrind support to find invalid memory reads. */
/* Define to any value for valgrind support to find invalid memory reads. */
#undef SUPPORT_VALGRIND
/* Version number of package */

111
tools/pcre/config.sub vendored
View File

@ -1,24 +1,18 @@
#! /bin/sh
# Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011, 2012 Free Software Foundation, Inc.
# Copyright 1992-2013 Free Software Foundation, Inc.
timestamp='2012-08-18'
timestamp='2013-10-01'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
# can handle that machine. It does not imply ALL GNU software can.
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
@ -26,11 +20,12 @@ timestamp='2012-08-18'
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# the same distribution terms that you use for the rest of that
# program. This Exception is an additional permission under section 7
# of the GNU General Public License, version 3 ("GPLv3").
# Please send patches to <config-patches@gnu.org>. Submit a context
# diff and a properly formatted GNU ChangeLog entry.
# Please send patches with a ChangeLog entry to config-patches@gnu.org.
#
# Configuration subroutine to validate and canonicalize a configuration type.
# Supply the specified configuration type as an argument.
@ -73,9 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
Copyright 1992-2013 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@ -156,7 +149,7 @@ case $os in
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-apple | -axis | -knuth | -cray | -microblaze)
-apple | -axis | -knuth | -cray | -microblaze*)
os=
basic_machine=$1
;;
@ -259,21 +252,24 @@ case $basic_machine in
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
| be32 | be64 \
| arc | arceb \
| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
| avr | avr32 \
| be32 | be64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx | dvp \
| c4x | c8051 | clipper \
| d10v | d30v | dlx | dsp16xx \
| epiphany \
| fido | fr30 | frv \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| hexagon \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
| k1om \
| le32 | le64 \
| lm32 \
| m32c | m32r | m32rle | m68000 | m68k | m88k \
| maxq | mb | microblaze | mcore | mep | metag \
| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
| mips | mipsbe | mipseb | mipsel | mipsle \
| mips16 \
| mips64 | mips64el \
@ -291,16 +287,17 @@ case $basic_machine in
| mipsisa64r2 | mipsisa64r2el \
| mipsisa64sb1 | mipsisa64sb1el \
| mipsisa64sr71k | mipsisa64sr71kel \
| mipsr5900 | mipsr5900el \
| mipstx39 | mipstx39el \
| mn10200 | mn10300 \
| moxie \
| mt \
| msp430 \
| nds32 | nds32le | nds32be \
| nios | nios2 \
| nios | nios2 | nios2eb | nios2el \
| ns16k | ns32k \
| open8 \
| or32 \
| or1k | or32 \
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle \
| pyramid \
@ -328,7 +325,7 @@ case $basic_machine in
c6x)
basic_machine=tic6x-unknown
;;
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
basic_machine=$basic_machine-unknown
os=-none
;;
@ -370,13 +367,13 @@ case $basic_machine in
| aarch64-* | aarch64_be-* \
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| avr-* | avr32-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* \
| clipper-* | craynv-* | cydra-* \
| c8051-* | clipper-* | craynv-* | cydra-* \
| d10v-* | d30v-* | dlx-* \
| elxsi-* \
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
@ -385,11 +382,13 @@ case $basic_machine in
| hexagon-* \
| i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \
| k1om-* \
| le32-* | le64-* \
| lm32-* \
| m32c-* | m32r-* | m32rle-* \
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
| microblaze-* | microblazeel-* \
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
| mips16-* \
| mips64-* | mips64el-* \
@ -407,12 +406,13 @@ case $basic_machine in
| mipsisa64r2-* | mipsisa64r2el-* \
| mipsisa64sb1-* | mipsisa64sb1el-* \
| mipsisa64sr71k-* | mipsisa64sr71kel-* \
| mipsr5900-* | mipsr5900el-* \
| mipstx39-* | mipstx39el-* \
| mmix-* \
| mt-* \
| msp430-* \
| nds32-* | nds32le-* | nds32be-* \
| nios-* | nios2-* \
| nios-* | nios2-* | nios2eb-* | nios2el-* \
| none-* | np1-* | ns16k-* | ns32k-* \
| open8-* \
| orion-* \
@ -788,7 +788,7 @@ case $basic_machine in
basic_machine=ns32k-utek
os=-sysv
;;
microblaze)
microblaze*)
basic_machine=microblaze-xilinx
;;
mingw64)
@ -796,7 +796,7 @@ case $basic_machine in
os=-mingw64
;;
mingw32)
basic_machine=i386-pc
basic_machine=i686-pc
os=-mingw32
;;
mingw32ce)
@ -810,24 +810,6 @@ case $basic_machine in
basic_machine=m68k-atari
os=-mint
;;
mipsEE* | ee | ps2)
basic_machine=mips64r5900el-scei
case $os in
-linux*)
;;
*)
os=-elf
;;
esac
;;
iop)
basic_machine=mipsel-scei
os=-irx
;;
dvp)
basic_machine=dvp-scei
os=-elf
;;
mips3*-*)
basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
;;
@ -850,7 +832,7 @@ case $basic_machine in
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
;;
msys)
basic_machine=i386-pc
basic_machine=i686-pc
os=-msys
;;
mvs)
@ -1041,7 +1023,11 @@ case $basic_machine in
basic_machine=i586-unknown
os=-pw32
;;
rdos)
rdos | rdos64)
basic_machine=x86_64-pc
os=-rdos
;;
rdos32)
basic_machine=i386-pc
os=-rdos
;;
@ -1368,7 +1354,7 @@ case $os in
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
| -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
| -sym* | -kopensolaris* \
| -sym* | -kopensolaris* | -plan9* \
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
| -aos* | -aros* \
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
@ -1386,7 +1372,7 @@ case $os in
| -uxpv* | -beos* | -mpeix* | -udk* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -irx* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
@ -1514,9 +1500,6 @@ case $os in
-aros*)
os=-aros
;;
-kaos*)
os=-kaos
;;
-zvmoe)
os=-zvmoe
;;
@ -1565,6 +1548,9 @@ case $basic_machine in
c4x-* | tic4x-*)
os=-coff
;;
c8051-*)
os=-elf
;;
hexagon-*)
os=-elf
;;
@ -1608,6 +1594,9 @@ case $basic_machine in
mips*-*)
os=-elf
;;
or1k-*)
os=-elf
;;
or32-*)
os=-coff
;;

2144
tools/pcre/configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -9,18 +9,18 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre_major, [8])
m4_define(pcre_minor, [32])
m4_define(pcre_minor, [35])
m4_define(pcre_prerelease, [])
m4_define(pcre_date, [2012-11-30])
m4_define(pcre_date, [2014-04-04])
# NOTE: The CMakeLists.txt file searches for the above variables in the first
# 50 lines of this file. Please update that if the variables above are moved.
# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [3:0:2])
m4_define(libpcre16_version, [2:0:2])
m4_define(libpcre32_version, [0:0:0])
m4_define(libpcreposix_version, [0:1:0])
m4_define(libpcre_version, [3:3:2])
m4_define(libpcre16_version, [2:3:2])
m4_define(libpcre32_version, [0:3:0])
m4_define(libpcreposix_version, [0:2:0])
m4_define(libpcrecpp_version, [0:0:0])
AC_PREREQ(2.57)
@ -30,6 +30,9 @@ AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_CONFIG_HEADERS(config.h)
# This is a new thing required to stop a warning from automake 1.12
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
# This was added at the suggestion of libtoolize (03-Jan-10)
AC_CONFIG_MACRO_DIR([m4])
@ -245,7 +248,7 @@ AC_ARG_ENABLE(pcregrep-libbz2,
# Handle --with-pcregrep-bufsize=N
AC_ARG_WITH(pcregrep-bufsize,
AS_HELP_STRING([--with-pcregrep-bufsize=N],
[pcregrep buffer size (default=20480)]),
[pcregrep buffer size (default=20480, minimum=8192)]),
, with_pcregrep_bufsize=20480)
# Handle --enable-pcretest-libedit
@ -272,6 +275,12 @@ AC_ARG_WITH(link-size,
[internal link size (2, 3, or 4 allowed; default=2)]),
, with_link_size=2)
# Handle --with-parens-nest-limit=N
AC_ARG_WITH(parens-nest-limit,
AS_HELP_STRING([--with-parens-nest-limit=N],
[nested parentheses limit (default=250)]),
, with_parens_nest_limit=250)
# Handle --with-match-limit=N
AC_ARG_WITH(match-limit,
AS_HELP_STRING([--with-match-limit=N],
@ -427,24 +436,33 @@ AH_TOP([
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the facilities, config.h.in is converted by
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
are going to build PCRE "by hand" without using "configure" or CMake, you
should copy the distributed config.h.generic to config.h, and then edit the
macro definitions to be the way you need them. You must then add
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
at the start of every source.
In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE "by
hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
so that config.h is included at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
them both to 0; an emulation function will be used. */])
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
macros are listed as a commented #undef in config.h.generic. Macros such as
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */])
# Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h windows.h)
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
# The files below are C++ header files.
pcre_have_type_traits="0"
@ -669,11 +687,15 @@ if test "$enable_pcre32" = "yes"; then
Define to any value to enable the 32 bit PCRE library.])
fi
# Unless running under Windows, JIT support requires pthreads.
if test "$enable_jit" = "yes"; then
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
CC="$PTHREAD_CC"
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
LIBS="$PTHREAD_LIBS $LIBS"
if test "$HAVE_WINDOWS_H" != "1"; then
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
CC="$PTHREAD_CC"
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
LIBS="$PTHREAD_LIBS $LIBS"
fi
AC_DEFINE([SUPPORT_JIT], [], [
Define to any value to enable support for Just-In-Time compiling.])
else
@ -722,7 +744,12 @@ if test "$enable_pcregrep_libbz2" = "yes"; then
fi
if test $with_pcregrep_bufsize -lt 8192 ; then
AC_MSG_WARN([$with_pcregrep_bufsize is too small for --with-pcregrep-bufsize; using 8192])
with_pcregrep_bufsize="8192"
else
if test $? -gt 1 ; then
AC_MSG_ERROR([Bad value for --with-pcregrep-bufsize])
fi
fi
AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
@ -773,6 +800,11 @@ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
faster than using malloc() for each call. The threshold above which
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern.])
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of
@ -838,7 +870,7 @@ fi
if test "$enable_valgrind" = "yes"; then
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
Valgrind support to find invalid memory reads.])
Define to any value for valgrind support to find invalid memory reads.])
fi
# Platform specific issues
@ -946,7 +978,7 @@ if test "$enable_pcretest_libreadline" = "yes"; then
fi
fi
# Check for valgrind
# Handle valgrind support
if test "$enable_valgrind" = "yes"; then
m4_ifdef([PKG_CHECK_MODULES],
@ -954,7 +986,7 @@ if test "$enable_valgrind" = "yes"; then
[AC_MSG_ERROR([pkg-config not supported])])
fi
# test code coverage reporting
# Handle code coverage reporting support
if test "$enable_coverage" = "yes"; then
if test "x$GCC" != "xyes"; then
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
@ -985,11 +1017,7 @@ if test "$enable_coverage" = "yes"; then
AC_MSG_ERROR([genhtml not found])
fi
AC_DEFINE([SUPPORT_GCOV],[1], [
Define to allow pcretest and pcregrep to be linked with gcov, so that they
are able to generate code coverage reports.])
# And add flags needed for gcov
# Set flags needed for gcov
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
GCOV_LIBS="-lgcov"
@ -1064,6 +1092,7 @@ $PACKAGE-$VERSION configuration summary:
Use stack recursion ............. : ${enable_stack_for_recursion}
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
Internal link size .............. : ${with_link_size}
Nested parentheses limit ........ : ${with_parens_nest_limit}
Match limit ..................... : ${with_match_limit}
Match limit recursion ........... : ${with_match_limit_recursion}
Build shared libs ............... : ${enable_shared}

View File

@ -1,10 +1,9 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
scriptversion=2012-03-27.16; # UTC
scriptversion=2013-05-30.07; # UTC
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
# 2011, 2012 Free Software Foundation, Inc.
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -28,9 +27,9 @@ scriptversion=2012-03-27.16; # UTC
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
@ -57,11 +56,65 @@ EOF
;;
esac
# Get the directory component of the given path, and save it in the
# global variables '$dir'. Note that this directory component will
# be either empty or ending with a '/' character. This is deliberate.
set_dir_from ()
{
case $1 in
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
*) dir=;;
esac
}
# Get the suffix-stripped basename of the given path, and save it the
# global variable '$base'.
set_base_from ()
{
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
}
# If no dependency file was actually created by the compiler invocation,
# we still have to create a dummy depfile, to avoid errors with the
# Makefile "include basename.Plo" scheme.
make_dummy_depfile ()
{
echo "#dummy" > "$depfile"
}
# Factor out some common post-processing of the generated depfile.
# Requires the auxiliary global variable '$tmpdepfile' to be set.
aix_post_process_depfile ()
{
# If the compiler actually managed to produce a dependency file,
# post-process it.
if test -f "$tmpdepfile"; then
# Each line is of the form 'foo.o: dependency.h'.
# Do two passes, one to just change these to
# $object: dependency.h
# and one to simply output
# dependency.h:
# which is needed to avoid the deleted-header problem.
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
} > "$depfile"
rm -f "$tmpdepfile"
else
make_dummy_depfile
fi
}
# A tabulation character.
tab=' '
# A newline character.
nl='
'
# Character ranges might be problematic outside the C locale.
# These definitions help.
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
lower=abcdefghijklmnopqrstuvwxyz
digits=0123456789
alpha=${upper}${lower}
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2
@ -75,6 +128,9 @@ tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
rm -f "$tmpdepfile"
# Avoid interferences from the environment.
gccflag= dashmflag=
# Some modes work just like other modes, but use different flags. We
# parameterize here, but still list the modes in the big case below,
# to make depend.m4 easier to write. Note that we *cannot* use a case
@ -86,32 +142,32 @@ if test "$depmode" = hp; then
fi
if test "$depmode" = dashXmstdout; then
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
fi
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
if test "$depmode" = msvc7msys; then
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
fi
if test "$depmode" = xlc; then
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency informations.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
fi
case "$depmode" in
@ -134,8 +190,7 @@ gcc3)
done
"$@"
stat=$?
if test $stat -eq 0; then :
else
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
@ -143,13 +198,17 @@ gcc3)
;;
gcc)
## Note that this doesn't just cater to obsosete pre-3.x GCC compilers.
## but also to in-use compilers like IMB xlc/xlC and the HP C compiler.
## (see the conditional assignment to $gccflag above).
## There are various ways to get dependency output from gcc. Here's
## why we pick this rather obscure method:
## - Don't want to use -MD because we'd like the dependencies to end
## up in a subdir. Having to rename by hand is ugly.
## (We might end up doing this anyway to support other compilers.)
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
## -MM, not -M (despite what the docs say).
## -MM, not -M (despite what the docs say). Also, it might not be
## supported by the other compilers which use the 'gcc' depmode.
## - Using -M directly means running the compiler twice (even worse
## than renaming).
if test -z "$gccflag"; then
@ -157,15 +216,14 @@ gcc)
fi
"$@" -Wp,"$gccflag$tmpdepfile"
stat=$?
if test $stat -eq 0; then :
else
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
## The second -e expression handles DOS-style file names with drive letters.
# The second -e expression handles DOS-style file names with drive
# letters.
sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the "deleted header file" problem.
@ -174,15 +232,15 @@ gcc)
## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly.
tr ' ' "$nl" < "$tmpdepfile" |
## Some versions of gcc put a space before the ':'. On the theory
## that the space means something, we add a space to the output as
## well. hp depmode also adds that space, but also prefixes the VPATH
## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
@ -200,8 +258,7 @@ sgi)
"$@" -MDupdate "$tmpdepfile"
fi
stat=$?
if test $stat -eq 0; then :
else
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
@ -209,7 +266,6 @@ sgi)
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
echo "$object : \\" > "$depfile"
# Clip off the initial element (the dependent). Don't try to be
# clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in
@ -217,19 +273,15 @@ sgi)
# the IRIX cc adds comments like '#:fec' to the end of the
# dependency line.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
tr "$nl" ' ' >> "$depfile"
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
| tr "$nl" ' ' >> "$depfile"
echo >> "$depfile"
# The second pass generates a dummy entry for each header file.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
else
# The sourcefile does not contain any dependencies, so just
# store a dummy comment line, to avoid errors with the Makefile
# "include basename.Plo" scheme.
echo "#dummy" > "$depfile"
make_dummy_depfile
fi
rm -f "$tmpdepfile"
;;
@ -247,9 +299,8 @@ aix)
# current directory. Also, the AIX compiler puts '$object:' at the
# start of each line; $object doesn't have directory information.
# Version 6 uses the directory in both cases.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
test "x$dir" = "x$object" && dir=
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.u
tmpdepfile2=$base.u
@ -262,9 +313,7 @@ aix)
"$@" -M
fi
stat=$?
if test $stat -eq 0; then :
else
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
@ -273,65 +322,113 @@ aix)
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
# Each line is of the form 'foo.o: dependent.h'.
# Do two passes, one to just change these to
# '$object: dependent.h' and one to simply 'dependent.h:'.
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
else
# The sourcefile does not contain any dependencies, so just
# store a dummy comment line, to avoid errors with the Makefile
# "include basename.Plo" scheme.
echo "#dummy" > "$depfile"
fi
rm -f "$tmpdepfile"
aix_post_process_depfile
;;
icc)
# Intel's C compiler anf tcc (Tiny C Compiler) understand '-MD -MF file'.
# However on
# $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
# ICC 7.0 will fill foo.d with something like
# foo.o: sub/foo.c
# foo.o: sub/foo.h
# which is wrong. We want
# sub/foo.o: sub/foo.c
# sub/foo.o: sub/foo.h
# sub/foo.c:
# sub/foo.h:
# ICC 7.1 will output
# foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using '\':
# foo.o: sub/foo.c ... \
# sub/foo.h ... \
# ...
# tcc 0.9.26 (FIXME still under development at the moment of writing)
# will emit a similar output, but also prepend the continuation lines
# with horizontal tabulation characters.
tcc)
# tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26
# FIXME: That version still under development at the moment of writing.
# Make that this statement remains true also for stable, released
# versions.
# It will wrap lines (doesn't matter whether long or short) with a
# trailing '\', as in:
#
# foo.o : \
# foo.c \
# foo.h \
#
# It will put a trailing '\' even on the last line, and will use leading
# spaces rather than leading tabs (at least since its commit 0394caf7
# "Emit spaces for -MD").
"$@" -MD -MF "$tmpdepfile"
stat=$?
if test $stat -eq 0; then :
else
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each line is of the form 'foo.o: dependent.h',
# or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
# We have to change lines of the first kind to '$object: \'.
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
# And for each line of the second kind, we have to emit a 'dep.h:'
# dummy dependency, to avoid the deleted-header problem.
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
rm -f "$tmpdepfile"
;;
## The order of this option in the case statement is important, since the
## shell code in configure will try each of these formats in the order
## listed in this file. A plain '-MD' option would be understood by many
## compilers, so we must ensure this comes after the gcc and icc options.
pgcc)
# Portland's C compiler understands '-MD'.
# Will always output deps to 'file.d' where file is the root name of the
# source file under compilation, even if file resides in a subdirectory.
# The object file name does not affect the name of the '.d' file.
# pgcc 10.2 will output
# foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using '\' :
# foo.o: sub/foo.c ... \
# sub/foo.h ... \
# ...
set_dir_from "$object"
# Use the source, not the object, to determine the base name, since
# that's sadly what pgcc will do too.
set_base_from "$source"
tmpdepfile=$base.d
# For projects that build the same source file twice into different object
# files, the pgcc approach of using the *source* file root name can cause
# problems in parallel builds. Use a locking strategy to avoid stomping on
# the same $tmpdepfile.
lockdir=$base.d-lock
trap "
echo '$0: caught signal, cleaning up...' >&2
rmdir '$lockdir'
exit 1
" 1 2 13 15
numtries=100
i=$numtries
while test $i -gt 0; do
# mkdir is a portable test-and-set.
if mkdir "$lockdir" 2>/dev/null; then
# This process acquired the lock.
"$@" -MD
stat=$?
# Release the lock.
rmdir "$lockdir"
break
else
# If the lock is being held by a different process, wait
# until the winning process is done or we timeout.
while test -d "$lockdir" && test $i -gt 0; do
sleep 1
i=`expr $i - 1`
done
fi
i=`expr $i - 1`
done
trap - 1 2 13 15
if test $i -le 0; then
echo "$0: failed to acquire lock after $numtries attempts" >&2
echo "$0: check lockdir '$lockdir'" >&2
exit 1
fi
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each line is of the form `foo.o: dependent.h',
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
# Do two passes, one to just change these to
# '$object: dependent.h' and one to simply 'dependent.h:'.
sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \
< "$tmpdepfile" > "$depfile"
sed '
s/[ '"$tab"'][ '"$tab"']*/ /g
s/^ *//
s/ *\\*$//
s/^[^:]*: *//
/^$/d
/:$/d
s/$/ :/
' < "$tmpdepfile" >> "$depfile"
# `$object: dependent.h' and one to simply `dependent.h:'.
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
@ -342,9 +439,8 @@ hp2)
# 'foo.d', which lands next to the object file, wherever that
# happens to be.
# Much of this is similar to the tru64 case; see comments there.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
test "x$dir" = "x$object" && dir=
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir.libs/$base.d
@ -355,8 +451,7 @@ hp2)
"$@" +Maked
fi
stat=$?
if test $stat -eq 0; then :
else
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2"
exit $stat
fi
@ -366,76 +461,61 @@ hp2)
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
# Add 'dependent.h:' lines.
sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else
echo "#dummy" > "$depfile"
make_dummy_depfile
fi
rm -f "$tmpdepfile" "$tmpdepfile2"
;;
tru64)
# The Tru64 compiler uses -MD to generate dependencies as a side
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
test "x$dir" = "x$object" && dir=
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
# The Tru64 compiler uses -MD to generate dependencies as a side
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
# With Tru64 cc, shared objects can also be used to make a
# static library. This mechanism is used in libtool 1.4 series to
# handle both shared and static libraries in a single compilation.
# With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
#
# With libtool 1.5 this exception was removed, and libtool now
# generates 2 separate objects for the 2 libraries. These two
# compilations output dependencies in $dir.libs/$base.o.d and
# in $dir$base.o.d. We have to check for both files, because
# one of the two compilations can be disabled. We should prefer
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
# automatically cleaned when .libs/ is deleted, while ignoring
# the former would cause a distcleancheck panic.
tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
tmpdepfile2=$dir$base.o.d # libtool 1.5
tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
"$@" -Wc,-MD
else
tmpdepfile1=$dir$base.o.d
tmpdepfile2=$dir$base.d
tmpdepfile3=$dir$base.d
tmpdepfile4=$dir$base.d
"$@" -MD
fi
if test "$libtool" = yes; then
# Libtool generates 2 separate objects for the 2 libraries. These
# two compilations output dependencies in $dir.libs/$base.o.d and
# in $dir$base.o.d. We have to check for both files, because
# one of the two compilations can be disabled. We should prefer
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
# automatically cleaned when .libs/ is deleted, while ignoring
# the former would cause a distcleancheck panic.
tmpdepfile1=$dir$base.o.d # libtool 1.5
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
"$@" -Wc,-MD
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
tmpdepfile3=$dir$base.d
"$@" -MD
fi
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
exit $stat
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
else
echo "#dummy" > "$depfile"
fi
rm -f "$tmpdepfile"
;;
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
# Same post-processing that is required for AIX mode.
aix_post_process_depfile
;;
msvc7)
if test "$libtool" = yes; then
@ -446,8 +526,7 @@ msvc7)
"$@" $showIncludes > "$tmpdepfile"
stat=$?
grep -v '^Note: including file: ' "$tmpdepfile"
if test "$stat" = 0; then :
else
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
@ -473,6 +552,7 @@ $ {
G
p
}' >> "$depfile"
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
rm -f "$tmpdepfile"
;;
@ -524,13 +604,14 @@ dashmstdout)
# in the target name. This is to cope with DOS-style filenames:
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
"$@" $dashmflag |
sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile"
tr ' ' "$nl" < "$tmpdepfile" | \
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
# Some versions of the HPUX 10.20 sed can't process this sed invocation
# correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
@ -583,10 +664,12 @@ makedepend)
# makedepend may prepend the VPATH from the source file name to the object.
# No need to regex-escape $object, excess matching of '.' is harmless.
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
# Some versions of the HPUX 10.20 sed can't process the last invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed '1,2d' "$tmpdepfile" \
| tr ' ' "$nl" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile" "$tmpdepfile".bak
;;
@ -622,10 +705,10 @@ cpp)
esac
done
"$@" -E |
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
sed '$ s: \\$::' > "$tmpdepfile"
"$@" -E \
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
| sed '$ s: \\$::' > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
cat < "$tmpdepfile" >> "$depfile"
@ -657,15 +740,15 @@ msvisualcpp)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
set fnord "$@"
shift
shift
;;
set fnord "$@"
shift
shift
;;
*)
set fnord "$@" "$arg"
shift
shift
;;
set fnord "$@" "$arg"
shift
shift
;;
esac
done
"$@" -E 2>/dev/null |

View File

@ -0,0 +1,764 @@
Building PCRE without using autotools
-------------------------------------
This document contains the following sections:
General
Generic instructions for the PCRE C library
The C++ wrapper functions
Building for virtual Pascal
Stack size in Windows environments
Linking programs in Windows environments
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE on Windows with CMake
Use of relative paths with CMake on Windows
Testing with RunTest.bat
Building under Windows CE with Visual Studio 200x
Building under Windows with BCC5.5
Building using Borland C++ Builder 2007 (CB2007) and higher
Building PCRE on OpenVMS
Building PCRE on Stratus OpenVOS
Building PCRE on native z/OS and z/VM
GENERAL
I (Philip Hazel) have no experience of Windows or VMS sytems and how their
libraries work. The items in the PCRE distribution and Makefile that relate to
anything other than Linux systems are untested by me.
There are some other comments and files (including some documentation in CHM
format) in the Contrib directory on the FTP site:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
The basic PCRE library consists entirely of code written in Standard C, and so
should compile successfully on any system that has a Standard C compiler and
library. The C++ wrapper functions are a separate issue (see below).
The PCRE distribution includes a "configure" file for use by the configure/make
(autotools) build system, as found in many Unix-like environments. The README
file contains information about the options for "configure".
There is also support for CMake, which some users prefer, especially in Windows
environments, though it can also be run in Unix-like environments. See the
section entitled "Building PCRE on Windows with CMake" below.
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
names config.h.generic and pcre.h.generic. These are provided for those who
build PCRE without using "configure" or CMake. If you use "configure" or CMake,
the .generic versions are not used.
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
The following are generic instructions for building the PCRE C library "by
hand". If you are going to use CMake, this section does not apply to you; you
can skip ahead to the CMake section.
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
settings that it contains to whatever is appropriate for your environment.
In particular, you can alter the definition of the NEWLINE macro to
specify what character(s) you want to be interpreted as line terminators.
In an EBCDIC environment, you MUST change NEWLINE, because its default
value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15,
NL), though in some cases it may be 37 (0x25).
When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H
to your compiler so that config.h is included in the sources.
An alternative approach is not to edit config.h, but to use -D on the
compiler command line to make any changes that you need to the
configuration options. In this case -DHAVE_CONFIG_H must not be set.
NOTE: There have been occasions when the way in which certain parameters
in config.h are used has changed between releases. (In the configure/make
world, this is handled automatically.) When upgrading to a new release,
you are strongly advised to review config.h.generic before re-using what
you had previously.
(2) Copy or rename the file pcre.h.generic as pcre.h.
(3) EITHER:
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
OR:
Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
you have set up config.h), and then run it with the single argument
"pcre_chartables.c". This generates a set of standard character tables
and writes them to that file. The tables are generated using the default
C locale for your system. If you want to use a locale that is specified
by LC_xxx environment variables, add the -L option to the dftables
command. You must use this method if you are building on a system that
uses EBCDIC code.
The tables in pcre_chartables.c are defaults. The caller of PCRE can
specify alternative tables at run time.
(4) Ensure that you have the following header files:
pcre_internal.h
ucp.h
(5) For an 8-bit library, compile the following source files, setting
-DHAVE_CONFIG_H as a compiler option if you have set up config.h with your
configuration, or else use other -D settings to change the configuration
as required.
pcre_byte_order.c
pcre_chartables.c
pcre_compile.c
pcre_config.c
pcre_dfa_exec.c
pcre_exec.c
pcre_fullinfo.c
pcre_get.c
pcre_globals.c
pcre_jit_compile.c
pcre_maketables.c
pcre_newline.c
pcre_ord2utf8.c
pcre_refcount.c
pcre_string_utils.c
pcre_study.c
pcre_tables.c
pcre_ucd.c
pcre_valid_utf8.c
pcre_version.c
pcre_xclass.c
Make sure that you include -I. in the compiler command (or equivalent for
an unusual compiler) so that all included PCRE header files are first
sought in the current directory. Otherwise you run the risk of picking up
a previously-installed file from somewhere else.
Note that you must still compile pcre_jit_compile.c, even if you have not
defined SUPPORT_JIT in config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre_jit_compile.c #includes sources from the sljit subdirectory, where
there should be 16 files, all of whose names begin with "sljit".
(6) Now link all the compiled code into an object library in whichever form
your system keeps such libraries. This is the basic PCRE C 8-bit library.
If your system has static and shared libraries, you may have to do this
once for each type.
(7) If you want to build a 16-bit library (as well as, or instead of the 8-bit
or 32-bit libraries) repeat steps 5-6 with the following files:
pcre16_byte_order.c
pcre16_chartables.c
pcre16_compile.c
pcre16_config.c
pcre16_dfa_exec.c
pcre16_exec.c
pcre16_fullinfo.c
pcre16_get.c
pcre16_globals.c
pcre16_jit_compile.c
pcre16_maketables.c
pcre16_newline.c
pcre16_ord2utf16.c
pcre16_refcount.c
pcre16_string_utils.c
pcre16_study.c
pcre16_tables.c
pcre16_ucd.c
pcre16_utf16_utils.c
pcre16_valid_utf16.c
pcre16_version.c
pcre16_xclass.c
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
or 16-bit libraries) repeat steps 5-6 with the following files:
pcre32_byte_order.c
pcre32_chartables.c
pcre32_compile.c
pcre32_config.c
pcre32_dfa_exec.c
pcre32_exec.c
pcre32_fullinfo.c
pcre32_get.c
pcre32_globals.c
pcre32_jit_compile.c
pcre32_maketables.c
pcre32_newline.c
pcre32_ord2utf32.c
pcre32_refcount.c
pcre32_string_utils.c
pcre32_study.c
pcre32_tables.c
pcre32_ucd.c
pcre32_utf32_utils.c
pcre32_valid_utf32.c
pcre32_version.c
pcre32_xclass.c
(9) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the pcreposix.h file and then compile
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
(on its own) as the pcreposix library.
(10) The pcretest program can be linked with any combination of the 8-bit,
16-bit and 32-bit libraries (depending on what you selected in config.h).
Compile pcretest.c and pcre_printint.c (again, don't forget
-DHAVE_CONFIG_H) and link them together with the appropriate library/ies.
If you compiled an 8-bit library, pcretest also needs the pcreposix
wrapper library unless you compiled it with -DNOPOSIX.
(11) Run pcretest on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE"
in the README file. If you compiled more than one of the 8-bit, 16-bit and
32-bit libraries, you need to run pcretest with the -16 option to do
16-bit tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run
if you have built PCRE without it. See the comments at the start of each
testinput file. If you have a suitable Unix-like shell, the RunTest script
will run the appropriate tests for you. The command "RunTest list" will
output a list of all the tests.
Note that the supplied files are in Unix format, with just LF characters
as line terminators. You may need to edit them to change this if your
system uses a different convention. If you are using Windows, you probably
should use the wintestinput3 file instead of testinput3 (and the
corresponding output file). This is a locale test; wintestinput3 sets the
locale to "french" rather than "fr_FR", and there some minor output
differences.
(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
by the testdata files. However, you might also like to build and run
the freestanding JIT test program, pcre_jit_test.c.
(13) If you want to use the pcregrep command, compile and link pcregrep.c; it
uses only the basic 8-bit PCRE library (it does not need the pcreposix
library).
THE C++ WRAPPER FUNCTIONS
The PCRE distribution also contains some C++ wrapper functions and tests,
applicable to the 8-bit library, which were contributed by Google Inc. On a
system that can use "configure" and "make", the functions are automatically
built into a library called pcrecpp. It should be straightforward to compile
the .cc files manually on other systems. The files called xxx_unittest.cc are
test programs for each of the corresponding xxx.cc files.
BUILDING FOR VIRTUAL PASCAL
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
additional files. The following files in the distribution are for building PCRE
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
STACK SIZE IN WINDOWS ENVIRONMENTS
The default processor stack size of 1Mb in some Windows environments is too
small for matching patterns that need much recursion. In particular, test 2 may
fail because of this. Normally, running out of stack causes a crash, but there
have been cases where the test program has just died silently. See your linker
documentation for how to increase stack size if you experience problems. The
Linux default of 8Mb is a reasonable choice for the stack, though even that can
be too small for some pattern/subject combinations.
PCRE has a compile configuration option to disable the use of stack for
recursion so that heap is used instead. However, pattern matching is
significantly slower when this is done. There is more about stack usage in the
"pcrestack" documentation.
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
If you want to statically link a program against a PCRE library in the form of
a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
be declared __declspec(dllimport), with unwanted results.
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
It is possible to compile programs to use different calling conventions using
MSVC. Search the web for "calling conventions" for more information. To make it
easier to change the calling convention for the exported functions in the
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
not set, it defaults to empty; the default calling convention is then used
(which is what is wanted most of the time).
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE")
There are two ways of building PCRE using the "configure, make, make install"
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
the same thing; they are completely different from each other. There is also
support for building using CMake, which some users find a more straightforward
way of building PCRE under Windows.
The MinGW home page (http://www.mingw.org/) says this:
MinGW: A collection of freely available and freely distributable Windows
specific header files and import libraries combined with GNU toolsets that
allow one to produce native Windows programs that do not rely on any
3rd-party C runtime DLLs.
The Cygwin home page (http://www.cygwin.com/) says this:
Cygwin is a Linux-like environment for Windows. It consists of two parts:
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
substantial Linux API functionality
. A collection of tools which provide Linux look and feel.
The Cygwin DLL currently works with all recent, commercially released x86 32
bit and 64 bit versions of Windows, with the exception of Windows CE.
On both MinGW and Cygwin, PCRE should build correctly using:
./configure && make && make install
This should create two libraries called libpcre and libpcreposix, and, if you
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
independent libraries: when you link with libpcreposix or libpcrecpp you must
also link with libpcre, which contains the basic functions. (Some earlier
releases of PCRE included the basic libpcre functions in libpcreposix. This no
longer happens.)
A user submitted a special-purpose patch that makes it easy to create
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
as a special target. If you use this target, no other files are built, and in
particular, the pcretest and pcregrep programs are not built. An example of how
this might be used is:
./configure --enable-utf --disable-cpp CFLAGS="-03 -s"; make pcre.dll
Using Cygwin's compiler generates libraries and executables that depend on
cygwin1.dll. If a library that is generated this way is distributed,
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
licence, this forces not only PCRE to be under the GPL, but also the entire
application. A distributor who wants to keep their own code proprietary must
purchase an appropriate Cygwin licence.
MinGW has no such restrictions. The MinGW compiler generates a library or
executable that can run standalone on Windows without any third party dll or
licensing issues.
But there is more complication:
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
gcc and MinGW's gcc). So, a user can:
. Build native binaries by using MinGW or by getting Cygwin and using
-mno-cygwin.
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
compiler flags.
The test files that are supplied with PCRE are in UNIX format, with LF
characters as line terminators. Unless your PCRE library uses a default newline
option that includes LF as a valid newline, it may be necessary to change the
line terminators in the test files to get some of the tests to work.
BUILDING PCRE ON WINDOWS WITH CMAKE
CMake is an alternative configuration facility that can be used instead of
"configure". CMake creates project files (make files, solution files, etc.)
tailored to numerous development environments, including Visual Studio,
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE source and build
directories.
The following instructions were contributed by a PCRE user. If they are not
followed exactly, errors may occur. In the event that errors do occur, it is
recommended that you delete the CMake cache before attempting to repeat the
CMake build process. In the CMake GUI, the cache can be deleted by selecting
"File > Delete Cache".
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE source tree into a source
directory such as C:\pcre. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre\pcre-xx\build.
4. Run cmake-gui from the Shell envirornment of your build tool, for example,
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
to start Cmake from the Windows Start menu, as this can lead to errors.
5. Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build
directories, respectively.
6. Hit the "Configure" button.
7. Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
8. The GUI will then list several configuration options. This is where
you can enable UTF-8 support or other PCRE optional features.
9. Hit "Configure" again. The adjacent "Generate" button should now be
active.
10. Hit "Generate".
11. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
12. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
A PCRE user comments as follows: I thought that others may want to know the
current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is:
-- AdditionalIncludeDirectories is only partially modified (only the
first path - see below)
-- Only some of the contained file paths are modified - shown below for
pcre.vcproj
-- It properly modifies
I am sure CMake people can fix that if they want to. Until then one will
need to replace existing absolute paths in project files with relative
paths manually (e.g. from VS) - relative to project file location. I did
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
deal.
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
RelativePath="pcre.h"
RelativePath="pcre_chartables.c"
RelativePath="pcre_chartables.c.rule"
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
ALL_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending
on your configuration options, possibly other test programs) in the build
directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths.
For manual testing with RunTest.bat, provided the build dir is a subdirectory
of the source directory: Open command shell window. Chdir to the location
of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
To run only a particular test with RunTest.Bat provide a test number argument.
Otherwise:
1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe
have been created.
2. Edit RunTest.bat to indentify the full or relative location of
the pcre source (wherein which the testdata folder resides), e.g.:
set srcdir=C:\pcre\pcre-8.20
3. In a Windows command environment, chdir to the location of your bat and
exe programs.
4. Run RunTest.bat. Test outputs will automatically be compared to expected
results, and discrepancies will be identified in the console output.
To independently test the just-in-time compiler, run pcre_jit_test.exe.
To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
pcre_scanner_unittest.exe.
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
Vincent Richomme sent a zip archive of files to help with this process. They
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
site.
BUILDING UNDER WINDOWS WITH BCC5.5
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
Some of the core BCC libraries have a version of PCRE from 1998 built in, which
can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version
mismatch. I'm including an easy workaround below, if you'd like to include it
in the non-unix instructions:
When linking a project with BCC5.5, pcre.lib must be included before any of the
libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line.
BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER
A PCRE user sent these comments about this environment (see also the comment
from another user that follows them):
The XE versions of C++ Builder come with a RegularExpressionsCore class which
contain a version of TPerlRegEx. However, direct use of the C PCRE library may
be desirable.
The default makevp.bat, however, supplied with PCRE builds a version of PCRE
that is not usable with any version of C++ Builder because the compiler ships
with an embedded version of PCRE, version 2.01 from 1998! [See also the note
about BCC5.5 above.] If you want to use PCRE you'll need to rename the
functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just
use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the
embedded version of PCRE does not have the 16 bit function names, there is no
conflict.
Building PCRE using a C++ Builder static library project file (recommended):
1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder
original include path.
2. Download PCRE from pcre.org and extract to a directory.
3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to
pcre.h, and config.h.generic to config.h.
4. Edit pcre.h and pcre_config.c so that they include config.h.
5. Edit config.h like so:
Comment out the following lines:
#define PACKAGE "pcre"
#define PACKAGE_BUGREPORT ""
#define PACKAGE_NAME "PCRE"
#define PACKAGE_STRING "PCRE 8.32"
#define PACKAGE_TARNAME "pcre"
#define PACKAGE_URL ""
#define PACKAGE_VERSION "8.32"
Add the following lines:
#ifndef SUPPORT_UTF
#define SUPPORT_UTF 100 // any value is fine
#endif
#ifndef SUPPORT_UCP
#define SUPPORT_UCP 101 // any value is fine
#endif
#ifndef SUPPORT_UCP
#define SUPPORT_PCRE16 102 // any value is fine
#endif
#ifndef SUPPORT_UTF8
#define SUPPORT_UTF8 103 // any value is fine
#endif
6. Build a C++ Builder project using the IDE. Go to File / New / Other and
choose Static Library. You can name it pcre.cbproj or whatever. Now set your
paths by going to Project / Options. Set the Include path. Do this from the
"Base" option to apply to both Release and Debug builds. Now add the following
files to the project:
pcre.h
pcre16_byte_order.c
pcre16_chartables.c
pcre16_compile.c
pcre16_config.c
pcre16_dfa_exec.c
pcre16_exec.c
pcre16_fullinfo.c
pcre16_get.c
pcre16_globals.c
pcre16_maketables.c
pcre16_newline.c
pcre16_ord2utf16.c
pcre16_printint.c
pcre16_refcount.c
pcre16_string_utils.c
pcre16_study.c
pcre16_tables.c
pcre16_ucd.c
pcre16_utf16_utils.c
pcre16_valid_utf16.c
pcre16_version.c
pcre16_xclass.c
//Optional
pcre_version.c
7. After compiling the .lib file, copy the .lib and header files to a project
you want to use PCRE with. Enjoy.
Optional ... Building PCRE using the makevp.bat file:
1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit
versions.
2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat.
Another PCRE user added this comment:
Another approach I successfully used for some years with BCB 5 and 6 was to
make sure that include and library paths of PCRE are configured before the
default paths of the IDE in the dialogs where one can manage those paths.
Afterwards one can open the project files using a text editor and manually add
the self created library for pcre itself, pcrecpp doesn't ship with the IDE, in
the library nodes where the IDE manages its own libraries to link against in
front of the IDE-own libraries. This way one can use the default PCRE function
names without getting access violations on runtime.
<ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/>
BUILDING PCRE ON OPENVMS
Stephen Hoffman sent the following, in December 2012:
"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the
OpenVMS port and here
<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip>
is a zip with the OpenVMS files, and with one modified testing-related PCRE
file." This is a port of PCRE 8.32.
Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS.
They relate to an older version of PCRE that used fewer source files, so the
exact commands will need changing. See the current list of source files above.
"It was quite easy to compile and link the library. I don't have a formal
make file but the attached file [reproduced below] contains the OpenVMS DCL
commands I used to build the library. I had to add #define
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
The library was built on:
O/S: HP OpenVMS v7.3-1
Compiler: Compaq C v6.5-001-48BCD
Linker: vA13-01
The test results did not match 100% due to the issues you mention in your
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
modified some of the character tables temporarily and was able to get the
results to match. Tests using the fr locale did not match since I don't have
that locale loaded. The study size was always reported to be 3 less than the
value in the standard test output files."
=========================
$! This DCL procedure builds PCRE on OpenVMS
$!
$! I followed the instructions in the non-unix-use file in the distribution.
$!
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES
$ COMPILE DFTABLES.C
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
$ COMPILE MAKETABLES.C
$ COMPILE GET.C
$ COMPILE STUDY.C
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
$! did not seem to be defined anywhere.
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
$ COMPILE PCRE.C
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
$! did not seem to be defined anywhere.
$ COMPILE PCREPOSIX.C
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
$ COMPILE PCRETEST.C
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
$! C programs that want access to command line arguments must be
$! defined as a symbol
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
$! Arguments must be enclosed in quotes.
$ PCRETEST "-C"
$! Test results:
$!
$! The test results did not match 100%. The functions isprint(), iscntrl(),
$! isgraph() and ispunct() on OpenVMS must not produce the same results
$! as the system that built the test output files provided with the
$! distribution.
$!
$! The study size did not match and was always 3 less on OpenVMS.
$!
$! Locale could not be set to fr
$!
=========================
BUILDING PCRE ON STRATUS OPENVOS
These notes on the port of PCRE to VOS (lightly edited) were supplied by
Ashutosh Warikoo, whose email address has the local part awarikoo and the
domain nse.co.in. The port was for version 7.9 in August 2009.
1. Building PCRE
I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any
problems. I used the following packages to build PCRE:
ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz
Please read and follow the instructions that come with these packages. To start
the build of pcre, from the root of the package type:
./build.sh
2. Installing PCRE
Once you have successfully built PCRE, login to the SysAdmin group, switch to
the root user, and type
[ !create_dir (master_disk)>usr --if needed ]
[ !create_dir (master_disk)>usr>local --if needed ]
!gmake install
This installs PCRE and its man pages into /usr/local. You can add
(master_disk)>usr>local>bin to your command search paths, or if you are in
BASH, add /usr/local/bin to the PATH environment variable.
4. Restrictions
This port requires readline library optionally. However during the build I
faced some yet unexplored errors while linking with readline. As it was an
optional component I chose to disable it.
5. Known Problems
I ran the test suite, but you will have to be your own judge of whether this
command, and this port, suits your purposes. If you find any problems that
appear to be related to the port itself, please let me know. Please see the
build.log file in the root of the package also.
BUILDING PCRE ON NATIVE Z/OS AND Z/VM
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
applications can be supported through UNIX System Services, and in such an
environment PCRE can be built in the same way as in other systems. However, in
native z/OS (without UNIX System Services) and in z/VM, special ports are
required. For details, please see this web site:
http://www.zaconsultants.net
There is also a mirror here:
http://www.vsoft-software.com/downloads.html
==========================
Last Updated: 14 May 2013

View File

@ -0,0 +1,991 @@
README file for PCRE (Perl-compatible regular expression library)
-----------------------------------------------------------------
The latest release of PCRE is always available in three alternative formats
from:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
There is a mailing list for discussion about the development of PCRE at
pcre-dev@exim.org. You can access the archives and subscribe or manage your
subscription here:
https://lists.exim.org/mailman/listinfo/pcre-dev
Please read the NEWS file if you are upgrading from a previous release.
The contents of this README file are:
The PCRE APIs
Documentation for PCRE
Contributions by users of PCRE
Building PCRE on non-Unix-like systems
Building PCRE without using autotools
Building PCRE using autotools
Retrieving configuration information
Shared libraries
Cross-compiling using autotools
Using HP's ANSI C++ compiler (aCC)
Compiling in Tru64 using native compilers
Using Sun's compilers for Solaris
Using PCRE from MySQL
Making new tarballs
Testing PCRE
Character tables
File manifest
The PCRE APIs
-------------
PCRE is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. The distribution also
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
C++.
In addition, there is a set of C wrapper functions (again, just for the 8-bit
library) that are based on the POSIX regular expression API (see the pcreposix
man page). These end up in the library called libpcreposix. Note that this just
provides a POSIX calling interface to PCRE; the regular expressions themselves
still follow Perl syntax and semantics. The POSIX API is restricted, and does
not give full access to all of PCRE's facilities.
The header file for the POSIX-style functions is called pcreposix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE with
an existing program that uses the POSIX API, pcreposix.h will have to be
renamed or pointed at by a link.
If you are using the POSIX interface to PCRE and there is already a POSIX regex
library installed on your system, as well as worrying about the regex.h header
file (as mentioned above), you must also take care when linking programs to
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
up the POSIX functions of the same name from the other library.
One way of avoiding this confusion is to compile PCRE with the addition of
-Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
effect of renaming the functions so that the names no longer clash. Of course,
you have to do the same thing for your applications, or write them using the
new names.
Documentation for PCRE
----------------------
If you install PCRE in the normal way on a Unix-like system, you will end up
with a set of man pages whose names all start with "pcre". The one that is just
called "pcre" lists all the others. In addition to these man pages, the PCRE
documentation is supplied in two other forms:
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
doc/pcretest.txt in the source distribution. The first of these is a
concatenation of the text forms of all the section 3 man pages except
the listing of pcredemo.c and those that summarize individual functions.
The other two are the text forms of the section 1 man pages for the
pcregrep and pcretest commands. These text forms are provided for ease of
scanning with text editors or similar tools. They are installed in
<prefix>/share/doc/pcre, where <prefix> is the installation prefix
(defaulting to /usr/local).
2. A set of files containing all the documentation in HTML form, hyperlinked
in various ways, and rooted in a file called index.html, is distributed in
doc/html and installed in <prefix>/share/doc/pcre/html.
Users of PCRE have contributed files containing the documentation for various
releases in CHM format. These can be found in the Contrib directory of the FTP
site (see next section).
Contributions by users of PCRE
------------------------------
You can find contributions from PCRE users in the directory
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
There is a README file giving brief descriptions of what they are. Some are
complete in themselves; others are pointers to URLs containing relevant files.
Some of this material is likely to be well out-of-date. Several of the earlier
contributions provided support for compiling PCRE on various flavours of
Windows (I myself do not use Windows). Nowadays there is more Windows support
in the standard distribution, so these contibutions have been archived.
A PCRE user maintains downloadable Windows binaries of the pcregrep and
pcretest programs here:
http://www.rexegg.com/pcregrep-pcretest.html
Building PCRE on non-Unix-like systems
--------------------------------------
For a non-Unix-like system, please read the comments in the file
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
"make" you may be able to build PCRE using autotools in the same way as for
many Unix-like systems.
PCRE can also be configured using the GUI facility provided by CMake's
cmake-gui command. This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
PCRE has been compiled on many different operating systems. It should be
straightforward to build PCRE on any system that has a Standard C compiler and
library, because it uses only Standard C functions.
Building PCRE without using autotools
-------------------------------------
The use of autotools (in particular, libtool) is problematic in some
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
file for ways of building PCRE without using autotools.
Building PCRE using autotools
-----------------------------
If you are using HP's ANSI C++ compiler (aCC), please see the special note
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
The following instructions assume the use of the widely used "configure; make;
make install" (autotools) process.
To build PCRE on system that supports autotools, first run the "configure"
command from the PCRE distribution directory, with your current directory set
to the directory where you want the files to be created. This command is a
standard GNU "autoconf" configuration script, for which generic instructions
are supplied in the file INSTALL.
Most commonly, people build PCRE within its own distribution directory, and in
this case, on many systems, just running "./configure" is sufficient. However,
the usual methods of changing standard defaults are available. For example:
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
This command specifies that the C compiler should be run with the flags '-O2
-Wall' instead of the default, and that "make install" should install PCRE
under /opt/local instead of the default /usr/local.
If you want to build in a different directory, just run "configure" with that
directory as current. For example, suppose you have unpacked the PCRE source
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
cd /build/pcre/pcre-xxx
/source/pcre/pcre-xxx/configure
PCRE is written in C and is normally compiled as a C library. However, it is
possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this.
There are some optional features that can be included or omitted from the PCRE
library. They are also documented in the pcrebuild man page.
. By default, both shared and static libraries are built. You can change this
by adding one of these options to the "configure" command:
--disable-shared
--disable-static
(See also "Shared libraries on Unix-like systems" below.)
. By default, only the 8-bit library is built. If you add --enable-pcre16 to
the "configure" command, the 16-bit library is also built. If you add
--enable-pcre32 to the "configure" command, the 32-bit library is also built.
If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
building the 8-bit library.
. If you are building the 8-bit library and want to suppress the building of
the C++ wrapper library, you can add --disable-cpp to the "configure"
command. Otherwise, when "configure" is run without --disable-pcre8, it will
try to find a C++ compiler and C++ header files, and if it succeeds, it will
try to build the C++ wrapper.
. If you want to include support for just-in-time compiling, which can give
large performance improvements on certain platforms, add --enable-jit to the
"configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error.
. When JIT support is enabled, pcregrep automatically makes use of it, unless
you add --disable-pcregrep-jit to the "configure" command.
. If you want to make use of the support for UTF-8 Unicode character strings in
the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
or UTF-32 Unicode character strings in the 32-bit library, you must add
--enable-utf to the "configure" command. Without it, the code for handling
UTF-8, UTF-16 and UTF-8 is not included in the relevant library. Even
when --enable-utf is included, the use of a UTF encoding still has to be
enabled by an option at run time. When PCRE is compiled with this option, its
input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC
platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
the same time.
. There are no separate options for enabling UTF-8, UTF-16 and UTF-32
independently because that would allow ridiculous settings such as requesting
UTF-16 support while building only the 8-bit library. However, the option
--enable-utf8 is retained for backwards compatibility with earlier releases
that did not support 16-bit or 32-bit character strings. It is synonymous with
--enable-utf. It is not possible to configure one library with UTF support
and the other without in the same configuration.
. If, in addition to support for UTF-8/16/32 character strings, you want to
include support for the \P, \p, and \X sequences that recognize Unicode
character properties, you must add --enable-unicode-properties to the
"configure" command. This adds about 30K to the size of the library (in the
form of a property table); only the basic two-letter properties such as Lu
are supported.
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
of the preceding, or any of the Unicode newline sequences as indicating the
end of a line. Whatever you specify at build time is the default; the caller
of PCRE can change the selection at run time. The default newline indicator
is a single LF character (the Unix standard). You can specify the default
newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
--enable-newline-is-any to the "configure" command, respectively.
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
the standard tests will fail, because the lines in the test files end with
LF. Even if the files are edited to change the line endings, there are likely
to be some failures. With --enable-newline-is-anycrlf or
--enable-newline-is-any, many tests should succeed, but there may be some
failures.
. By default, the sequence \R in a pattern matches any Unicode line ending
sequence. This is independent of the option specifying what PCRE considers to
be the end of a line (see above). However, the caller of PCRE can restrict \R
to match only CR, LF, or CRLF. You can make this the default by adding
--enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
. When called via the POSIX interface, PCRE uses malloc() to get additional
storage for processing capturing parentheses if there are more than 10 of
them in a pattern. You can increase this threshold by setting, for example,
--with-posix-malloc-threshold=20
on the "configure" command.
. PCRE has a counter that limits the depth of nesting of parentheses in a
pattern. This limits the amount of system stack that a pattern uses when it
is compiled. The default is 250, but you can change it by setting, for
example,
--with-parens-nest-limit=500
. PCRE has a counter that can be set to limit the amount of resources it uses
when matching a pattern. If the limit is exceeded during a match, the match
fails. The default is ten million. You can change the default by setting, for
example,
--with-match-limit=500000
on the "configure" command. This is just the default; individual calls to
pcre_exec() can supply their own value. There is more discussion on the
pcreapi man page.
. There is a separate counter that limits the depth of recursive function calls
during a matching process. This also has a default of ten million, which is
essentially "unlimited". You can change the default by setting, for example,
--with-match-limit-recursion=500000
Recursive function calls use up the runtime stack; running out of stack can
cause programs to crash in strange ways. There is a discussion about stack
sizes in the pcrestack man page.
. The default maximum compiled pattern size is around 64K. You can increase
this by adding --with-link-size=3 to the "configure" command. In the 8-bit
library, PCRE then uses three bytes instead of two for offsets to different
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
the same as --with-link-size=4, which (in both libraries) uses four-byte
offsets. Increasing the internal link size reduces performance. In the 32-bit
library, the only supported link size is 4.
. You can build PCRE so that its internal match() function that is called from
pcre_exec() does not call itself recursively. Instead, it uses memory blocks
obtained from the heap via the special functions pcre_stack_malloc() and
pcre_stack_free() to save data that would otherwise be saved on the stack. To
build PCRE like this, use
--disable-stack-for-recursion
on the "configure" command. PCRE runs more slowly in this mode, but it may be
necessary in environments with limited stack sizes. This applies only to the
normal execution of the pcre_exec() function; if JIT support is being
successfully used, it is not relevant. Equally, it does not apply to
pcre_dfa_exec(), which does not use deeply nested recursion. There is a
discussion about stack sizes in the pcrestack man page.
. For speed, PCRE uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, it uses a set of
tables for ASCII encoding that is part of the distribution. If you specify
--enable-rebuild-chartables
a program called dftables is compiled and run in the default C locale when
you obey "make". It builds a source file called pcre_chartables.c. If you do
not specify this option, pcre_chartables.c is created as a copy of
pcre_chartables.c.dist. See "Character tables" below for further information.
. It is possible to compile PCRE for use on systems that use EBCDIC as their
character code (as opposed to ASCII/Unicode) by specifying
--enable-ebcdic
This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
. In environments where valgrind is installed, if you specify
--enable-valgrind
PCRE will use valgrind annotations to mark certain memory regions as
unaddressable. This allows it to detect invalid memory accesses, and is
mostly useful for debugging PCRE itself.
. In environments where the gcc compiler is used and lcov version 1.6 or above
is installed, if you specify
--enable-coverage
the build process implements a code coverage report for the test suite. The
report is generated by running "make coverage". If ccache is installed on
your system, it must be disabled when building PCRE for coverage reporting.
You can do this by setting the environment variable CCACHE_DISABLE=1 before
running "make" to build PCRE. There is more information about coverage
reporting in the "pcrebuild" documentation.
. The pcregrep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
specifying one or both of
--enable-pcregrep-libz
--enable-pcregrep-libbz2
Of course, the relevant libraries must be installed on your system.
. The default size (in bytes) of the internal buffer used by pcregrep can be
set by, for example:
--with-pcregrep-bufsize=51200
The value must be a plain integer. The default is 20480.
. It is possible to compile pcretest so that it links with the libreadline
or libedit libraries, by specifying, respectively,
--enable-pcretest-libreadline or --enable-pcretest-libedit
If this is done, when pcretest's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licenced, so if you distribute a binary of
pcretest linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
Enabling libreadline causes the -lreadline option to be added to the pcretest
build. In many operating environments with a sytem-installed readline
library this is sufficient. However, in some environments (e.g. if an
unmodified distribution version of readline is in use), it may be necessary
to specify something like LIBS="-lncurses" as well. This is because, to quote
the readline INSTALL, "Readline uses the termcap functions, but does not link
with the termcap or curses library itself, allowing applications which link
with readline the to choose an appropriate library." If you get error
messages about missing functions tgetstr, tgetent, tputs, tgetflag, or tgoto,
this is the problem, and linking with the ncurses library should fix it.
The "configure" script builds the following files for the basic C library:
. Makefile the makefile that builds the library
. config.h build-time configuration options for the library
. pcre.h the public PCRE header file
. pcre-config script that shows the building settings such as CFLAGS
that were set for "configure"
. libpcre.pc ) data for the pkg-config command
. libpcre16.pc )
. libpcre32.pc )
. libpcreposix.pc )
. libtool script that builds shared and/or static libraries
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
names config.h.generic and pcre.h.generic. These are provided for those who
have to built PCRE without using "configure" or CMake. If you use "configure"
or CMake, the .generic versions are not used.
When building the 8-bit library, if a C++ compiler is found, the following
files are also built:
. libpcrecpp.pc data for the pkg-config command
. pcrecpparg.h header file for calling PCRE via the C++ wrapper
. pcre_stringpiece.h header for the C++ "stringpiece" functions
The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs.
Once "configure" has run, you can run "make". This builds the the libraries
libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you
enabled JIT support with --enable-jit, a test program called pcre_jit_test is
built as well.
If the 8-bit library is built, libpcreposix and the pcregrep command are also
built, and if a C++ compiler was found on your system, and you did not disable
it with --disable-cpp, "make" builds the C++ wrapper library, which is called
libpcrecpp, as well as some test programs called pcrecpp_unittest,
pcre_scanner_unittest, and pcre_stringpiece_unittest.
The command "make check" runs all the appropriate tests. Details of the PCRE
tests are given below in a separate section of this document.
You can use "make install" to install PCRE into live directories on your
system. The following are installed (file names are all relative to the
<prefix> that is set when "configure" is run):
Commands (bin):
pcretest
pcregrep (if 8-bit support is enabled)
pcre-config
Libraries (lib):
libpcre16 (if 16-bit support is enabled)
libpcre32 (if 32-bit support is enabled)
libpcre (if 8-bit support is enabled)
libpcreposix (if 8-bit support is enabled)
libpcrecpp (if 8-bit and C++ support is enabled)
Configuration information (lib/pkgconfig):
libpcre16.pc
libpcre32.pc
libpcre.pc
libpcreposix.pc
libpcrecpp.pc (if C++ support is enabled)
Header files (include):
pcre.h
pcreposix.h
pcre_scanner.h )
pcre_stringpiece.h ) if C++ support is enabled
pcrecpp.h )
pcrecpparg.h )
Man pages (share/man/man{1,3}):
pcregrep.1
pcretest.1
pcre-config.1
pcre.3
pcre*.3 (lots more pages, all starting "pcre")
HTML documentation (share/doc/pcre/html):
index.html
*.html (lots more pages, hyperlinked from index.html)
Text file documentation (share/doc/pcre):
AUTHORS
COPYING
ChangeLog
LICENCE
NEWS
README
pcre.txt (a concatenation of the man(3) pages)
pcretest.txt the pcretest man page
pcregrep.txt the pcregrep man page
pcre-config.txt the pcre-config man page
If you want to remove PCRE from your system, you can run "make uninstall".
This removes all the files that "make install" installed. However, it does not
remove any directories, because these are often shared with other programs.
Retrieving configuration information
------------------------------------
Running "make install" installs the command pcre-config, which can be used to
recall information about the PCRE configuration and installation. For example:
pcre-config --version
prints the version number, and
pcre-config --libs
outputs information about where the library is installed. This command can be
included in makefiles for programs that use PCRE, saving the programmer from
having to remember too many details.
The pkg-config command is another system for saving and retrieving information
about installed libraries. Instead of separate commands for each library, a
single command is used. For example:
pkg-config --cflags pcre
The data is held in *.pc files that are installed in a directory called
<prefix>/lib/pkgconfig.
Shared libraries
----------------
The default distribution builds PCRE as shared libraries and static libraries,
as long as the operating system supports shared libraries. Shared library
support relies on the "libtool" script which is built as part of the
"configure" process.
The libtool script is used to compile and link both shared and static
libraries. They are placed in a subdirectory called .libs when they are newly
built. The programs pcretest and pcregrep are built to use these uninstalled
libraries (by means of wrapper scripts in the case of shared libraries). When
you use "make install" to install shared libraries, pcregrep and pcretest are
automatically re-built to use the newly installed shared libraries before being
installed themselves. However, the versions left in the build directory still
use the uninstalled libraries.
To build PCRE using static libraries only you must use --disable-shared when
configuring it. For example:
./configure --prefix=/usr/gnu --disable-shared
Then run "make" in the usual way. Similarly, you can use --disable-static to
build only shared libraries.
Cross-compiling using autotools
-------------------------------
You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE for some other host. However, you should NOT
specify --enable-rebuild-chartables, because if you do, the dftables.c source
file is compiled and run on the local host, in order to generate the inbuilt
character tables (the pcre_chartables.c file). This will probably not work,
because dftables.c needs to be compiled with the local compiler, not the cross
compiler.
When --enable-rebuild-chartables is not specified, pcre_chartables.c is created
by making a copy of pcre_chartables.c.dist, which is a default set of tables
that assumes ASCII code. Cross-compiling with the default tables should not be
a problem.
If you need to modify the character tables when cross-compiling, you should
move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and
run it on the local host to make a new version of pcre_chartables.c.dist.
Then when you cross-compile PCRE this new version of the tables will be used.
Using HP's ANSI C++ compiler (aCC)
----------------------------------
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
"configure" script, you must include the "-AA" option in the CXXFLAGS
environment variable in order for the C++ components to compile correctly.
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
needed libraries fail to get included when specifying the "-AA" compiler
option. If you experience unresolved symbols when linking the C++ programs,
use the workaround of specifying the following environment variable prior to
running the "configure" script:
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
Compiling in Tru64 using native compilers
-----------------------------------------
The following error may occur when compiling with native compilers in the Tru64
operating system:
CXX libpcrecpp_la-pcrecpp.lo
cxx: Error: /usr/lib/cmplrs/cxx/V7.1-006/include/cxx/iosfwd, line 58: #error
directive: "cannot include iosfwd -- define __USE_STD_IOSTREAM to
override default - see section 7.1.2 of the C++ Using Guide"
#error "cannot include iosfwd -- define __USE_STD_IOSTREAM to override default
- see section 7.1.2 of the C++ Using Guide"
This may be followed by other errors, complaining that 'namespace "std" has no
member'. The solution to this is to add the line
#define __USE_STD_IOSTREAM 1
to the config.h file.
Using Sun's compilers for Solaris
---------------------------------
A user reports that the following configurations work on Solaris 9 sparcv9 and
Solaris 9 x86 (32-bit):
Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g"
Solaris 9 x86: ./configure --disable-cpp CC=/bin/cc CFLAGS="-g"
Using PCRE from MySQL
---------------------
On systems where both PCRE and MySQL are installed, it is possible to make use
of PCRE from within MySQL, as an alternative to the built-in pattern matching.
There is a web page that tells you how to do this:
http://www.mysqludf.org/lib_mysqludf_preg/index.php
Making new tarballs
-------------------
The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and
zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.
If you have modified any of the man page sources in the doc directory, you
should first run the PrepareRelease script before making a distribution. This
script creates the .txt and HTML forms of the documentation from the man pages.
Testing PCRE
------------
To test the basic PCRE library on a Unix-like system, run the RunTest script.
There is another script called RunGrepTest that tests the options of the
pcregrep command. If the C++ wrapper library is built, three test programs
called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest
are also built. When JIT support is enabled, another test program called
pcre_jit_test is built.
Both the scripts and all the program tests are run if you obey "make check" or
"make test". For other environments, see the instructions in
NON-AUTOTOOLS-BUILD.
The RunTest script runs the pcretest test program (which is documented in its
own man page) on each of the relevant testinput files in the testdata
directory, and compares the output with the contents of the corresponding
testoutput files. RunTest uses a file called testtry to hold the main output
from pcretest. Other files whose names begin with "test" are used as working
files in some tests.
Some tests are relevant only when certain build-time options were selected. For
example, the tests for UTF-8/16/32 support are run only if --enable-utf was
used. RunTest outputs a comment when it skips a test.
Many of the tests that are not skipped are run up to three times. The second
run forces pcre_study() to be called for all patterns except for a few in some
tests that are marked "never study" (see the pcretest program for how this is
done). If JIT support is available, the non-DFA tests are run a third time,
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
This testing can be suppressed by putting "nojit" on the RunTest command line.
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
libraries that are enabled. If you want to run just one set of tests, call
RunTest with either the -8, -16 or -32 option.
If valgrind is installed, you can run the tests under it by putting "valgrind"
on the RunTest command line. To run pcretest on just one or more specific test
files, give their numbers as arguments to RunTest, for example:
RunTest 2 7 11
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
end), or a number preceded by ~ to exclude a test. For example:
Runtest 3-15 ~10
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
except test 13. Whatever order the arguments are in, the tests are always run
in numerical order.
You can also call RunTest with the single argument "list" to cause it to output
a list of tests.
The first test file can be fed directly into the perltest.pl script to check
that Perl gives the same results. The only difference you should see is in the
first few lines, where the Perl version is given instead of the PCRE version.
The second set of tests check pcre_fullinfo(), pcre_study(),
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
detection, and run-time flags that are specific to PCRE, as well as the POSIX
wrapper API. It also uses the debugging flags to check some of the internals of
pcre_compile().
If you build PCRE with a locale setting that is not the standard C locale, the
character tables may be different (see next paragraph). In some cases, this may
cause failures in the second set of tests. For example, in a locale where the
isprint() function yields TRUE for characters in the range 128-255, the use of
[:isascii:] inside a character class defines a different set of characters, and
this shows up in this test as a difference in the compiled code, which is being
listed for checking. Where the comparison test output contains [\x00-\x7f] the
test will contain [\x00-\xff], and similarly in some other cases. This is not a
bug in PCRE.
The third set of tests checks pcre_maketables(), the facility for building a
set of character tables for a specific locale and using them instead of the
default tables. The tests make use of the "fr_FR" (French) locale. Before
running the test, the script checks for the presence of this locale by running
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
in the list of available locales, the third test cannot be run, and a comment
is output to say why. If running this test produces instances of the error
** Failed to set locale "fr_FR"
in the comparison output, it means that locale is not available on your system,
despite being listed by "locale". This does not mean that PCRE is broken.
[If you are trying to run this test on Windows, you may be able to get it to
work by changing "fr_FR" to "french" everywhere it occurs. Alternatively, use
RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
Windows versions of test 2. More info on using RunTest.bat is included in the
document entitled NON-UNIX-USE.]
The fourth and fifth tests check the UTF-8/16/32 support and error handling and
internal UTF features of PCRE that are not relevant to Perl, respectively. The
sixth and seventh tests do the same for Unicode character properties support.
The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32
mode with Unicode property support, respectively.
The eleventh test checks some internal offsets and code size features; it is
run only when the default "link size" of 2 is set (in other cases the sizes
change) and when Unicode property support is enabled.
The twelfth test is run only when JIT support is available, and the thirteenth
test is run only when JIT support is not available. They test some JIT-specific
features such as information output from pcretest about JIT compilation.
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit
mode. These are tests that generate different output in the two modes. They are
for general cases, UTF-8/16/32 support, and Unicode property support,
respectively.
The twentieth test is run only in 16/32-bit mode. It tests some specific
16/32-bit features of the DFA matching engine.
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when
the link size is set to 2 for the 16-bit library. They test reloading
pre-compiled patterns.
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are
for general cases, and UTF-16 support, respectively.
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are
for general cases, and UTF-32 support, respectively.
Character tables
----------------
For speed, PCRE uses four tables for manipulating and identifying characters
whose code point values are less than 256. The final argument of the
pcre_compile() function is a pointer to a block of memory containing the
concatenated tables. A call to pcre_maketables() can be used to generate a set
of tables in the current locale. If the final argument for pcre_compile() is
passed as NULL, a set of default tables that is built into the binary is used.
The source file called pcre_chartables.c contains the default set of tables. By
default, this is created as a copy of pcre_chartables.c.dist, which contains
tables for ASCII coding. However, if --enable-rebuild-chartables is specified
for ./configure, a different version of pcre_chartables.c is built by the
program dftables (compiled from dftables.c), which uses the ANSI C character
handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to
build the table sources. This means that the default C locale which is set for
your system will control the contents of these default tables. You can change
the default tables by editing pcre_chartables.c and then re-building PCRE. If
you do this, you should take care to ensure that the file does not get
automatically re-generated. The best way to do this is to move
pcre_chartables.c.dist out of the way and replace it with your customized
tables.
When the dftables program is run as a result of --enable-rebuild-chartables,
it uses the default C locale that is set on your system. It does not pay
attention to the LC_xxx environment variables. In other words, it uses the
system's default locale rather than whatever the compiling user happens to have
set. If you really do want to build a source set of character tables in a
locale that is specified by the LC_xxx variables, you can run the dftables
program by hand with the -L option. For example:
./dftables -L pcre_chartables.c.special
The first two 256-byte tables provide lower casing and case flipping functions,
respectively. The next table consists of three 32-byte bit maps which identify
digits, "word" characters, and white space, respectively. These are used when
building 32-byte bit maps that represent character classes for code points less
than 256.
The final 256-byte table has bits indicating various character types, as
follows:
1 white space character
2 letter
4 decimal digit
8 hexadecimal digit
16 alphanumeric or '_'
128 regular expression metacharacter or binary zero
You should not alter the set of characters that contain the 128 bit, as that
will cause PCRE to malfunction.
File manifest
-------------
The distribution should contain the files listed below. Where a file name is
given as pcre[16|32]_xxx it means that there are three files, one with the name
pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
(A) Source files of the PCRE library functions and their headers:
dftables.c auxiliary program for building pcre_chartables.c
when --enable-rebuild-chartables is specified
pcre_chartables.c.dist a default set of character tables that assume ASCII
coding; used, unless --enable-rebuild-chartables is
specified, by copying to pcre[16]_chartables.c
pcreposix.c )
pcre[16|32]_byte_order.c )
pcre[16|32]_compile.c )
pcre[16|32]_config.c )
pcre[16|32]_dfa_exec.c )
pcre[16|32]_exec.c )
pcre[16|32]_fullinfo.c )
pcre[16|32]_get.c ) sources for the functions in the library,
pcre[16|32]_globals.c ) and some internal functions that they use
pcre[16|32]_jit_compile.c )
pcre[16|32]_maketables.c )
pcre[16|32]_newline.c )
pcre[16|32]_refcount.c )
pcre[16|32]_string_utils.c )
pcre[16|32]_study.c )
pcre[16|32]_tables.c )
pcre[16|32]_ucd.c )
pcre[16|32]_version.c )
pcre[16|32]_xclass.c )
pcre_ord2utf8.c )
pcre_valid_utf8.c )
pcre16_ord2utf16.c )
pcre16_utf16_utils.c )
pcre16_valid_utf16.c )
pcre32_utf32_utils.c )
pcre32_valid_utf32.c )
pcre[16|32]_printint.c ) debugging function that is used by pcretest,
) and can also be #included in pcre_compile()
pcre.h.in template for pcre.h when built by "configure"
pcreposix.h header for the external POSIX wrapper API
pcre_internal.h header for internal use
sljit/* 16 files that make up the JIT compiler
ucp.h header for Unicode property handling
config.h.in template for config.h, which is built by "configure"
pcrecpp.h public header file for the C++ wrapper
pcrecpparg.h.in template for another C++ header file
pcre_scanner.h public header file for C++ scanner functions
pcrecpp.cc )
pcre_scanner.cc ) source for the C++ wrapper library
pcre_stringpiece.h.in template for pcre_stringpiece.h, the header for the
C++ stringpiece functions
pcre_stringpiece.cc source for the C++ stringpiece functions
(B) Source files for programs that use PCRE:
pcredemo.c simple demonstration of coding calls to PCRE
pcregrep.c source of a grep utility that uses PCRE
pcretest.c comprehensive test program
(C) Auxiliary files:
132html script to turn "man" pages into HTML
AUTHORS information about the author of PCRE
ChangeLog log of changes to the code
CleanTxt script to clean nroff output for txt man pages
Detrail script to remove trailing spaces
HACKING some notes about the internals of PCRE
INSTALL generic installation instructions
LICENCE conditions for the use of PCRE
COPYING the same, using GNU's standard name
Makefile.in ) template for Unix Makefile, which is built by
) "configure"
Makefile.am ) the automake input that was used to create
) Makefile.in
NEWS important changes in this release
NON-UNIX-USE the previous name for NON-AUTOTOOLS-BUILD
NON-AUTOTOOLS-BUILD notes on building PCRE without using autotools
PrepareRelease script to make preparations for "make dist"
README this file
RunTest a Unix shell script for running tests
RunGrepTest a Unix shell script for pcregrep tests
aclocal.m4 m4 macros (generated by "aclocal")
config.guess ) files used by libtool,
config.sub ) used only when building a shared library
configure a configuring shell script (built by autoconf)
configure.ac ) the autoconf input that was used to build
) "configure" and config.h
depcomp ) script to find program dependencies, generated by
) automake
doc/*.3 man page sources for PCRE
doc/*.1 man page sources for pcregrep and pcretest
doc/index.html.src the base HTML page
doc/html/* HTML documentation
doc/pcre.txt plain text version of the man pages
doc/pcretest.txt plain text documentation of test program
doc/perltest.txt plain text documentation of Perl test program
install-sh a shell script for installing files
libpcre16.pc.in template for libpcre16.pc for pkg-config
libpcre32.pc.in template for libpcre32.pc for pkg-config
libpcre.pc.in template for libpcre.pc for pkg-config
libpcreposix.pc.in template for libpcreposix.pc for pkg-config
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
ltmain.sh file used to build a libtool script
missing ) common stub for a few missing GNU programs while
) installing, generated by automake
mkinstalldirs script for making install directories
perltest.pl Perl test program
pcre-config.in source of script which retains PCRE information
pcre_jit_test.c test program for the JIT compiler
pcrecpp_unittest.cc )
pcre_scanner_unittest.cc ) test programs for the C++ wrapper
pcre_stringpiece_unittest.cc )
testdata/testinput* test data for main library tests
testdata/testoutput* expected test results
testdata/grep* input and output for pcregrep tests
testdata/* other supporting test files
(D) Auxiliary files for cmake support
cmake/COPYING-CMAKE-SCRIPTS
cmake/FindPackageHandleStandardArgs.cmake
cmake/FindEditline.cmake
cmake/FindReadline.cmake
CMakeLists.txt
config-cmake.h.in
(E) Auxiliary files for VPASCAL
makevp.bat
makevp_c.txt
makevp_l.txt
pcregexp.pas
(F) Auxiliary files for building PCRE "by hand"
pcre.h.generic ) a version of the public PCRE header file
) for use in non-"configure" environments
config.h.generic ) a version of config.h for use in non-"configure"
) environments
(F) Miscellaneous
RunTest.bat a script for running tests under Windows
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
Last updated: 17 January 2014

View File

@ -11,27 +11,29 @@
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
<p>
The HTML documentation for PCRE comprises the following pages:
The HTML documentation for PCRE consists of a number of pages that are listed
below in alphabetical order. If you are new to PCRE, please read the first one
first.
</p>
<table>
<tr><td><a href="pcre.html">pcre</a></td>
<td>&nbsp;&nbsp;Introductory page</td></tr>
<tr><td><a href="pcre-config.html">pcre-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
<tr><td><a href="pcre16.html">pcre16</a></td>
<td>&nbsp;&nbsp;Discussion of the 16-bit PCRE library</td></tr>
<tr><td><a href="pcre32.html">pcre32</a></td>
<td>&nbsp;&nbsp;Discussion of the 32-bit PCRE library</td></tr>
<tr><td><a href="pcre-config.html">pcre-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
<tr><td><a href="pcreapi.html">pcreapi</a></td>
<td>&nbsp;&nbsp;PCRE's native API</td></tr>
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
<td>&nbsp;&nbsp;Options for building PCRE</td></tr>
<td>&nbsp;&nbsp;Building PCRE</td></tr>
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
<td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
@ -67,7 +69,7 @@ The HTML documentation for PCRE comprises the following pages:
<td>&nbsp;&nbsp;Some comments on performance</td></tr>
<tr><td><a href="pcreposix.html">pcreposix</a></td>
<td>&nbsp;&nbsp;The POSIX API to the PCRE library</td></tr>
<td>&nbsp;&nbsp;The POSIX API to the PCRE 8-bit library</td></tr>
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
<td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
@ -118,13 +120,13 @@ functions.
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
<td>&nbsp;&nbsp;Free study data</td></tr>
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(Perl compatible)</td></tr>
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
<td>&nbsp;&nbsp;Free study data</td></tr>
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
<td>&nbsp;&nbsp;Free extracted substring</td></tr>
@ -140,14 +142,17 @@ functions.
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
<td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
<tr><td><a href="pcre_get_stringtable_entries.html">pcre_get_stringtable_entries</a></td>
<td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
<td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
<td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
<tr><td><a href="pcre_info.html">pcre_info</a></td>
<td>&nbsp;&nbsp;Obsolete information extraction function</td></tr>
<tr><td><a href="pcre_jit_exec.html">pcre_jit_exec</a></td>
<td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
<td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>

View File

@ -23,8 +23,8 @@ man page, in case the conversion went wrong.
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
<b>[--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
<b>[--cflags] [--cflags-posix]</b>
<b> [--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
<b> [--cflags] [--cflags-posix]</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>

View File

@ -38,9 +38,9 @@ Herczeg.
</P>
<P>
Starting with release 8.32 it is possible to compile a third separate PCRE
library, which supports 32-bit character strings (including
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
libraries. The work to make this possible was done by Christian Persch.
library that supports 32-bit character strings (including UTF-32 strings). The
build process allows any combination of the 8-, 16- and 32-bit libraries. The
work to make this possible was done by Christian Persch.
</P>
<P>
The three libraries contain identical sets of functions, except that the names
@ -62,7 +62,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
including support for UTF-8/16/32 encoded strings and Unicode general category
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
enabled; it is not the default. The Unicode tables correspond to Unicode
release 6.2.0.
release 6.3.0.
</P>
<P>
In addition to the Perl-compatible matching function, PCRE contains an
@ -100,8 +100,11 @@ function makes it possible for a client to discover which features are
available. The features themselves are described in the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
page. Documentation about building PCRE for various operating systems can be
found in the <b>README</b> and <b>NON-AUTOTOOLS_BUILD</b> files in the source
distribution.
found in the
<a href="README.txt"><b>README</b></a>
and
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS_BUILD</b></a>
files in the source distribution.
</P>
<P>
The libraries contains a number of undocumented internal functions and data
@ -126,8 +129,11 @@ use sufficiently many resources as to cause your application to lose
performance.
</P>
<P>
The best way of guarding against this possibility is to use the
One way of guarding against this possibility is to use the
<b>pcre_fullinfo()</b> function to check the compiled pattern's options for UTF.
Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
compile time. This causes an compile time error if a pattern contains a
UTF-setting sequence.
</P>
<P>
If your application is one that supports UTF, be aware that validity checking
@ -148,15 +154,18 @@ page.
The user documentation for PCRE comprises a number of different sections. In
the "man" format, each of these is a separate "man page". In the HTML format,
each is a separate page, linked from the index page. In the plain text format,
all the sections, except the <b>pcredemo</b> section, are concatenated, for ease
of searching. The sections are as follows:
the descriptions of the <b>pcregrep</b> and <b>pcretest</b> programs are in files
called <b>pcregrep.txt</b> and <b>pcretest.txt</b>, respectively. The remaining
sections, except for the <b>pcredemo</b> section (which is a program listing),
are concatenated in <b>pcre.txt</b>, for ease of searching. The sections are as
follows:
<pre>
pcre this document
pcre-config show PCRE installation configuration information
pcre16 details of the 16-bit library
pcre32 details of the 32-bit library
pcre-config show PCRE installation configuration information
pcreapi details of PCRE's native C API
pcrebuild options for building PCRE
pcrebuild building PCRE
pcrecallout details of the callout feature
pcrecompat discussion of Perl compatibility
pcrecpp details of the C++ wrapper for the 8-bit library
@ -176,8 +185,8 @@ of searching. The sections are as follows:
pcretest description of the <b>pcretest</b> testing command
pcreunicode discussion of Unicode and UTF-8/16/32 support
</pre>
In addition, in the "man" and HTML formats, there is a short page for each
C library function, listing its arguments and results.
In the "man" and HTML formats, there is also a short page for each C library
function, listing its arguments and results.
</P>
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
<P>
@ -195,9 +204,9 @@ two digits 10, at the domain cam.ac.uk.
</P>
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
<P>
Last updated: 11 November 2012
Last updated: 08 January 2014
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2014 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -42,126 +42,126 @@ man page, in case the conversion went wrong.
<br><a name="SEC1" href="#TOC1">PCRE 16-BIT API BASIC FUNCTIONS</a><br>
<P>
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
<br>
<br>
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b> int *<i>errorcodeptr</i>,</b>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
<br>
<br>
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<P>
<b> const char **<i>errptr</i>);</b>
<br>
<br>
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
</P>
<P>
<br>
<br>
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
<br>
<br>
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<br><a name="SEC2" href="#TOC1">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a><br>
<P>
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
<br>
<br>
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<P>
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
<b> int <i>buffersize</i>);</b>
<br>
<br>
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
<br>
<br>
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>name</i>);</b>
</P>
<P>
<b>" PCRE_SPTR16 <i>name</i>);</b>
<br>
<br>
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
<br>
<br>
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
<br>
<br>
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
</P>
<P>
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
<br>
<br>
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
</P>
<P>
<br>
<br>
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
<P>
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
</P>
<P>
<br>
<br>
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
</P>
<P>
<br>
<br>
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
</P>
<P>
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
<br>
<br>
<b>const unsigned char *pcre16_maketables(void);</b>
</P>
<P>
<br>
<br>
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b> int <i>what</i>, void *<i>where</i>);</b>
<br>
<br>
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
</P>
<P>
<br>
<br>
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<br>
<br>
<b>const char *pcre16_version(void);</b>
</P>
<P>
<br>
<br>
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
<P>
<b>void *(*pcre16_malloc)(size_t);</b>
</P>
<P>
<br>
<br>
<b>void (*pcre16_free)(void *);</b>
</P>
<P>
<br>
<br>
<b>void *(*pcre16_stack_malloc)(size_t);</b>
</P>
<P>
<br>
<br>
<b>void (*pcre16_stack_free)(void *);</b>
</P>
<P>
<br>
<br>
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
</P>
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
<P>
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
<b>int <i>keep_boms</i>);</b>
<b> PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
<b> int <i>keep_boms</i>);</b>
</P>
<br><a name="SEC6" href="#TOC1">THE PCRE 16-BIT LIBRARY</a><br>
<P>
@ -259,8 +259,9 @@ buffer, including the zero terminator if the string was zero-terminated.
</P>
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
<P>
The offsets within subject strings that are returned by the matching functions
are in 16-bit units rather than bytes.
The lengths and starting offsets of subject strings must be specified in 16-bit
data units, and the offsets within subject strings that are returned by the
matching functions are in also 16-bit units rather than bytes.
</P>
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
<P>
@ -374,9 +375,9 @@ Cambridge CB2 3QH, England.
</P>
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
<P>
Last updated: 08 November 2012
Last updated: 12 May 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -0,0 +1,382 @@
<html>
<head>
<title>pcre32 specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre32 man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE 32-BIT API BASIC FUNCTIONS</a>
<li><a name="TOC2" href="#SEC2">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a>
<li><a name="TOC3" href="#SEC3">PCRE 32-BIT API AUXILIARY FUNCTIONS</a>
<li><a name="TOC4" href="#SEC4">PCRE 32-BIT API INDIRECTED FUNCTIONS</a>
<li><a name="TOC5" href="#SEC5">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a>
<li><a name="TOC6" href="#SEC6">THE PCRE 32-BIT LIBRARY</a>
<li><a name="TOC7" href="#SEC7">THE HEADER FILE</a>
<li><a name="TOC8" href="#SEC8">THE LIBRARY NAME</a>
<li><a name="TOC9" href="#SEC9">STRING TYPES</a>
<li><a name="TOC10" href="#SEC10">STRUCTURE TYPES</a>
<li><a name="TOC11" href="#SEC11">32-BIT FUNCTIONS</a>
<li><a name="TOC12" href="#SEC12">SUBJECT STRING OFFSETS</a>
<li><a name="TOC13" href="#SEC13">NAMED SUBPATTERNS</a>
<li><a name="TOC14" href="#SEC14">OPTION NAMES</a>
<li><a name="TOC15" href="#SEC15">CHARACTER CODES</a>
<li><a name="TOC16" href="#SEC16">ERROR NAMES</a>
<li><a name="TOC17" href="#SEC17">ERROR TEXTS</a>
<li><a name="TOC18" href="#SEC18">CALLOUTS</a>
<li><a name="TOC19" href="#SEC19">TESTING</a>
<li><a name="TOC20" href="#SEC20">NOT SUPPORTED IN 32-BIT MODE</a>
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
<li><a name="TOC22" href="#SEC22">REVISION</a>
</ul>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<br><a name="SEC1" href="#TOC1">PCRE 32-BIT API BASIC FUNCTIONS</a><br>
<P>
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
<br>
<br>
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
<b> int *<i>errorcodeptr</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
<br>
<br>
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
<b> const char **<i>errptr</i>);</b>
<br>
<br>
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
<br>
<br>
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
<br>
<br>
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<br><a name="SEC2" href="#TOC1">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a><br>
<P>
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
<br>
<br>
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
<b> int <i>buffersize</i>);</b>
<br>
<br>
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
<br>
<br>
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
<b> PCRE_SPTR32 <i>name</i>);</b>
<br>
<br>
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
<br>
<br>
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
<br>
<br>
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
<br>
<br>
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
<br>
<br>
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
</P>
<br><a name="SEC3" href="#TOC1">PCRE 32-BIT API AUXILIARY FUNCTIONS</a><br>
<P>
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
<br>
<br>
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
<br>
<br>
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
<br>
<br>
<b>const unsigned char *pcre32_maketables(void);</b>
<br>
<br>
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b> int <i>what</i>, void *<i>where</i>);</b>
<br>
<br>
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
<br>
<br>
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
<br>
<br>
<b>const char *pcre32_version(void);</b>
<br>
<br>
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<br><a name="SEC4" href="#TOC1">PCRE 32-BIT API INDIRECTED FUNCTIONS</a><br>
<P>
<b>void *(*pcre32_malloc)(size_t);</b>
<br>
<br>
<b>void (*pcre32_free)(void *);</b>
<br>
<br>
<b>void *(*pcre32_stack_malloc)(size_t);</b>
<br>
<br>
<b>void (*pcre32_stack_free)(void *);</b>
<br>
<br>
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
</P>
<br><a name="SEC5" href="#TOC1">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a><br>
<P>
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
<b> int <i>keep_boms</i>);</b>
</P>
<br><a name="SEC6" href="#TOC1">THE PCRE 32-BIT LIBRARY</a><br>
<P>
Starting with release 8.32, it is possible to compile a PCRE library that
supports 32-bit character strings, including UTF-32 strings, as well as or
instead of the original 8-bit library. This work was done by Christian Persch,
based on the work done by Zoltan Herczeg for the 16-bit library. All three
libraries contain identical sets of functions, used in exactly the same way.
Only the names of the functions and the data types of their arguments and
results are different. To avoid over-complication and reduce the documentation
maintenance load, most of the PCRE documentation describes the 8-bit library,
with only occasional references to the 16-bit and 32-bit libraries. This page
describes what is different when you use the 32-bit library.
</P>
<P>
WARNING: A single application can be linked with all or any of the three
libraries, but you must take care when processing any particular pattern
to use functions from just one library. For example, if you want to study
a pattern that was compiled with <b>pcre32_compile()</b>, you must do so
with <b>pcre32_study()</b>, not <b>pcre_study()</b>, and you must free the
study data with <b>pcre32_free_study()</b>.
</P>
<br><a name="SEC7" href="#TOC1">THE HEADER FILE</a><br>
<P>
There is only one header file, <b>pcre.h</b>. It contains prototypes for all the
functions in all libraries, as well as definitions of flags, structures, error
codes, etc.
</P>
<br><a name="SEC8" href="#TOC1">THE LIBRARY NAME</a><br>
<P>
In Unix-like systems, the 32-bit library is called <b>libpcre32</b>, and can
normally be accesss by adding <b>-lpcre32</b> to the command for linking an
application that uses PCRE.
</P>
<br><a name="SEC9" href="#TOC1">STRING TYPES</a><br>
<P>
In the 8-bit library, strings are passed to PCRE library functions as vectors
of bytes with the C type "char *". In the 32-bit library, strings are passed as
vectors of unsigned 32-bit quantities. The macro PCRE_UCHAR32 specifies an
appropriate data type, and PCRE_SPTR32 is defined as "const PCRE_UCHAR32 *". In
very many environments, "unsigned int" is a 32-bit data type. When PCRE is
built, it defines PCRE_UCHAR32 as "unsigned int", but checks that it really is
a 32-bit data type. If it is not, the build fails with an error message telling
the maintainer to modify the definition appropriately.
</P>
<br><a name="SEC10" href="#TOC1">STRUCTURE TYPES</a><br>
<P>
The types of the opaque structures that are used for compiled 32-bit patterns
and JIT stacks are <b>pcre32</b> and <b>pcre32_jit_stack</b> respectively. The
type of the user-accessible structure that is returned by <b>pcre32_study()</b>
is <b>pcre32_extra</b>, and the type of the structure that is used for passing
data to a callout function is <b>pcre32_callout_block</b>. These structures
contain the same fields, with the same names, as their 8-bit counterparts. The
only difference is that pointers to character strings are 32-bit instead of
8-bit types.
</P>
<br><a name="SEC11" href="#TOC1">32-BIT FUNCTIONS</a><br>
<P>
For every function in the 8-bit library there is a corresponding function in
the 32-bit library with a name that starts with <b>pcre32_</b> instead of
<b>pcre_</b>. The prototypes are listed above. In addition, there is one extra
function, <b>pcre32_utf32_to_host_byte_order()</b>. This is a utility function
that converts a UTF-32 character string to host byte order if necessary. The
other 32-bit functions expect the strings they are passed to be in host byte
order.
</P>
<P>
The <i>input</i> and <i>output</i> arguments of
<b>pcre32_utf32_to_host_byte_order()</b> may point to the same address, that is,
conversion in place is supported. The output buffer must be at least as long as
the input.
</P>
<P>
The <i>length</i> argument specifies the number of 32-bit data units in the
input string; a negative value specifies a zero-terminated string.
</P>
<P>
If <i>byte_order</i> is NULL, it is assumed that the string starts off in host
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
string (commonly as the first character).
</P>
<P>
If <i>byte_order</i> is not NULL, a non-zero value of the integer to which it
points means that the input starts off in host byte order, otherwise the
opposite order is assumed. Again, BOMs in the string can change this. The final
byte order is passed back at the end of processing.
</P>
<P>
If <i>keep_boms</i> is not zero, byte-order mark characters (0xfeff) are copied
into the output string. Otherwise they are discarded.
</P>
<P>
The result of the function is the number of 32-bit units placed into the output
buffer, including the zero terminator if the string was zero-terminated.
</P>
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
<P>
The lengths and starting offsets of subject strings must be specified in 32-bit
data units, and the offsets within subject strings that are returned by the
matching functions are in also 32-bit units rather than bytes.
</P>
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
<P>
The name-to-number translation table that is maintained for named subpatterns
uses 32-bit characters. The <b>pcre32_get_stringtable_entries()</b> function
returns the length of each entry in the table as the number of 32-bit data
units.
</P>
<br><a name="SEC14" href="#TOC1">OPTION NAMES</a><br>
<P>
There are two new general option names, PCRE_UTF32 and PCRE_NO_UTF32_CHECK,
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
fact, these new options define the same bits in the options word. There is a
discussion about the
<a href="pcreunicode.html#utf32strings">validity of UTF-32 strings</a>
in the
<a href="pcreunicode.html"><b>pcreunicode</b></a>
page.
</P>
<P>
For the <b>pcre32_config()</b> function there is an option PCRE_CONFIG_UTF32
that returns 1 if UTF-32 support is configured, otherwise 0. If this option is
given to <b>pcre_config()</b> or <b>pcre16_config()</b>, or if the
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 option is given to <b>pcre32_config()</b>,
the result is the PCRE_ERROR_BADOPTION error.
</P>
<br><a name="SEC15" href="#TOC1">CHARACTER CODES</a><br>
<P>
In 32-bit mode, when PCRE_UTF32 is not set, character values are treated in the
same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
from 0 to 0x7fffffff instead of 0 to 0xff. Character types for characters less
than 0xff can therefore be influenced by the locale in the same way as before.
Characters greater than 0xff have only one case, and no "type" (such as letter
or digit).
</P>
<P>
In UTF-32 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
the exception of values in the range 0xd800 to 0xdfff because those are
"surrogate" values that are ill-formed in UTF-32.
</P>
<P>
A UTF-32 string can indicate its endianness by special code knows as a
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
to be in host byte order. A utility function called
<b>pcre32_utf32_to_host_byte_order()</b> is provided to help with this (see
above).
</P>
<br><a name="SEC16" href="#TOC1">ERROR NAMES</a><br>
<P>
The error PCRE_ERROR_BADUTF32 corresponds to its 8-bit counterpart.
The error PCRE_ERROR_BADMODE is given when a compiled
pattern is passed to a function that processes patterns in the other
mode, for example, if a pattern compiled with <b>pcre_compile()</b> is passed to
<b>pcre32_exec()</b>.
</P>
<P>
There are new error codes whose names begin with PCRE_UTF32_ERR for invalid
UTF-32 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
are described in the section entitled
<a href="pcreapi.html#badutf8reasons">"Reason codes for invalid UTF-8 strings"</a>
in the main
<a href="pcreapi.html"><b>pcreapi</b></a>
page. The UTF-32 errors are:
<pre>
PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
PCRE_UTF32_ERR2 Non-character
PCRE_UTF32_ERR3 Character &#62; 0x10ffff
</PRE>
</P>
<br><a name="SEC17" href="#TOC1">ERROR TEXTS</a><br>
<P>
If there is an error while compiling a pattern, the error text that is passed
back by <b>pcre32_compile()</b> or <b>pcre32_compile2()</b> is still an 8-bit
character string, zero-terminated.
</P>
<br><a name="SEC18" href="#TOC1">CALLOUTS</a><br>
<P>
The <i>subject</i> and <i>mark</i> fields in the callout block that is passed to
a callout function point to 32-bit vectors.
</P>
<br><a name="SEC19" href="#TOC1">TESTING</a><br>
<P>
The <b>pcretest</b> program continues to operate with 8-bit input and output
files, but it can be used for testing the 32-bit library. If it is run with the
command line option <b>-32</b>, patterns and subject strings are converted from
8-bit to 32-bit before being passed to PCRE, and the 32-bit library functions
are used instead of the 8-bit ones. Returned 32-bit strings are converted to
8-bit for output. If both the 8-bit and the 16-bit libraries were not compiled,
<b>pcretest</b> defaults to 32-bit and the <b>-32</b> option is ignored.
</P>
<P>
When PCRE is being built, the <b>RunTest</b> script that is called by "make
check" uses the <b>pcretest</b> <b>-C</b> option to discover which of the 8-bit,
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
</P>
<br><a name="SEC20" href="#TOC1">NOT SUPPORTED IN 32-BIT MODE</a><br>
<P>
Not all the features of the 8-bit library are available with the 32-bit
library. The C++ and POSIX wrapper functions support only the 8-bit library,
and the <b>pcregrep</b> program is at present 8-bit only.
</P>
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
<P>
Last updated: 12 May 2013
<br>
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -20,15 +20,15 @@ SYNOPSIS
</P>
<P>
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
<b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
</P>
<P>
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
<br>
<br>
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
</P>
<P>
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
<br>
<br>
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
<b>pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,18 +20,18 @@ SYNOPSIS
</P>
<P>
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
<br>
<br>
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
<br>
<br>
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
</P>
<br><b>
DESCRIPTION
@ -65,6 +65,7 @@ The option bits are:
PCRE_FIRSTLINE Force matching to be before newline
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
PCRE_MULTILINE ^ and $ match newlines within data
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
sequences
@ -73,6 +74,8 @@ The option bits are:
PCRE_NEWLINE_LF Set LF as the newline sequence
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
theses (named ones available)
PCRE_NO_AUTO_POSSESS Disable auto-possessification
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
validity (only relevant if
PCRE_UTF16 is set)

View File

@ -20,21 +20,21 @@ SYNOPSIS
</P>
<P>
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b> int *<i>errorcodeptr</i>,</b>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
<br>
<br>
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b> int *<i>errorcodeptr</i>,</b>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
<br>
<br>
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
<b>" int *<i>errorcodeptr</i>,£</b>
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b> const unsigned char *<i>tableptr</i>);</b>
</P>
<br><b>
DESCRIPTION
@ -69,6 +69,7 @@ The option bits are:
PCRE_FIRSTLINE Force matching to be before newline
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
PCRE_MULTILINE ^ and $ match newlines within data
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
sequences
@ -77,6 +78,8 @@ The option bits are:
PCRE_NEWLINE_LF Set LF as the newline sequence
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
theses (named ones available)
PCRE_NO_AUTO_POSSESS Disable auto-possessification
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
validity (only relevant if
PCRE_UTF16 is set)

View File

@ -48,6 +48,7 @@ point to an unsigned long integer. The available codes are:
target architecture for the JIT compiler,
or NULL if there is no JIT support
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
PCRE_CONFIG_MATCH_LIMIT_RECURSION
Internal recursion depth limit

View File

@ -20,21 +20,21 @@ SYNOPSIS
</P>
<P>
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<P>
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
<br>
<br>
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
<br>
<br>
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
<b>PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,18 +20,18 @@ SYNOPSIS
</P>
<P>
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<P>
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
<b> int <i>buffersize</i>);</b>
<br>
<br>
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<P>
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
<b> int <i>buffersize</i>);</b>
<br>
<br>
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
<b> int <i>buffersize</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,21 +20,21 @@ SYNOPSIS
</P>
<P>
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<P>
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
<br>
<br>
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
<br>
<br>
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<br><b>
DESCRIPTION
@ -50,16 +50,17 @@ are:
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
or is NULL
<i>subject</i> Points to the subject string
<i>length</i> Length of the subject string, in bytes
<i>startoffset</i> Offset in bytes in the subject at which to
start matching
<i>length</i> Length of the subject string
<i>startoffset</i> Offset in the subject at which to start matching
<i>options</i> Option bits
<i>ovector</i> Points to a vector of ints for result offsets
<i>ovecsize</i> Number of elements in the vector
<i>workspace</i> Points to a vector of ints used as working space
<i>wscount</i> Number of elements in the vector
</pre>
The options are:
The units for <i>length</i> and <i>startoffset</i> are bytes for
<b>pcre_exec()</b>, 16-bit data items for <b>pcre16_exec()</b>, and 32-bit items
for <b>pcre32_exec()</b>. The options are:
<pre>
PCRE_ANCHORED Match only at the first position
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF

View File

@ -20,18 +20,18 @@ SYNOPSIS
</P>
<P>
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<P>
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
<br>
<br>
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
<br>
<br>
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<br><b>
DESCRIPTION
@ -45,14 +45,15 @@ offsets to captured substrings. Its arguments are:
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
or is NULL
<i>subject</i> Points to the subject string
<i>length</i> Length of the subject string, in bytes
<i>startoffset</i> Offset in bytes in the subject at which to
start matching
<i>length</i> Length of the subject string
<i>startoffset</i> Offset in the subject at which to start matching
<i>options</i> Option bits
<i>ovector</i> Points to a vector of ints for result offsets
<i>ovecsize</i> Number of elements in the vector (a multiple of 3)
</pre>
The options are:
The units for <i>length</i> and <i>startoffset</i> are bytes for
<b>pcre_exec()</b>, 16-bit data items for <b>pcre16_exec()</b>, and 32-bit items
for <b>pcre32_exec()</b>. The options are:
<pre>
PCRE_ANCHORED Match only at the first position
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF

View File

@ -20,15 +20,15 @@ SYNOPSIS
</P>
<P>
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b> int <i>what</i>, void *<i>where</i>);</b>
<br>
<br>
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b> int <i>what</i>, void *<i>where</i>);</b>
<br>
<br>
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
<b> int <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,21 +20,21 @@ SYNOPSIS
</P>
<P>
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>const char **<i>stringptr</i>);</b>
</P>
<P>
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b> const char **<i>stringptr</i>);</b>
<br>
<br>
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
<br>
<br>
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,15 +20,15 @@ SYNOPSIS
</P>
<P>
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>);</b>
</P>
<P>
<b> const char *<i>name</i>);</b>
<br>
<br>
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>name</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>name</i>);</b>
<br>
<br>
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
<b>PCRE_SPTR32 <i>name</i>);</b>
<b> PCRE_SPTR32 <i>name</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,15 +20,15 @@ SYNOPSIS
</P>
<P>
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
</P>
<P>
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
<br>
<br>
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
<br>
<br>
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
<b>PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,18 +20,18 @@ SYNOPSIS
</P>
<P>
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>const char **<i>stringptr</i>);</b>
</P>
<P>
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b> const char **<i>stringptr</i>);</b>
<br>
<br>
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
<br>
<br>
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,15 +20,15 @@ SYNOPSIS
</P>
<P>
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
</P>
<P>
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
<br>
<br>
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
</P>
<P>
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
<br>
<br>
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,21 +20,21 @@ SYNOPSIS
</P>
<P>
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>pcre_jit_stack *<i>jstack</i>);</b>
</P>
<P>
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b> pcre_jit_stack *<i>jstack</i>);</b>
<br>
<br>
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>pcre_jit_stack *<i>jstack</i>);</b>
</P>
<P>
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b> pcre_jit_stack *<i>jstack</i>);</b>
<br>
<br>
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>pcre_jit_stack *<i>jstack</i>);</b>
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b> pcre_jit_stack *<i>jstack</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,15 +20,15 @@ SYNOPSIS
</P>
<P>
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
<b>int <i>maxsize</i>);</b>
</P>
<P>
<b> int <i>maxsize</i>);</b>
<br>
<br>
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
<b>int <i>maxsize</i>);</b>
</P>
<P>
<b> int <i>maxsize</i>);</b>
<br>
<br>
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
<b>int <i>maxsize</i>);</b>
<b> int <i>maxsize</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,15 +20,15 @@ SYNOPSIS
</P>
<P>
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
<b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<P>
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
<br>
<br>
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<P>
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
<br>
<br>
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
<b>pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,15 +20,15 @@ SYNOPSIS
</P>
<P>
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<P>
<b> const char **<i>errptr</i>);</b>
<br>
<br>
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<P>
<b> const char **<i>errptr</i>);</b>
<br>
<br>
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
<b> const char **<i>errptr</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -20,8 +20,8 @@ SYNOPSIS
</P>
<P>
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
<b>int <i>keep_boms</i>);</b>
<b> PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
<b> int <i>keep_boms</i>);</b>
</P>
<br><b>
DESCRIPTION

View File

@ -0,0 +1,57 @@
<html>
<head>
<title>pcre_utf32_to_host_byte_order specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_utf32_to_host_byte_order man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
<b> int <i>keep_boms</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function, which exists only in the 32-bit library, converts a UTF-32
string to the correct order for the current host, taking account of any byte
order marks (BOMs) within the string. Its arguments are:
<pre>
<i>output</i> pointer to output buffer, may be the same as <i>input</i>
<i>input</i> pointer to input buffer
<i>length</i> number of 32-bit units in the input, or negative for
a zero-terminated string
<i>host_byte_order</i> a NULL value or a non-zero value pointed to means
start in host byte order
<i>keep_boms</i> if non-zero, BOMs are copied to the output string
</pre>
The result of the function is the number of 32-bit units placed into the output
buffer, including the zero terminator if the string was zero-terminated.
</P>
<P>
If <i>host_byte_order</i> is not NULL, it is set to indicate the byte order that
is current at the end of the string.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

File diff suppressed because it is too large Load Diff

View File

@ -13,46 +13,63 @@ from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a>
<li><a name="TOC2" href="#SEC2">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
<li><a name="TOC3" href="#SEC3">BUILDING SHARED AND STATIC LIBRARIES</a>
<li><a name="TOC4" href="#SEC4">C++ SUPPORT</a>
<li><a name="TOC5" href="#SEC5">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
<li><a name="TOC6" href="#SEC6">UNICODE CHARACTER PROPERTY SUPPORT</a>
<li><a name="TOC7" href="#SEC7">JUST-IN-TIME COMPILER SUPPORT</a>
<li><a name="TOC8" href="#SEC8">CODE VALUE OF NEWLINE</a>
<li><a name="TOC9" href="#SEC9">WHAT \R MATCHES</a>
<li><a name="TOC10" href="#SEC10">POSIX MALLOC USAGE</a>
<li><a name="TOC11" href="#SEC11">HANDLING VERY LARGE PATTERNS</a>
<li><a name="TOC12" href="#SEC12">AVOIDING EXCESSIVE STACK USAGE</a>
<li><a name="TOC13" href="#SEC13">LIMITING PCRE RESOURCE USAGE</a>
<li><a name="TOC14" href="#SEC14">CREATING CHARACTER TABLES AT BUILD TIME</a>
<li><a name="TOC15" href="#SEC15">USING EBCDIC CODE</a>
<li><a name="TOC16" href="#SEC16">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
<li><a name="TOC17" href="#SEC17">PCREGREP BUFFER SIZE</a>
<li><a name="TOC18" href="#SEC18">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
<li><a name="TOC21" href="#SEC21">SEE ALSO</a>
<li><a name="TOC22" href="#SEC22">AUTHOR</a>
<li><a name="TOC23" href="#SEC23">REVISION</a>
<li><a name="TOC1" href="#SEC1">BUILDING PCRE</a>
<li><a name="TOC2" href="#SEC2">PCRE BUILD-TIME OPTIONS</a>
<li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
<li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
<li><a name="TOC5" href="#SEC5">C++ SUPPORT</a>
<li><a name="TOC6" href="#SEC6">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
<li><a name="TOC7" href="#SEC7">UNICODE CHARACTER PROPERTY SUPPORT</a>
<li><a name="TOC8" href="#SEC8">JUST-IN-TIME COMPILER SUPPORT</a>
<li><a name="TOC9" href="#SEC9">CODE VALUE OF NEWLINE</a>
<li><a name="TOC10" href="#SEC10">WHAT \R MATCHES</a>
<li><a name="TOC11" href="#SEC11">POSIX MALLOC USAGE</a>
<li><a name="TOC12" href="#SEC12">HANDLING VERY LARGE PATTERNS</a>
<li><a name="TOC13" href="#SEC13">AVOIDING EXCESSIVE STACK USAGE</a>
<li><a name="TOC14" href="#SEC14">LIMITING PCRE RESOURCE USAGE</a>
<li><a name="TOC15" href="#SEC15">CREATING CHARACTER TABLES AT BUILD TIME</a>
<li><a name="TOC16" href="#SEC16">USING EBCDIC CODE</a>
<li><a name="TOC17" href="#SEC17">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
<li><a name="TOC18" href="#SEC18">PCREGREP BUFFER SIZE</a>
<li><a name="TOC19" href="#SEC19">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
<li><a name="TOC20" href="#SEC20">DEBUGGING WITH VALGRIND SUPPORT</a>
<li><a name="TOC21" href="#SEC21">CODE COVERAGE REPORTING</a>
<li><a name="TOC22" href="#SEC22">SEE ALSO</a>
<li><a name="TOC23" href="#SEC23">AUTHOR</a>
<li><a name="TOC24" href="#SEC24">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
<br><a name="SEC1" href="#TOC1">BUILDING PCRE</a><br>
<P>
This document describes the optional features of PCRE that can be selected when
the library is compiled. It assumes use of the <b>configure</b> script, where
the optional features are selected or deselected by providing options to
<b>configure</b> before running the <b>make</b> command. However, the same
options can be selected in both Unix-like and non-Unix-like environments using
the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead of
<b>configure</b> to build PCRE.
PCRE is distributed with a <b>configure</b> script that can be used to build the
library in Unix-like environments using the applications known as Autotools.
Also in the distribution are files to support building using <b>CMake</b>
instead of <b>configure</b>. The text file
<a href="README.txt"><b>README</b></a>
contains general information about building with Autotools (some of which is
repeated below), and also has some comments about building on various operating
systems. There is a lot more information about building PCRE without using
Autotools (including information about using <b>CMake</b> and building "by
hand") in the text file called
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
You should consult this file as well as the
<a href="README.txt"><b>README</b></a>
file if you are building in a non-Unix-like environment.
</P>
<br><a name="SEC2" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
<P>
The rest of this document describes the optional features of PCRE that can be
selected when the library is compiled. It assumes use of the <b>configure</b>
script, where the optional features are selected or deselected by providing
options to <b>configure</b> before running the <b>make</b> command. However, the
same options can be selected in both Unix-like and non-Unix-like environments
using the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead
of <b>configure</b> to build PCRE.
</P>
<P>
There is a lot more information about building PCRE without using
<b>configure</b> (including information about using <b>CMake</b> or building "by
hand") in the file called <i>NON-AUTOTOOLS-BUILD</i>, which is part of the PCRE
distribution. You should consult this file as well as the <i>README</i> file if
you are building in a non-Unix-like environment.
If you are not using Autotools or <b>CMake</b>, option selection can be done by
editing the <b>config.h</b> file, or by passing parameter settings to the
compiler, as described in
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
</P>
<P>
The complete list of options for <b>configure</b> (which includes the standard
@ -67,7 +84,7 @@ The following sections include descriptions of options whose names begin with
--enable and --disable always come in pairs, so the complementary option always
exists as well, but as it specifies the default, it is not described.
</P>
<br><a name="SEC2" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
<br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
<P>
By default, a library called <b>libpcre</b> is built, containing functions that
take string arguments contained in vectors of bytes, either as single-byte
@ -78,7 +95,7 @@ strings, by adding
<pre>
--enable-pcre16
</pre>
to the <b>configure</b> command. You can also build a separate
to the <b>configure</b> command. You can also build yet another separate
library, called <b>libpcre32</b>, in which strings are contained in vectors of
32-bit data units and interpreted either as single-unit characters or UTF-32
strings, by adding
@ -94,17 +111,17 @@ and POSIX wrappers are for the 8-bit library only, and that <b>pcregrep</b> is
an 8-bit program. None of these are built if you select only the 16-bit or
32-bit libraries.
</P>
<br><a name="SEC3" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
<br><a name="SEC4" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
<P>
The PCRE building process uses <b>libtool</b> to build both shared and static
Unix libraries by default. You can suppress one of these by adding one of
The Autotools PCRE building process uses <b>libtool</b> to build both shared and
static libraries by default. You can suppress one of these by adding one of
<pre>
--disable-shared
--disable-static
</pre>
to the <b>configure</b> command, as required.
</P>
<br><a name="SEC4" href="#TOC1">C++ SUPPORT</a><br>
<br><a name="SEC5" href="#TOC1">C++ SUPPORT</a><br>
<P>
By default, if the 8-bit library is being built, the <b>configure</b> script
will search for a C++ compiler and C++ header files. If it finds them, it
@ -115,7 +132,7 @@ strings). You can disable this by adding
</pre>
to the <b>configure</b> command.
</P>
<br><a name="SEC5" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
<br><a name="SEC6" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
<P>
To build PCRE with support for UTF Unicode character strings, add
<pre>
@ -143,7 +160,7 @@ not possible to support both EBCDIC and UTF-8 codes in the same version of the
library. Consequently, --enable-utf and --enable-ebcdic are mutually
exclusive.
</P>
<br><a name="SEC6" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
<br><a name="SEC7" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
<P>
UTF support allows the libraries to process character codepoints up to 0x10ffff
in the strings that they handle. On its own, however, it does not provide any
@ -163,7 +180,7 @@ supported. Details are given in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation.
</P>
<br><a name="SEC7" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
<br><a name="SEC8" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
<P>
Just-in-time compiler support is included in the build by specifying
<pre>
@ -180,7 +197,7 @@ pcregrep automatically makes use of it, unless you add
</pre>
to the "configure" command.
</P>
<br><a name="SEC8" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
<br><a name="SEC9" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
<P>
By default, PCRE interprets the linefeed (LF) character as indicating the end
of a line. This is the normal newline character on Unix-like systems. You can
@ -213,7 +230,7 @@ Whatever line ending convention is selected when PCRE is built can be
overridden when the library functions are called. At build time it is
conventional to use the standard for your operating system.
</P>
<br><a name="SEC9" href="#TOC1">WHAT \R MATCHES</a><br>
<br><a name="SEC10" href="#TOC1">WHAT \R MATCHES</a><br>
<P>
By default, the sequence \R in a pattern matches any Unicode newline sequence,
whatever has been selected as the line ending sequence. If you specify
@ -224,7 +241,7 @@ the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
selected when PCRE is built can be overridden when the library functions are
called.
</P>
<br><a name="SEC10" href="#TOC1">POSIX MALLOC USAGE</a><br>
<br><a name="SEC11" href="#TOC1">POSIX MALLOC USAGE</a><br>
<P>
When the 8-bit library is called through the POSIX interface (see the
<a href="pcreposix.html"><b>pcreposix</b></a>
@ -240,7 +257,7 @@ such as
</pre>
to the <b>configure</b> command.
</P>
<br><a name="SEC11" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
<br><a name="SEC12" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
<P>
Within a compiled pattern, offset values are used to point from one part to
another (for example, from an opening parenthesis to an alternation
@ -259,7 +276,7 @@ longer offsets slows down the operation of PCRE because it has to load
additional data when handling them. For the 32-bit library the value is always
4 and cannot be overridden; the value of --with-link-size is ignored.
</P>
<br><a name="SEC12" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
<br><a name="SEC13" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
<P>
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
by making recursive calls to an internal function called <b>match()</b>. In
@ -290,7 +307,7 @@ perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
function; it is not relevant for <b>pcre_dfa_exec()</b>.
</P>
<br><a name="SEC13" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
<br><a name="SEC14" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
<P>
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
@ -319,7 +336,7 @@ constraints. However, you can set a lower limit by adding, for example,
</pre>
to the <b>configure</b> command. This value can also be overridden at run time.
</P>
<br><a name="SEC14" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
<br><a name="SEC15" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
<P>
PCRE uses fixed tables for processing characters whose code values are less
than 256. By default, PCRE is built with a set of tables that are distributed
@ -336,7 +353,7 @@ compiling, because <b>dftables</b> is run on the local host. If you need to
create alternative tables when cross compiling, you will have to do so "by
hand".)
</P>
<br><a name="SEC15" href="#TOC1">USING EBCDIC CODE</a><br>
<br><a name="SEC16" href="#TOC1">USING EBCDIC CODE</a><br>
<P>
PCRE assumes by default that it will run in an environment where the character
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
@ -367,7 +384,7 @@ The options that select newline behaviour, such as --enable-newline-is-cr,
and equivalent run-time options, refer to these character values in an EBCDIC
environment.
</P>
<br><a name="SEC16" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
<br><a name="SEC17" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
<P>
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
@ -380,7 +397,7 @@ to the <b>configure</b> command. These options naturally require that the
relevant libraries are installed on your system. Configuration will fail if
they are not.
</P>
<br><a name="SEC17" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
<br><a name="SEC18" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
<P>
<b>pcregrep</b> uses an internal buffer to hold a "window" on the file it is
scanning, in order to be able to output "before" and "after" lines when it
@ -395,7 +412,7 @@ parameter value by adding, for example,
to the <b>configure</b> command. The caller of \fPpcregrep\fP can, however,
override this value by specifying a run-time option.
</P>
<br><a name="SEC18" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
<br><a name="SEC19" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
<P>
If you add
<pre>
@ -426,7 +443,7 @@ automatically included, you may need to add something like
</pre>
immediately before the <b>configure</b> command.
</P>
<br><a name="SEC19" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
<br><a name="SEC20" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
<P>
By adding the
<pre>
@ -436,7 +453,7 @@ option to to the <b>configure</b> command, PCRE will use valgrind annotations
to mark certain memory regions as unaddressable. This allows it to detect
invalid memory accesses, and is mostly useful for debugging PCRE itself.
</P>
<br><a name="SEC20" href="#TOC1">CODE COVERAGE REPORTING</a><br>
<br><a name="SEC21" href="#TOC1">CODE COVERAGE REPORTING</a><br>
<P>
If your C compiler is gcc, you can build a version of PCRE that can generate a
code coverage report for its test suite. To enable this, you must install
@ -493,11 +510,11 @@ This cleans all coverage data including the generated coverage report. For more
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
documentation.
</P>
<br><a name="SEC21" href="#TOC1">SEE ALSO</a><br>
<br><a name="SEC22" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcreapi</b>(3), <b>pcre16</b>, <b>pcre32</b>, <b>pcre_config</b>(3).
</P>
<br><a name="SEC22" href="#TOC1">AUTHOR</a><br>
<br><a name="SEC23" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@ -506,11 +523,11 @@ University Computing Service
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC23" href="#TOC1">REVISION</a><br>
<br><a name="SEC24" href="#TOC1">REVISION</a><br>
<P>
Last updated: 30 October 2012
Last updated: 12 May 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -64,23 +64,63 @@ it is processed as if it were
<br>
<br>
Notice that there is a callout before and after each parenthesis and
alternation bar. Automatic callouts can be used for tracking the progress of
pattern matching. The
<a href="pcretest.html"><b>pcretest</b></a>
command has an option that sets automatic callouts; when it is used, the output
indicates how the pattern is matched. This is useful information when you are
trying to optimize the performance of a particular pattern.
alternation bar. If the pattern contains a conditional group whose condition is
an assertion, an automatic callout is inserted immediately before the
condition. Such a callout may also be inserted explicitly, for example:
<pre>
(?(?C9)(?=a)ab|de)
</pre>
This applies only to assertion conditions (because they are themselves
independent groups).
</P>
<P>
The use of callouts in a pattern makes it ineligible for optimization by the
just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE
option always fails.
Automatic callouts can be used for tracking the progress of pattern matching.
The
<a href="pcretest.html"><b>pcretest</b></a>
program has a pattern qualifier (/C) that sets automatic callouts; when it is
used, the output indicates how the pattern is being matched. This is useful
information when you are trying to optimize the performance of a particular
pattern.
</P>
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
<P>
You should be aware that, because of optimizations in the way PCRE matches
patterns by default, callouts sometimes do not happen. For example, if the
pattern is
You should be aware that, because of optimizations in the way PCRE compiles and
matches patterns, callouts sometimes do not happen exactly as you might expect.
</P>
<P>
At compile time, PCRE "auto-possessifies" repeated items when it knows that
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
if it were a++[bc]. The <b>pcretest</b> output when this pattern is anchored and
then applied with automatic callouts to the string "aaaa" is:
<pre>
---&#62;aaaa
+0 ^ ^
+1 ^ a+
+3 ^ ^ [bc]
No match
</pre>
This indicates that when matching [bc] fails, there is no backtracking into a+
and therefore the callouts that would be taken for the backtracks do not occur.
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
to <b>pcre_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
this is done in <b>pcretest</b> (using the /O qualifier), the output changes to
this:
<pre>
---&#62;aaaa
+0 ^ ^
+1 ^ a+
+3 ^ ^ [bc]
+3 ^ ^ [bc]
+3 ^ ^ [bc]
+3 ^^ [bc]
No match
</pre>
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
again, repeatedly, until a+ itself fails.
</P>
<P>
Other optimizations that provide fast "no match" results also affect callouts.
For example, if the pattern is
<pre>
ab(?C4)cd
</pre>
@ -104,11 +144,11 @@ callouts such as the example above are obeyed.
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
<P>
During matching, when PCRE reaches a callout point, the external function
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called
(if it is set). This applies to both normal and DFA matching. The only
argument to the callout function is a pointer to a <b>pcre_callout</b>
or <b>pcre[16|32]_callout</b> block.
These structures contains the following fields:
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called (if it is
set). This applies to both normal and DFA matching. The only argument to the
callout function is a pointer to a <b>pcre_callout</b> or
<b>pcre[16|32]_callout</b> block. These structures contains the following
fields:
<pre>
int <i>version</i>;
int <i>callout_number</i>;
@ -141,10 +181,10 @@ automatically generated callouts).
<P>
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
passed by the caller to the matching function. When <b>pcre_exec()</b> or
<b>pcre[16|32]_exec()</b> is used, the contents can be inspected, in order to extract
substrings that have been matched so far, in the same way as for extracting
substrings after a match has completed. For the DFA matching functions, this
field is not useful.
<b>pcre[16|32]_exec()</b> is used, the contents can be inspected, in order to
extract substrings that have been matched so far, in the same way as for
extracting substrings after a match has completed. For the DFA matching
functions, this field is not useful.
</P>
<P>
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
@ -171,8 +211,10 @@ functions are used, because they do not support captured substrings.
</P>
<P>
The <i>capture_last</i> field contains the number of the most recently captured
substring. If no substrings have been captured, its value is -1. This is always
the case for the DFA matching functions.
substring. However, when a recursion exits, the value reverts to what it was
outside the recursion, as do the values of all captured substrings. If no
substrings have been captured, the value of <i>capture_last</i> is -1. This is
always the case for the DFA matching functions.
</P>
<P>
The <i>callout_data</i> field contains a value that is passed to a matching
@ -203,11 +245,12 @@ same callout number. However, they are set for all callouts.
</P>
<P>
The <i>mark</i> field is present from version 2 of the callout structure. In
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a pointer to
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
(*THEN) item in the match, or NULL if no such items have been passed. Instances
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
callouts from the DFA matching functions this field always contains NULL.
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a
pointer to the zero-terminated name of the most recently passed (*MARK),
(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
previous (*MARK). In callouts from the DFA matching functions this field always
contains NULL.
</P>
<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
<P>
@ -234,9 +277,9 @@ Cambridge CB2 3QH, England.
</P>
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
<P>
Last updated: 24 June 2012
Last updated: 12 November 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -36,10 +36,8 @@ these do not seem to have any use.
</P>
<P>
3. Capturing subpatterns that occur inside negative lookahead assertions are
counted, but their entries in the offsets vector are never set. Perl sets its
numerical variables from any such patterns that are matched before the
assertion fails to match something (thereby succeeding), but only if the
negative lookahead assertion contains just one branch.
counted, but their entries in the offsets vector are never set. Perl sometimes
(but not always) sets its numerical variables from inside negative assertions.
</P>
<P>
4. Though binary zero characters are supported in the subject string, they are
@ -102,24 +100,32 @@ in the
page.
</P>
<P>
10. If any of the backtracking control verbs are used in an assertion or in a
subpattern that is called as a subroutine (whether or not recursively), their
effect is confined to that subpattern; it does not extend to the surrounding
pattern. This is not always the case in Perl. In particular, if (*THEN) is
present in a group that is called as a subroutine, its action is limited to
that group, even if the group does not contain any | characters. There is one
exception to this: the name from a *(MARK), (*PRUNE), or (*THEN) that is
encountered in a successful positive assertion <i>is</i> passed back when a
match succeeds (compare capturing parentheses in assertions). Note that such
subpatterns are processed as anchored at the point where they are tested.
10. If any of the backtracking control verbs are used in a subpattern that is
called as a subroutine (whether or not recursively), their effect is confined
to that subpattern; it does not extend to the surrounding pattern. This is not
always the case in Perl. In particular, if (*THEN) is present in a group that
is called as a subroutine, its action is limited to that group, even if the
group does not contain any | characters. Note that such subpatterns are
processed as anchored at the point where they are tested.
</P>
<P>
11. There are some differences that are concerned with the settings of captured
11. If a pattern contains more than one backtracking control verb, the first
one that is backtracked onto acts. For example, in the pattern
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
same as PCRE, but there are examples where it differs.
</P>
<P>
12. Most backtracking verbs in assertions have their normal actions. They are
not confined to the assertion.
</P>
<P>
13. There are some differences that are concerned with the settings of captured
strings when part of a pattern is repeated. For example, matching "aba" against
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
</P>
<P>
12. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
14. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
names is not as general as Perl's. This is a consequence of the fact the PCRE
works internally just with numbers, using an external table to translate
between numbers and names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b)B),
@ -130,13 +136,26 @@ names map to capturing subpattern number 1. To avoid this confusing situation,
an error is given at compile time.
</P>
<P>
13. Perl recognizes comments in some places that PCRE does not, for example,
15. Perl recognizes comments in some places that PCRE does not, for example,
between the ( and ? at the start of a subpattern. If the /x modifier is set,
Perl allows white space between ( and ? but PCRE never does, even if the
PCRE_EXTENDED option is set.
Perl allows white space between ( and ? (though current Perls warn that this is
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
</P>
<P>
14. PCRE provides some extensions to the Perl regular expression facilities.
16. Perl, when in warning mode, gives warnings for character classes such as
[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
warning features, so it gives an error in these cases because they are almost
certainly user mistakes.
</P>
<P>
17. In PCRE, the upper/lower case character properties Lu and Ll are not
affected when case-independent matching is specified. For example, \p{Lu}
always matches an upper case letter. I think Perl has changed in this respect;
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
letters, regardless of case, when case independence is specified.
</P>
<P>
18. PCRE provides some extensions to the Perl regular expression facilities.
Perl 5.10 includes new features that are not in earlier versions of Perl, some
of which (such as named parentheses) have been in PCRE for some time. This list
is with respect to Perl 5.10:
@ -207,9 +226,9 @@ Cambridge CB2 3QH, England.
REVISION
</b><br>
<P>
Last updated: 25 August 2012
Last updated: 10 November 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -37,8 +37,10 @@ man page, in case the conversion went wrong.
<b>pcregrep</b> searches files for character patterns, in the same way as other
grep commands do, but it uses the PCRE regular expression library to support
patterns that are compatible with the regular expressions of Perl 5. See
<a href="pcresyntax.html"><b>pcresyntax</b>(3)</a>
for a quick-reference summary of pattern syntax, or
<a href="pcrepattern.html"><b>pcrepattern</b>(3)</a>
for a full description of syntax and semantics of the regular expressions
for a full description of the syntax and semantics of the regular expressions
that PCRE supports.
</P>
<P>
@ -748,9 +750,9 @@ Cambridge CB2 3QH, England.
</P>
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
<P>
Last updated: 13 September 2012
Last updated: 03 April 2014
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2014 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -172,15 +172,9 @@ PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and
PCRE_PARTIAL_SOFT.
</P>
<P>
The unsupported pattern items are:
<pre>
\C match a single byte; not supported in UTF-8 mode
(?Cn) callouts
(*PRUNE) )
(*SKIP) ) backtracking control verbs
(*THEN) )
</pre>
Support for some of these may be added in future.
The only unsupported pattern items are \C (match a single data unit) when
running in a UTF mode, and a callout immediately before an assertion condition
in a conditional group.
</P>
<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT EXECUTION</a><br>
<P>
@ -449,9 +443,9 @@ Cambridge CB2 3QH, England.
</P>
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
<P>
Last updated: 31 October 2012
Last updated: 17 March 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -21,9 +21,10 @@ practice be relevant.
</P>
<P>
The maximum length of a compiled pattern is approximately 64K data units (bytes
for the 8-bit library, 32-bit units for the 32-bit library, and 32-bit units for
the 32-bit library) if PCRE is compiled with the default internal linkage size
of 2 bytes. If you want to process regular expressions that are truly enormous,
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
the 32-bit library) if PCRE is compiled with the default internal linkage size,
which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit
library. If you want to process regular expressions that are truly enormous,
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
the source distribution and the
@ -36,7 +37,10 @@ All values in repeating quantifiers must be less than 65536.
</P>
<P>
There is no limit to the number of parenthesized subpatterns, but there can be
no more than 65535 capturing subpatterns.
no more than 65535 capturing subpatterns. There is, however, a limit to the
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
order to limit the amount of system stack used at compile time. The limit can
be specified when PCRE is built; the default is 250.
</P>
<P>
There is a limit to the number of forward references to subsequent subpatterns
@ -50,7 +54,7 @@ maximum number of named subpatterns is 10000.
</P>
<P>
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit library.
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
</P>
<P>
The maximum length of a subject string is the largest positive number that an
@ -77,9 +81,9 @@ Cambridge CB2 3QH, England.
REVISION
</b><br>
<P>
Last updated: 04 May 2012
Last updated: 05 November 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -126,6 +126,15 @@ character of the subject. The algorithm does not automatically move on to find
matches that start at later positions.
</P>
<P>
PCRE's "auto-possessification" optimization usually applies to character
repeats at the end of a pattern (as well as internally). For example, the
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
even considering the possibility of backtracking into the repeated digits. For
DFA matching, this means that only one possible match is found. If you really
do want multiple matches in such cases, either use an ungreedy repeat
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
</P>
<P>
There are a number of features of PCRE regular expressions that are not
supported by the alternative matching algorithm. They are as follows:
</P>
@ -224,7 +233,7 @@ Cambridge CB2 3QH, England.
</P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P>
Last updated: 08 January 2012
Last updated: 12 November 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
<br>

View File

@ -81,33 +81,36 @@ strings. This optimization is also disabled for partial matching.
<br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a><br>
<P>
A partial match occurs during a call to <b>pcre_exec()</b> or
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached successfully,
but matching cannot continue because more characters are needed. However, at
least one character in the subject must have been inspected. This character
need not form part of the final matched string; lookbehind assertions and the
\K escape sequence provide ways of inspecting characters before the start of a
matched substring. The requirement for inspecting at least one character exists
because an empty string can always be matched; without such a restriction there
would always be a partial match of an empty string at the end of the subject.
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached
successfully, but matching cannot continue because more characters are needed.
However, at least one character in the subject must have been inspected. This
character need not form part of the final matched string; lookbehind assertions
and the \K escape sequence provide ways of inspecting characters before the
start of a matched substring. The requirement for inspecting at least one
character exists because an empty string can always be matched; without such a
restriction there would always be a partial match of an empty string at the end
of the subject.
</P>
<P>
If there are at least two slots in the offsets vector when a partial match is
returned, the first slot is set to the offset of the earliest character that
was inspected. For convenience, the second offset points to the end of the
subject so that a substring can easily be identified.
subject so that a substring can easily be identified. If there are at least
three slots in the offsets vector, the third slot is set to the offset of the
character where matching started.
</P>
<P>
For the majority of patterns, the first offset identifies the start of the
partially matched string. However, for patterns that contain lookbehind
assertions, or \K, or begin with \b or \B, earlier characters have been
inspected while carrying out the match. For example:
For the majority of patterns, the contents of the first and third slots will be
the same. However, for patterns that contain lookbehind assertions, or begin
with \b or \B, characters before the one where matching started may have been
inspected while carrying out the match. For example, consider this pattern:
<pre>
/(?&#60;=abc)123/
</pre>
This pattern matches "123", but only if it is preceded by "abc". If the subject
string is "xyzabc12", the offsets after a partial match are for the substring
"abc12", because all these characters are needed if another match is tried
with extra characters added to the subject.
string is "xyzabc12", the first two offsets after a partial match are for the
substring "abc12", because all these characters were inspected. However, the
third offset is set to 6, because that is the offset where matching began.
</P>
<P>
What happens when a partial match is identified depends on which of the two
@ -303,6 +306,16 @@ not retain the previously partially-matched string. It is up to the calling
program to do that if it needs to.
</P>
<P>
That means that, for an unanchored pattern, if a continued match fails, it is
not possible to try again at a new starting point. All this facility is capable
of doing is continuing with the previous match attempt. In the previous
example, if the second set of data is "ug23" the result is no match, even
though there would be a match for "aug23" if the entire string were given at
once. Depending on the application, this may or may not be what you want.
The only way to allow for starting again at the next character is to retain the
matched part of the subject and try a new complete match.
</P>
<P>
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
facility can be used to pass very long subject strings to the DFA matching
@ -334,10 +347,9 @@ processing time is needed.
<P>
<b>Note:</b> If the pattern contains lookbehind assertions, or \K, or starts
with \b or \B, the string that is returned for a partial match includes
characters that precede the partially matched string itself, because these must
be retained when adding on more characters for a subsequent matching attempt.
However, in some cases you may need to retain even earlier characters, as
discussed in the next section.
characters that precede the start of what would be returned for a complete
match, because it contains all the characters that were inspected during the
partial match.
</P>
<br><a name="SEC9" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
<P>
@ -356,12 +368,35 @@ includes the effect of PCRE_NOTEOL.
offsets that are returned for a partial match. However a lookbehind assertion
later in the pattern could require even earlier characters to be inspected. You
can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the length
of the largest lookbehind in the pattern. This length is given in characters,
not bytes. If you always retain at least that many characters before the
partially matched string, all should be well. (Of course, near the start of the
subject, fewer characters may be present; in that case all characters should be
retained.)
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the
length of the longest lookbehind in the pattern. This length is given in
characters, not bytes. If you always retain at least that many characters
before the partially matched string, all should be well. (Of course, near the
start of the subject, fewer characters may be present; in that case all
characters should be retained.)
</P>
<P>
From release 8.33, there is a more accurate way of deciding which characters to
retain. Instead of subtracting the length of the longest lookbehind from the
earliest inspected character (<i>offsets[0]</i>), the match start position
(<i>offsets[2]</i>) should be used, and the next match attempt started at the
<i>offsets[2]</i> character by setting the <i>startoffset</i> argument of
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>.
</P>
<P>
For example, if the pattern "(?&#60;=123)abc" is partially
matched against the string "xx123a", the three offset values returned are 2, 6,
and 5. This indicates that the matching process that gave a partial match
started at offset 5, but the characters "123a" were all inspected. The maximum
lookbehind for that pattern is 3, so taking that away from 5 shows that we need
only keep "123a", and the next match attempt can be started at offset 3 (that
is, at "a") when further characters have been added. When the match start is
not the earliest inspected character, <b>pcretest</b> shows it explicitly:
<pre>
re&#62; "(?&#60;=123)abc"
data&#62; xx123a\P\P
Partial match at offset 5: 123a
</PRE>
</P>
<P>
3. Because a partial match must always contain at least one character, what
@ -465,9 +500,9 @@ Cambridge CB2 3QH, England.
</P>
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
<P>
Last updated: 24 June 2012
Last updated: 02 July 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

File diff suppressed because it is too large Load Diff

View File

@ -13,7 +13,7 @@ from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF POSIX API</a>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
@ -23,23 +23,21 @@ man page, in case the conversion went wrong.
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
<li><a name="TOC9" href="#SEC9">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF POSIX API</a><br>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>#include &#60;pcreposix.h&#62;</b>
</P>
<P>
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
<b>int <i>cflags</i>);</b>
</P>
<P>
<b> int <i>cflags</i>);</b>
<br>
<br>
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
<b>size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
</P>
<P>
<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
<b>char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
</P>
<P>
<b> size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
<b> size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
<b> char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
<br>
<br>
<b>void regfree(regex_t *<i>preg</i>);</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>

View File

@ -102,8 +102,8 @@ study data.
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
<P>
Re-using a precompiled pattern is straightforward. Having reloaded it into main
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary,
you pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary, you
pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
the usual way.
</P>
<P>
@ -119,6 +119,11 @@ in the
documentation.
</P>
<P>
<b>Warning:</b> The tables that <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> use
must be the same as those that were used when the pattern was compiled. If this
is not the case, the behaviour is undefined.
</P>
<P>
If you did not provide custom character tables when the pattern was compiled,
the pointer in the compiled pattern is NULL, which causes the matching
functions to use PCRE's internal tables. Thus, you do not need to take any
@ -126,9 +131,9 @@ special action at run time in this case.
</P>
<P>
If you saved study data with the compiled pattern, you need to create your own
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point to the
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
<i>flags</i> field to indicate that study data is present. Then pass the
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point
to the reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in
the <i>flags</i> field to indicate that study data is present. Then pass the
<b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
pattern was studied for just-in-time optimization, that data cannot be saved,
and so is lost by a save/restore cycle.
@ -149,9 +154,9 @@ Cambridge CB2 3QH, England.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 24 June 2012
Last updated: 12 November 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -29,13 +29,13 @@ man page, in case the conversion went wrong.
<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
<li><a name="TOC15" href="#SEC15">COMMENT</a>
<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
<li><a name="TOC17" href="#SEC17">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
<li><a name="TOC18" href="#SEC18">BACKREFERENCES</a>
<li><a name="TOC19" href="#SEC19">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
<li><a name="TOC20" href="#SEC20">CONDITIONAL PATTERNS</a>
<li><a name="TOC21" href="#SEC21">BACKTRACKING CONTROL</a>
<li><a name="TOC22" href="#SEC22">NEWLINE CONVENTIONS</a>
<li><a name="TOC23" href="#SEC23">WHAT \R MATCHES</a>
<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a>
<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a>
<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
<li><a name="TOC20" href="#SEC20">BACKREFERENCES</a>
<li><a name="TOC21" href="#SEC21">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
<li><a name="TOC22" href="#SEC22">CONDITIONAL PATTERNS</a>
<li><a name="TOC23" href="#SEC23">BACKTRACKING CONTROL</a>
<li><a name="TOC24" href="#SEC24">CALLOUTS</a>
<li><a name="TOC25" href="#SEC25">SEE ALSO</a>
<li><a name="TOC26" href="#SEC26">AUTHOR</a>
@ -65,10 +65,14 @@ documentation. This document contains a quick-reference summary of the syntax.
\n newline (hex 0A)
\r carriage return (hex 0D)
\t tab (hex 09)
\0dd character with octal code 0dd
\ddd character with octal code ddd, or backreference
\o{ddd..} character with octal code ddd..
\xhh character with hex code hh
\x{hhh..} character with hex code hhh..
</PRE>
</pre>
Note that \0dd is always an octal code, and that \8 and \9 are the literal
characters "8" and "9".
</P>
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
<P>
@ -92,9 +96,11 @@ documentation. This document contains a quick-reference summary of the syntax.
\W a "non-word" character
\X a Unicode extended grapheme cluster
</pre>
In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
characters, even in a UTF mode. However, this can be changed by setting the
PCRE_UCP option.
By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode
or in the 16- bit and 32-bit libraries. However, if locale-specific matching is
happening, \s and \w may also match characters with code points in the range
128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences
is changed to use Unicode properties and they match many more characters.
</P>
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
<P>
@ -150,9 +156,13 @@ PCRE_UCP option.
<pre>
Xan Alphanumeric: union of properties L and N
Xps POSIX space: property Z or tab, NL, VT, FF, CR
Xsp Perl space: property Z or tab, NL, FF, CR
Xsp Perl space: property Z or tab, NL, VT, FF, CR
Xuc Univerally-named character: one that can be
represented by a Universal Character Name
Xwd Perl word: property Xan or underscore
</PRE>
</pre>
Perl and POSIX space are now the same. Perl added VT to its space character set
at release 5.18 and PCRE changed at release 8.34.
</P>
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
<P>
@ -329,7 +339,8 @@ but some of them use Unicode properties if PCRE_UCP is set. You can use
<P>
<pre>
\K reset start of match
</PRE>
</pre>
\K is honoured in positive assertions, but ignored in negative ones.
</P>
<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
<P>
@ -372,18 +383,45 @@ but some of them use Unicode properties if PCRE_UCP is set. You can use
(?x) extended (ignore white space)
(?-...) unset option(s)
</pre>
The following are recognized only at the start of a pattern or after one of the
newline-setting options with similar syntax:
The following are recognized only at the very start of a pattern or after one
of the newline or \R options with similar syntax. More than one of them may
appear.
<pre>
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
(*NO_AUTO_POSSESS) no auto-possessification (PCRE_NO_AUTO_POSSESS)
(*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
(*UTF8) set UTF-8 mode: 8-bit library (PCRE_UTF8)
(*UTF16) set UTF-16 mode: 16-bit library (PCRE_UTF16)
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
(*UTF) set appropriate UTF mode for the library in use
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
</pre>
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
limits set by the caller of pcre_exec(), not increase them.
</P>
<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
<P>
These are recognized only at the very start of the pattern or after option
settings with a similar syntax.
<pre>
(*CR) carriage return only
(*LF) linefeed only
(*CRLF) carriage return followed by linefeed
(*ANYCRLF) all three of the above
(*ANY) any Unicode newline sequence
</PRE>
</P>
<br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
<br><a name="SEC18" href="#TOC1">WHAT \R MATCHES</a><br>
<P>
These are recognized only at the very start of the pattern or after option
setting with a similar syntax.
<pre>
(*BSR_ANYCRLF) CR, LF, or CRLF
(*BSR_UNICODE) any Unicode newline sequence
</PRE>
</P>
<br><a name="SEC19" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
<P>
<pre>
(?=...) positive look ahead
@ -393,7 +431,7 @@ newline-setting options with similar syntax:
</pre>
Each top-level branch of a look behind must be of a fixed length.
</P>
<br><a name="SEC18" href="#TOC1">BACKREFERENCES</a><br>
<br><a name="SEC20" href="#TOC1">BACKREFERENCES</a><br>
<P>
<pre>
\n reference by number (can be ambiguous)
@ -407,7 +445,7 @@ Each top-level branch of a look behind must be of a fixed length.
(?P=name) reference by name (Python)
</PRE>
</P>
<br><a name="SEC19" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
<br><a name="SEC21" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
<P>
<pre>
(?R) recurse whole pattern
@ -426,7 +464,7 @@ Each top-level branch of a look behind must be of a fixed length.
\g'-n' call subpattern by relative number (PCRE extension)
</PRE>
</P>
<br><a name="SEC20" href="#TOC1">CONDITIONAL PATTERNS</a><br>
<br><a name="SEC22" href="#TOC1">CONDITIONAL PATTERNS</a><br>
<P>
<pre>
(?(condition)yes-pattern)
@ -445,7 +483,7 @@ Each top-level branch of a look behind must be of a fixed length.
(?(assert)... assertion condition
</PRE>
</P>
<br><a name="SEC21" href="#TOC1">BACKTRACKING CONTROL</a><br>
<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
<P>
The following act immediately they are reached:
<pre>
@ -468,27 +506,6 @@ pattern is not anchored.
(*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
</PRE>
</P>
<br><a name="SEC22" href="#TOC1">NEWLINE CONVENTIONS</a><br>
<P>
These are recognized only at the very start of the pattern or after a
(*BSR_...), (*UTF8), (*UTF16), (*UTF32) or (*UCP) option.
<pre>
(*CR) carriage return only
(*LF) linefeed only
(*CRLF) carriage return followed by linefeed
(*ANYCRLF) all three of the above
(*ANY) any Unicode newline sequence
</PRE>
</P>
<br><a name="SEC23" href="#TOC1">WHAT \R MATCHES</a><br>
<P>
These are recognized only at the very start of the pattern or after a
(*...) option that sets the newline convention or a UTF or UCP mode.
<pre>
(*BSR_ANYCRLF) CR, LF, or CRLF
(*BSR_UNICODE) any Unicode newline sequence
</PRE>
</P>
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
<P>
<pre>
@ -512,9 +529,9 @@ Cambridge CB2 3QH, England.
</P>
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
<P>
Last updated: 11 November 2012
Last updated: 08 January 2014
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2014 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -14,21 +14,22 @@ man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
<li><a name="TOC3" href="#SEC3">COMMAND LINE OPTIONS</a>
<li><a name="TOC4" href="#SEC4">DESCRIPTION</a>
<li><a name="TOC5" href="#SEC5">PATTERN MODIFIERS</a>
<li><a name="TOC6" href="#SEC6">DATA LINES</a>
<li><a name="TOC7" href="#SEC7">THE ALTERNATIVE MATCHING FUNCTION</a>
<li><a name="TOC8" href="#SEC8">DEFAULT OUTPUT FROM PCRETEST</a>
<li><a name="TOC9" href="#SEC9">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
<li><a name="TOC10" href="#SEC10">RESTARTING AFTER A PARTIAL MATCH</a>
<li><a name="TOC11" href="#SEC11">CALLOUTS</a>
<li><a name="TOC12" href="#SEC12">NON-PRINTING CHARACTERS</a>
<li><a name="TOC13" href="#SEC13">SAVING AND RELOADING COMPILED PATTERNS</a>
<li><a name="TOC14" href="#SEC14">SEE ALSO</a>
<li><a name="TOC15" href="#SEC15">AUTHOR</a>
<li><a name="TOC16" href="#SEC16">REVISION</a>
<li><a name="TOC2" href="#SEC2">INPUT DATA FORMAT</a>
<li><a name="TOC3" href="#SEC3">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
<li><a name="TOC4" href="#SEC4">COMMAND LINE OPTIONS</a>
<li><a name="TOC5" href="#SEC5">DESCRIPTION</a>
<li><a name="TOC6" href="#SEC6">PATTERN MODIFIERS</a>
<li><a name="TOC7" href="#SEC7">DATA LINES</a>
<li><a name="TOC8" href="#SEC8">THE ALTERNATIVE MATCHING FUNCTION</a>
<li><a name="TOC9" href="#SEC9">DEFAULT OUTPUT FROM PCRETEST</a>
<li><a name="TOC10" href="#SEC10">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
<li><a name="TOC11" href="#SEC11">RESTARTING AFTER A PARTIAL MATCH</a>
<li><a name="TOC12" href="#SEC12">CALLOUTS</a>
<li><a name="TOC13" href="#SEC13">NON-PRINTING CHARACTERS</a>
<li><a name="TOC14" href="#SEC14">SAVING AND RELOADING COMPILED PATTERNS</a>
<li><a name="TOC15" href="#SEC15">SEE ALSO</a>
<li><a name="TOC16" href="#SEC16">AUTHOR</a>
<li><a name="TOC17" href="#SEC17">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
@ -63,25 +64,34 @@ conjunction with the test script and data files that are distributed as part of
PCRE, and are unlikely to be of use otherwise. They are all documented here,
but without much justification.
</P>
<br><a name="SEC2" href="#TOC1">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
<br><a name="SEC2" href="#TOC1">INPUT DATA FORMAT</a><br>
<P>
Input to <b>pcretest</b> is processed line by line, either by calling the C
library's <b>fgets()</b> function, or via the <b>libreadline</b> library (see
below). In Unix-like environments, <b>fgets()</b> treats any bytes other than
newline as data characters. However, in some Windows environments character 26
(hex 1A) causes an immediate end of file, and no further data is read. For
maximum portability, therefore, it is safest to use only ASCII characters in
<b>pcretest</b> input files.
</P>
<br><a name="SEC3" href="#TOC1">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
<P>
From release 8.30, two separate PCRE libraries can be built. The original one
supports 8-bit character strings, whereas the newer 16-bit library supports
character strings encoded in 16-bit units. From release 8.32, a third
library can be built, supporting character strings encoded in 32-bit units.
The <b>pcretest</b> program can be
used to test all three libraries. However, it is itself still an 8-bit program,
reading 8-bit input and writing 8-bit output. When testing the 16-bit or 32-bit
library, the patterns and data strings are converted to 16- or 32-bit format
before being passed to the PCRE library functions. Results are converted to
8-bit for output.
character strings encoded in 16-bit units. From release 8.32, a third library
can be built, supporting character strings encoded in 32-bit units. The
<b>pcretest</b> program can be used to test all three libraries. However, it is
itself still an 8-bit program, reading 8-bit input and writing 8-bit output.
When testing the 16-bit or 32-bit library, the patterns and data strings are
converted to 16- or 32-bit format before being passed to the PCRE library
functions. Results are converted to 8-bit for output.
</P>
<P>
References to functions and structures of the form <b>pcre[16|32]_xx</b> below
mean "<b>pcre_xx</b> when using the 8-bit library or <b>pcre16_xx</b> when using
the 16-bit library".
mean "<b>pcre_xx</b> when using the 8-bit library, <b>pcre16_xx</b> when using
the 16-bit library, or <b>pcre32_xx</b> when using the 32-bit library".
</P>
<br><a name="SEC3" href="#TOC1">COMMAND LINE OPTIONS</a><br>
<br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
<P>
<b>-8</b>
If both the 8-bit library has been built, this option causes the 8-bit library
@ -110,23 +120,30 @@ internal form is output after compilation.
<P>
<b>-C</b>
Output the version number of the PCRE library, and all available information
about the optional features that are included, and then exit. All other options
are ignored.
about the optional features that are included, and then exit with zero exit
code. All other options are ignored.
</P>
<P>
<b>-C</b> <i>option</i>
Output information about a specific build-time option, then exit. This
functionality is intended for use in scripts such as <b>RunTest</b>. The
following options output the value indicated:
following options output the value and set the exit code as indicated:
<pre>
ebcdic-nl the code for LF (= NL) in an EBCDIC environment:
0x15 or 0x25
0 if used in an ASCII environment
linksize the internal link size (2, 3, or 4)
exit code is always 0
linksize the configured internal link size (2, 3, or 4)
exit code is set to the link size
newline the default newline setting:
CR, LF, CRLF, ANYCRLF, or ANY
exit code is always 0
bsr the default setting for what \R matches:
ANYCRLF or ANY
exit code is always 0
</pre>
The following options output 1 for true or zero for false:
The following options output 1 for true or 0 for false, and set the exit code
to the same value:
<pre>
ebcdic compiled for an EBCDIC environment
jit just-in-time support is available
@ -134,8 +151,10 @@ The following options output 1 for true or zero for false:
pcre32 the 32-bit library was built
pcre8 the 8-bit library was built
ucp Unicode property support is available
utf UTF-8 and/or UTF-16 and/or UTF-32 support is available
</PRE>
utf UTF-8 and/or UTF-16 and/or UTF-32 support
is available
</pre>
If an unknown option is given, an error message is output; the exit code is 0.
</P>
<P>
<b>-d</b>
@ -171,6 +190,11 @@ equivalent to adding <b>/M</b> to each regular expression. The size is given in
bytes for both libraries.
</P>
<P>
<b>-O</b>
Behave as if each pattern has the <b>/O</b> modifier, that is disable
auto-possessification for all patterns.
</P>
<P>
<b>-o</b> <i>osize</i>
Set the number of elements in the output vector that is used when calling
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> to be <i>osize</i>. The
@ -240,20 +264,25 @@ should never be studied (see the <b>/S</b> pattern modifier below).
</P>
<P>
<b>-t</b>
Run each compile, study, and match many times with a timer, and output
resulting time per compile or match (in milliseconds). Do not set <b>-m</b> with
<b>-t</b>, because you will then get the size output a zillion times, and the
timing will be distorted. You can control the number of iterations that are
used for timing by following <b>-t</b> with a number (as a separate item on the
command line). For example, "-t 1000" would iterate 1000 times. The default is
to iterate 500000 times.
Run each compile, study, and match many times with a timer, and output the
resulting times per compile, study, or match (in milliseconds). Do not set
<b>-m</b> with <b>-t</b>, because you will then get the size output a zillion
times, and the timing will be distorted. You can control the number of
iterations that are used for timing by following <b>-t</b> with a number (as a
separate item on the command line). For example, "-t 1000" iterates 1000 times.
The default is to iterate 500000 times.
</P>
<P>
<b>-tm</b>
This is like <b>-t</b> except that it times only the matching phase, not the
compile or study phases.
</P>
<br><a name="SEC4" href="#TOC1">DESCRIPTION</a><br>
<P>
<b>-T</b> <b>-TM</b>
These behave like <b>-t</b> and <b>-tm</b>, but in addition, at the end of a run,
the total times for all compiles, studies, and matches are output.
</P>
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
<P>
If <b>pcretest</b> is given two filename arguments, it reads from the first and
writes to the second. If it is given only one filename argument, it reads from
@ -271,7 +300,7 @@ option states whether or not <b>readline()</b> will be used.
<P>
The program handles any number of sets of input on a single input file. Each
set starts with a regular expression, and continues with any number of data
lines to be matched against the pattern.
lines to be matched against that pattern.
</P>
<P>
Each data line is matched separately and independently. If you want to do
@ -310,7 +339,7 @@ backslash, because
is interpreted as the first line of a pattern that starts with "abc/", causing
pcretest to read the next line as a continuation of the regular expression.
</P>
<br><a name="SEC5" href="#TOC1">PATTERN MODIFIERS</a><br>
<br><a name="SEC6" href="#TOC1">PATTERN MODIFIERS</a><br>
<P>
A pattern may be followed by any number of modifiers, which are mostly single
characters, though some of these can be qualified by further characters.
@ -323,6 +352,7 @@ fall into several groups that are described in detail in the following
sections.
<pre>
<b>/8</b> set UTF mode
<b>/9</b> set PCRE_NEVER_UTF (locks out UTF mode)
<b>/?</b> disable UTF validity check
<b>/+</b> show remainder of subject after match
<b>/=</b> show all captures (not just those that are set)
@ -344,7 +374,9 @@ sections.
<b>/M</b> show compiled memory size
<b>/m</b> set PCRE_MULTILINE
<b>/N</b> set PCRE_NO_AUTO_CAPTURE
<b>/O</b> set PCRE_NO_AUTO_POSSESS
<b>/P</b> use the POSIX wrapper
<b>/Q</b> test external stack check function
<b>/S</b> study the pattern after compilation
<b>/s</b> set PCRE_DOTALL
<b>/T</b> select character tables
@ -395,12 +427,14 @@ options that do not correspond to anything in Perl:
<b>/8</b> PCRE_UTF32 ) when using the 32-bit
<b>/?</b> PCRE_NO_UTF32_CHECK ) library
<b>/9</b> PCRE_NEVER_UTF
<b>/A</b> PCRE_ANCHORED
<b>/C</b> PCRE_AUTO_CALLOUT
<b>/E</b> PCRE_DOLLAR_ENDONLY
<b>/f</b> PCRE_FIRSTLINE
<b>/J</b> PCRE_DUPNAMES
<b>/N</b> PCRE_NO_AUTO_CAPTURE
<b>/O</b> PCRE_NO_AUTO_POSSESS
<b>/U</b> PCRE_UNGREEDY
<b>/W</b> PCRE_UCP
<b>/X</b> PCRE_EXTRA
@ -504,7 +538,10 @@ below.
The <b>/I</b> modifier requests that <b>pcretest</b> output information about the
compiled pattern (whether it is anchored, has a fixed first character, and
so on). It does this by calling <b>pcre[16|32]_fullinfo()</b> after compiling a
pattern. If the pattern is studied, the results of that are also output.
pattern. If the pattern is studied, the results of that are also output. In
this output, the word "char" means a non-UTF character, that is, the value of a
single data item (8-bit, 16-bit, or 32-bit, depending on the library that is
being tested).
</P>
<P>
The <b>/K</b> modifier requests <b>pcretest</b> to show names from backtracking
@ -538,14 +575,22 @@ successfully studied with the PCRE_STUDY_JIT_COMPILE option, the size of the
JIT compiled code is also output.
</P>
<P>
The <b>/Q</b> modifier is used to test the use of <b>pcre_stack_guard</b>. It
must be followed by '0' or '1', specifying the return code to be given from an
external function that is passed to PCRE and used for stack checking during
compilation (see the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation for details).
</P>
<P>
The <b>/S</b> modifier causes <b>pcre[16|32]_study()</b> to be called after the
expression has been compiled, and the results used when the expression is
matched. There are a number of qualifying characters that may follow <b>/S</b>.
They may appear in any order.
</P>
<P>
If <b>S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is called
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
If <b>/S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is
called with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
<b>pcre_extra</b> block, even when studying discovers no useful information.
</P>
<P>
@ -624,7 +669,38 @@ function:
The <b>/+</b> modifier works as described above. All other modifiers are
ignored.
</P>
<br><a name="SEC6" href="#TOC1">DATA LINES</a><br>
<br><b>
Locking out certain modifiers
</b><br>
<P>
PCRE can be compiled with or without support for certain features such as
UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
into a number of different files that are selected for running depending on
which features are available. When updating the tests, it is all too easy to
put a new test into the wrong file by mistake; for example, to put a test that
requires UTF support into a file that is used when it is not available. To help
detect such mistakes as early as possible, there is a facility for locking out
specific modifiers. If an input line for <b>pcretest</b> starts with the string
"&#60; forbid " the following sequence of characters is taken as a list of
forbidden modifiers. For example, in the test files that must not use UTF or
Unicode property support, this line appears:
<pre>
&#60; forbid 8W
</pre>
This locks out the /8 and /W modifiers. An immediate error is given if they are
subsequently encountered. If the character string contains &#60; but not &#62;, all the
multi-character modifiers that begin with &#60; are locked out. Otherwise, such
modifiers must be explicitly listed, for example:
<pre>
&#60; forbid &#60;JS&#62;&#60;cr&#62;
</pre>
There must be a single space between &#60; and "forbid" for this feature to be
recognised. If there is not, the line is interpreted either as a request to
re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
below) or, if there is a another &#60; character, as a pattern that uses &#60; as its
delimiter.
</P>
<br><a name="SEC7" href="#TOC1">DATA LINES</a><br>
<P>
Before each data line is passed to <b>pcre[16|32]_exec()</b>, leading and trailing
white space is removed, and it is then scanned for \ escapes. Some of these
@ -644,6 +720,7 @@ recognized:
\v vertical tab (\x0b)
\nnn octal character (up to 3 octal digits); always
a byte unless &#62; 255 in UTF-8 or 16-bit or 32-bit mode
\o{dd...} octal character (any number of octal digits}
\xhh hexadecimal byte (up to 2 hex digits)
\x{hh...} hexadecimal character (any number of hex digits)
\A pass the PCRE_ANCHORED option to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b>
@ -748,7 +825,7 @@ API to be used, the only option-setting sequences that have any effect are \B,
\N, and \Z, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively,
to be passed to <b>regexec()</b>.
</P>
<br><a name="SEC7" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
<br><a name="SEC8" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
<P>
By default, <b>pcretest</b> uses the standard PCRE matching function,
<b>pcre[16|32]_exec()</b> to match each data line. PCRE also supports an
@ -765,7 +842,7 @@ This function finds all possible matches at a given point. If, however, the \F
escape sequence is present in the data line, it stops after the first match is
found. This is always the shortest possible match.
</P>
<br><a name="SEC8" href="#TOC1">DEFAULT OUTPUT FROM PCRETEST</a><br>
<br><a name="SEC9" href="#TOC1">DEFAULT OUTPUT FROM PCRETEST</a><br>
<P>
This section describes the output when the normal matching function,
<b>pcre[16|32]_exec()</b>, is being used.
@ -856,7 +933,7 @@ prompt is used for continuations), data lines may not. However newlines can be
included in data by means of the \n escape (or \r, \r\n, etc., depending on
the newline sequence setting).
</P>
<br><a name="SEC9" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
<br><a name="SEC10" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
<P>
When the alternative matching function, <b>pcre[16|32]_dfa_exec()</b>, is used (by
means of the \D escape sequence or the <b>-dfa</b> command line option), the
@ -892,7 +969,7 @@ at the end of the longest match. For example:
Since the matching function does not support substring capture, the escape
sequences that are concerned with captured substrings are not relevant.
</P>
<br><a name="SEC10" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
<br><a name="SEC11" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
<P>
When the alternative matching function has given the PCRE_ERROR_PARTIAL return,
indicating that the subject partially matched the pattern, you can restart the
@ -909,7 +986,7 @@ For further information about partial matching, see the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
documentation.
</P>
<br><a name="SEC11" href="#TOC1">CALLOUTS</a><br>
<br><a name="SEC12" href="#TOC1">CALLOUTS</a><br>
<P>
If the pattern contains any callout requests, <b>pcretest</b>'s callout function
is called during matching. This works with both matching functions. By default,
@ -970,7 +1047,7 @@ the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation.
</P>
<br><a name="SEC12" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
<br><a name="SEC13" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
<P>
When <b>pcretest</b> is outputting text in the compiled version of a pattern,
bytes other than 32-126 are always treated as non-printing characters are are
@ -982,7 +1059,7 @@ string, it behaves in the same way, unless a different locale has been set for
the pattern (using the <b>/L</b> modifier). In this case, the <b>isprint()</b>
function to distinguish printing and non-printing characters.
</P>
<br><a name="SEC13" href="#TOC1">SAVING AND RELOADING COMPILED PATTERNS</a><br>
<br><a name="SEC14" href="#TOC1">SAVING AND RELOADING COMPILED PATTERNS</a><br>
<P>
The facilities described in this section are not available when the POSIX
interface to PCRE is being used, that is, when the <b>/P</b> pattern modifier is
@ -1013,10 +1090,9 @@ writing the file, <b>pcretest</b> expects to read a new pattern.
</P>
<P>
A saved pattern can be reloaded into <b>pcretest</b> by specifying &#60; and a file
name instead of a pattern. The name of the file must not contain a &#60; character,
as otherwise <b>pcretest</b> will interpret the line as a pattern delimited by &#60;
characters.
For example:
name instead of a pattern. There must be no space between &#60; and the file name,
which must not contain a &#60; character, as otherwise <b>pcretest</b> will
interpret the line as a pattern delimited by &#60; characters. For example:
<pre>
re&#62; &#60;/some/file
Compiled pattern loaded from /some/file
@ -1055,14 +1131,14 @@ string using a reloaded pattern is likely to cause <b>pcretest</b> to crash.
Finally, if you attempt to load a file that is not in the correct format, the
result is undefined.
</P>
<br><a name="SEC14" href="#TOC1">SEE ALSO</a><br>
<br><a name="SEC15" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcreapi</b>(3),
<b>pcrecallout</b>(3),
<b>pcrejit</b>, <b>pcrematching</b>(3), <b>pcrepartial</b>(d),
<b>pcrepattern</b>(3), <b>pcreprecompile</b>(3).
</P>
<br><a name="SEC15" href="#TOC1">AUTHOR</a><br>
<br><a name="SEC16" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@ -1071,11 +1147,11 @@ University Computing Service
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
<br><a name="SEC17" href="#TOC1">REVISION</a><br>
<P>
Last updated: 10 September 2012
Last updated: 09 February 2014
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2014 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -85,7 +85,9 @@ place. From release 7.3 of PCRE, the check is according the rules of RFC 3629,
which are themselves derived from the Unicode specification. Earlier releases
of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit
values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0
to U+10FFFF, excluding the surrogate area and the non-characters.
to U+10FFFF, excluding the surrogate area. (From release 8.33 the so-called
"non-character" code points are no longer excluded because Unicode corrigendum
#9 makes it clear that they should not be.)
</P>
<P>
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
@ -96,10 +98,6 @@ surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
UTF-32.)
</P>
<P>
Also excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
and the last two code points in each plane, U+??FFFE and U+??FFFF.
</P>
<P>
If an invalid UTF-8 string is passed to PCRE, an error return is given. At
compile time, the only additional information is the offset to the first byte
of the failing character. The run-time functions <b>pcre_exec()</b> and
@ -135,10 +133,6 @@ U+D800 to U+DFFF are independent code points. Values in the surrogate range
must be used in pairs in the correct manner.
</P>
<P>
Excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
and the last two code points in each plane, U+??FFFE and U+??FFFF.
</P>
<P>
If an invalid UTF-16 string is passed to PCRE, an error return is given. At
compile time, the only additional information is the offset to the first data
unit of the failing character. The run-time functions <b>pcre16_exec()</b> and
@ -160,9 +154,7 @@ Validity of UTF-32 strings
When you set the PCRE_UTF32 flag, the strings of 32-bit data units that are
passed as patterns and subjects are (by default) checked for validity on entry
to the relevant functions. This check allows only values in the range U+0
to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF, and the
"Non-Character" code points, which are U+FDD0 to U+FDEF and the last two
characters in each plane, U+??FFFE and U+??FFFF.
to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF.
</P>
<P>
If an invalid UTF-32 string is passed to PCRE, an error return is given. At
@ -261,9 +253,9 @@ Cambridge CB2 3QH, England.
REVISION
</b><br>
<P>
Last updated: 11 November 2012
Last updated: 27 February 2013
<br>
Copyright &copy; 1997-2012 University of Cambridge.
Copyright &copy; 1997-2013 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.

View File

@ -11,27 +11,29 @@
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
<p>
The HTML documentation for PCRE comprises the following pages:
The HTML documentation for PCRE consists of a number of pages that are listed
below in alphabetical order. If you are new to PCRE, please read the first one
first.
</p>
<table>
<tr><td><a href="pcre.html">pcre</a></td>
<td>&nbsp;&nbsp;Introductory page</td></tr>
<tr><td><a href="pcre-config.html">pcre-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
<tr><td><a href="pcre16.html">pcre16</a></td>
<td>&nbsp;&nbsp;Discussion of the 16-bit PCRE library</td></tr>
<tr><td><a href="pcre32.html">pcre32</a></td>
<td>&nbsp;&nbsp;Discussion of the 32-bit PCRE library</td></tr>
<tr><td><a href="pcre-config.html">pcre-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
<tr><td><a href="pcreapi.html">pcreapi</a></td>
<td>&nbsp;&nbsp;PCRE's native API</td></tr>
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
<td>&nbsp;&nbsp;Options for building PCRE</td></tr>
<td>&nbsp;&nbsp;Building PCRE</td></tr>
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
<td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>
@ -67,7 +69,7 @@ The HTML documentation for PCRE comprises the following pages:
<td>&nbsp;&nbsp;Some comments on performance</td></tr>
<tr><td><a href="pcreposix.html">pcreposix</a></td>
<td>&nbsp;&nbsp;The POSIX API to the PCRE library</td></tr>
<td>&nbsp;&nbsp;The POSIX API to the PCRE 8-bit library</td></tr>
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
<td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
@ -118,13 +120,13 @@ functions.
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
<td>&nbsp;&nbsp;Free study data</td></tr>
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(Perl compatible)</td></tr>
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
<td>&nbsp;&nbsp;Free study data</td></tr>
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
<td>&nbsp;&nbsp;Free extracted substring</td></tr>
@ -140,14 +142,17 @@ functions.
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
<td>&nbsp;&nbsp;Convert captured string name to number</td></tr>
<tr><td><a href="pcre_get_stringtable_entries.html">pcre_get_stringtable_entries</a></td>
<td>&nbsp;&nbsp;Find table entries for given string name</td></tr>
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
<td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
<td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>
<tr><td><a href="pcre_info.html">pcre_info</a></td>
<td>&nbsp;&nbsp;Obsolete information extraction function</td></tr>
<tr><td><a href="pcre_jit_exec.html">pcre_jit_exec</a></td>
<td>&nbsp;&nbsp;Fast path interface to JIT matching</td></tr>
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
<td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>

View File

@ -4,11 +4,11 @@ pcre-config - program to return PCRE configuration
.SH SYNOPSIS
.rs
.sp
.nf
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
.ti +5n
.B [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
.ti +5n
.B [--cflags] [--cflags-posix]
.B " [--libs16] [--libs32] [--libs-cpp] [--libs-posix]"
.B " [--cflags] [--cflags-posix]"
.fi
.
.
.SH DESCRIPTION

View File

@ -1,4 +1,4 @@
PCRE-CONFIG(1) PCRE-CONFIG(1)
PCRE-CONFIG(1) General Commands Manual PCRE-CONFIG(1)
@ -8,8 +8,8 @@ NAME
SYNOPSIS
pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
[--libs16] [--libs32] [--libs-cpp] [--libs-posix]
[--cflags] [--cflags-posix]
[--libs16] [--libs32] [--libs-cpp] [--libs-posix]
[--cflags] [--cflags-posix]
DESCRIPTION

View File

@ -1,4 +1,4 @@
.TH PCRE 3 "11 November 2012" "PCRE 8.32"
.TH PCRE 3 "08 January 2014" "PCRE 8.35"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH INTRODUCTION
@ -19,9 +19,9 @@ built. The majority of the work to make this possible was done by Zoltan
Herczeg.
.P
Starting with release 8.32 it is possible to compile a third separate PCRE
library, which supports 32-bit character strings (including
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
libraries. The work to make this possible was done by Christian Persch.
library that supports 32-bit character strings (including UTF-32 strings). The
build process allows any combination of the 8-, 16- and 32-bit libraries. The
work to make this possible was done by Christian Persch.
.P
The three libraries contain identical sets of functions, except that the names
in the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP, and the
@ -44,7 +44,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
including support for UTF-8/16/32 encoded strings and Unicode general category
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
enabled; it is not the default. The Unicode tables correspond to Unicode
release 6.2.0.
release 6.3.0.
.P
In addition to the Perl-compatible matching function, PCRE contains an
alternative function that matches the same compiled patterns in a different
@ -68,6 +68,7 @@ in the \fIContrib\fP directory at the primary FTP site, which is:
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
.\" </a>
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
.\"
.P
Details of exactly which Perl regular expression features are and are not
supported by PCRE are given in separate documents. See the
@ -95,8 +96,17 @@ available. The features themselves are described in the
\fBpcrebuild\fP
.\"
page. Documentation about building PCRE for various operating systems can be
found in the \fBREADME\fP and \fBNON-AUTOTOOLS_BUILD\fP files in the source
distribution.
found in the
.\" HTML <a href="README.txt">
.\" </a>
\fBREADME\fP
.\"
and
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
.\" </a>
\fBNON-AUTOTOOLS_BUILD\fP
.\"
files in the source distribution.
.P
The libraries contains a number of undocumented internal functions and data
tables that are used by more than one of the exported external functions, but
@ -121,8 +131,11 @@ checked for UTF-8 validity. If the data string is very long, such a check might
use sufficiently many resources as to cause your application to lose
performance.
.P
The best way of guarding against this possibility is to use the
One way of guarding against this possibility is to use the
\fBpcre_fullinfo()\fP function to check the compiled pattern's options for UTF.
Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
compile time. This causes an compile time error if a pattern contains a
UTF-setting sequence.
.P
If your application is one that supports UTF, be aware that validity checking
can take time. If the same data string is to be matched many times, you can use
@ -145,15 +158,18 @@ page.
The user documentation for PCRE comprises a number of different sections. In
the "man" format, each of these is a separate "man page". In the HTML format,
each is a separate page, linked from the index page. In the plain text format,
all the sections, except the \fBpcredemo\fP section, are concatenated, for ease
of searching. The sections are as follows:
the descriptions of the \fBpcregrep\fP and \fBpcretest\fP programs are in files
called \fBpcregrep.txt\fP and \fBpcretest.txt\fP, respectively. The remaining
sections, except for the \fBpcredemo\fP section (which is a program listing),
are concatenated in \fBpcre.txt\fP, for ease of searching. The sections are as
follows:
.sp
pcre this document
pcre-config show PCRE installation configuration information
pcre16 details of the 16-bit library
pcre32 details of the 32-bit library
pcre-config show PCRE installation configuration information
pcreapi details of PCRE's native C API
pcrebuild options for building PCRE
pcrebuild building PCRE
pcrecallout details of the callout feature
pcrecompat discussion of Perl compatibility
pcrecpp details of the C++ wrapper for the 8-bit library
@ -175,8 +191,8 @@ of searching. The sections are as follows:
pcretest description of the \fBpcretest\fP testing command
pcreunicode discussion of Unicode and UTF-8/16/32 support
.sp
In addition, in the "man" and HTML formats, there is a short page for each
C library function, listing its arguments and results.
In the "man" and HTML formats, there is also a short page for each C library
function, listing its arguments and results.
.
.
.SH AUTHOR
@ -197,6 +213,6 @@ two digits 10, at the domain cam.ac.uk.
.rs
.sp
.nf
Last updated: 11 November 2012
Copyright (c) 1997-2012 University of Cambridge.
Last updated: 08 January 2014
Copyright (c) 1997-2014 University of Cambridge.
.fi

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
.TH PCRE 3 "12 May 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
@ -8,140 +8,120 @@ PCRE - Perl-compatible regular expressions
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
.rs
.sp
.SM
.nf
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.sp
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B " int *\fIerrorcodeptr\fP,"
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.sp
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.PP
.B " const char **\fIerrptr\fP);"
.sp
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
.PP
.sp
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
.sp
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
.fi
.
.
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
.rs
.sp
.nf
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
.sp
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
.B " int \fIbuffersize\fP);"
.sp
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
.PP
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
.B " PCRE_SPTR16 *\fIstringptr\fP);"
.sp
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIname\fP);
.PP
.B " PCRE_SPTR16 \fIname\fP);
.sp
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
.PP
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
.sp
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
.PP
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
.B " PCRE_SPTR16 *\fIstringptr\fP);"
.sp
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
.PP
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
.sp
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
.PP
.sp
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
.fi
.
.
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
.rs
.sp
.nf
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
.PP
.sp
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
.PP
.sp
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
.ti +5n
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
.PP
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
.sp
.B const unsigned char *pcre16_maketables(void);
.PP
.sp
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B " int \fIwhat\fP, void *\fIwhere\fP);"
.sp
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
.PP
.sp
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
.sp
.B const char *pcre16_version(void);
.PP
.sp
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
.ti +5n
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
.fi
.
.
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
.rs
.sp
.nf
.B void *(*pcre16_malloc)(size_t);
.PP
.sp
.B void (*pcre16_free)(void *);
.PP
.sp
.B void *(*pcre16_stack_malloc)(size_t);
.PP
.sp
.B void (*pcre16_stack_free)(void *);
.PP
.sp
.B int (*pcre16_callout)(pcre16_callout_block *);
.fi
.
.
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
.rs
.sp
.nf
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
.ti +5n
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
.ti +5n
.B int \fIkeep_boms\fP);
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
.B " int \fIkeep_boms\fP);"
.fi
.
.
.SH "THE PCRE 16-BIT LIBRARY"
@ -246,8 +226,9 @@ buffer, including the zero terminator if the string was zero-terminated.
.SH "SUBJECT STRING OFFSETS"
.rs
.sp
The offsets within subject strings that are returned by the matching functions
are in 16-bit units rather than bytes.
The lengths and starting offsets of subject strings must be specified in 16-bit
data units, and the offsets within subject strings that are returned by the
matching functions are in also 16-bit units rather than bytes.
.
.
.SH "NAMED SUBPATTERNS"
@ -385,6 +366,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
Last updated: 08 November 2012
Copyright (c) 1997-2012 University of Cambridge.
Last updated: 12 May 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
.TH PCRE 3 "12 May 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
@ -8,140 +8,119 @@ PCRE - Perl-compatible regular expressions
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
.rs
.sp
.SM
.nf
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.sp
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B " int *\fIerrorcodeptr\fP,"
.B " const unsigned char *\fItableptr\fP);"
.sp
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.PP
.B " const char **\fIerrptr\fP);"
.sp
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
.PP
.sp
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
.sp
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
.fi
.
.
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
.rs
.sp
.nf
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
.sp
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
.B " int \fIbuffersize\fP);"
.sp
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR32 *\fIstringptr\fP);
.PP
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
.B " PCRE_SPTR32 *\fIstringptr\fP);"
.sp
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIname\fP);
.PP
.B " PCRE_SPTR32 \fIname\fP);"
.sp
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
.PP
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
.sp
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B PCRE_SPTR32 *\fIstringptr\fP);
.PP
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
.B " PCRE_SPTR32 *\fIstringptr\fP);"
.sp
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
.PP
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
.sp
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
.PP
.sp
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
.fi
.
.
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
.rs
.sp
.nf
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
.PP
.sp
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
.PP
.sp
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
.ti +5n
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
.PP
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
.sp
.B const unsigned char *pcre32_maketables(void);
.PP
.sp
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B " int \fIwhat\fP, void *\fIwhere\fP);"
.sp
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
.PP
.sp
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
.sp
.B const char *pcre32_version(void);
.PP
.sp
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
.ti +5n
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
.fi
.
.
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
.rs
.sp
.nf
.B void *(*pcre32_malloc)(size_t);
.PP
.sp
.B void (*pcre32_free)(void *);
.PP
.sp
.B void *(*pcre32_stack_malloc)(size_t);
.PP
.sp
.B void (*pcre32_stack_free)(void *);
.PP
.sp
.B int (*pcre32_callout)(pcre32_callout_block *);
.fi
.
.
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
.rs
.sp
.nf
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
.ti +5n
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
.ti +5n
.B int \fIkeep_boms\fP);
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
.B " int \fIkeep_boms\fP);"
.fi
.
.
.SH "THE PCRE 32-BIT LIBRARY"
@ -246,8 +225,9 @@ buffer, including the zero terminator if the string was zero-terminated.
.SH "SUBJECT STRING OFFSETS"
.rs
.sp
The offsets within subject strings that are returned by the matching functions
are in 32-bit units rather than bytes.
The lengths and starting offsets of subject strings must be specified in 32-bit
data units, and the offsets within subject strings that are returned by the
matching functions are in also 32-bit units rather than bytes.
.
.
.SH "NAMED SUBPATTERNS"
@ -384,6 +364,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
Last updated: 08 November 2012
Copyright (c) 1997-2012 University of Cambridge.
Last updated: 12 May 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi

View File

@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
.ti +5n
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
.PP
.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);"
.sp
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
.ti +5n
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
.PP
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
.sp
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
.ti +5n
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -1,4 +1,4 @@
.TH PCRE_COMPILE 3 "24 June 2012" "PCRE 8.30"
.TH PCRE_COMPILE 3 "01 October 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.sp
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.sp
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.fi
.
.SH DESCRIPTION
.rs
@ -56,6 +51,7 @@ The option bits are:
PCRE_FIRSTLINE Force matching to be before newline
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
PCRE_MULTILINE ^ and $ match newlines within data
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
sequences
@ -64,6 +60,8 @@ The option bits are:
PCRE_NEWLINE_LF Set LF as the newline sequence
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
theses (named ones available)
PCRE_NO_AUTO_POSSESS Disable auto-possessification
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
validity (only relevant if
PCRE_UTF16 is set)

View File

@ -1,4 +1,4 @@
.TH PCRE_COMPILE2 3 "24 June 2012" "PCRE 8.30"
.TH PCRE_COMPILE2 3 "01 October 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B " int *\fIerrorcodeptr\fP,"
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.sp
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B " int *\fIerrorcodeptr\fP,"
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.sp
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.B " int *\fIerrorcodeptr\fP,£
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
.B " const unsigned char *\fItableptr\fP);"
.fi
.
.SH DESCRIPTION
.rs
@ -64,6 +56,7 @@ The option bits are:
PCRE_FIRSTLINE Force matching to be before newline
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
PCRE_MULTILINE ^ and $ match newlines within data
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
sequences
@ -72,6 +65,8 @@ The option bits are:
PCRE_NEWLINE_LF Set LF as the newline sequence
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
theses (named ones available)
PCRE_NO_AUTO_POSSESS Disable auto-possessification
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
validity (only relevant if
PCRE_UTF16 is set)

View File

@ -1,4 +1,4 @@
.TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
.TH PCRE_CONFIG 3 "05 November 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@ -33,6 +33,7 @@ point to an unsigned long integer. The available codes are:
target architecture for the JIT compiler,
or NULL if there is no JIT support
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
PCRE_CONFIG_MATCH_LIMIT_RECURSION
Internal recursion depth limit

View File

@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
.sp
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
.sp
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
.B " int \fIbuffersize\fP);"
.sp
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
.B " int \fIbuffersize\fP);"
.sp
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
.B " int \fIbuffersize\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -1,4 +1,4 @@
.TH PCRE_DFA_EXEC 3 "24 June 2012" "PCRE 8.30"
.TH PCRE_DFA_EXEC 3 "12 May 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.PP
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
.sp
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.PP
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
.sp
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
.fi
.
.SH DESCRIPTION
.rs
@ -44,16 +36,17 @@ are:
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
or is NULL
\fIsubject\fP Points to the subject string
\fIlength\fP Length of the subject string, in bytes
\fIstartoffset\fP Offset in bytes in the subject at which to
start matching
\fIlength\fP Length of the subject string
\fIstartoffset\fP Offset in the subject at which to start matching
\fIoptions\fP Option bits
\fIovector\fP Points to a vector of ints for result offsets
\fIovecsize\fP Number of elements in the vector
\fIworkspace\fP Points to a vector of ints used as working space
\fIwscount\fP Number of elements in the vector
.sp
The options are:
The units for \fIlength\fP and \fIstartoffset\fP are bytes for
\fBpcre_exec()\fP, 16-bit data items for \fBpcre16_exec()\fP, and 32-bit items
for \fBpcre32_exec()\fP. The options are:
.sp
PCRE_ANCHORED Match only at the first position
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF

View File

@ -1,4 +1,4 @@
.TH PCRE_EXEC 3 "24 June 2012" "PCRE 8.30"
.TH PCRE_EXEC 3 "12 May 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
.sp
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
.sp
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
.fi
.
.SH DESCRIPTION
.rs
@ -36,14 +31,15 @@ offsets to captured substrings. Its arguments are:
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
or is NULL
\fIsubject\fP Points to the subject string
\fIlength\fP Length of the subject string, in bytes
\fIstartoffset\fP Offset in bytes in the subject at which to
start matching
\fIlength\fP Length of the subject string
\fIstartoffset\fP Offset in the subject at which to start matching
\fIoptions\fP Option bits
\fIovector\fP Points to a vector of ints for result offsets
\fIovecsize\fP Number of elements in the vector (a multiple of 3)
.sp
The options are:
The units for \fIlength\fP and \fIstartoffset\fP are bytes for
\fBpcre_exec()\fP, 16-bit data items for \fBpcre16_exec()\fP, and 32-bit items
for \fBpcre32_exec()\fP. The options are:
.sp
PCRE_ANCHORED Match only at the first position
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF

View File

@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B " int \fIwhat\fP, void *\fIwhere\fP);"
.sp
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B " int \fIwhat\fP, void *\fIwhere\fP);"
.sp
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.B " int \fIwhat\fP, void *\fIwhere\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n
.B const char **\fIstringptr\fP);
.PP
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
.B " const char **\fIstringptr\fP);"
.sp
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
.PP
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
.B " PCRE_SPTR16 *\fIstringptr\fP);"
.sp
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR32 *\fIstringptr\fP);
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
.B " PCRE_SPTR32 *\fIstringptr\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIname\fP);
.PP
.B " const char *\fIname\fP);"
.sp
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIname\fP);
.PP
.B " PCRE_SPTR16 \fIname\fP);"
.sp
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIname\fP);
.B " PCRE_SPTR32 \fIname\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
.PP
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
.sp
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
.PP
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
.sp
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B const char **\fIstringptr\fP);
.PP
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
.B " const char **\fIstringptr\fP);"
.sp
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
.PP
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
.B " PCRE_SPTR16 *\fIstringptr\fP);"
.sp
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B PCRE_SPTR32 *\fIstringptr\fP);
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
.B " PCRE_SPTR32 *\fIstringptr\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_get_substring_list(const char *\fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
.PP
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
.sp
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
.PP
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
.sp
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B pcre_jit_stack *\fIjstack\fP);
.PP
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
.B " pcre_jit_stack *\fIjstack\fP);"
.sp
.B int pcre16_jit_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B pcre_jit_stack *\fIjstack\fP);
.PP
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
.B " pcre_jit_stack *\fIjstack\fP);"
.sp
.B int pcre32_jit_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B pcre_jit_stack *\fIjstack\fP);
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
.B " pcre_jit_stack *\fIjstack\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP,
.ti +5n
.B int \fImaxsize\fP);
.PP
.B " int \fImaxsize\fP);"
.sp
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP,
.ti +5n
.B int \fImaxsize\fP);
.PP
.B " int \fImaxsize\fP);"
.sp
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP,
.ti +5n
.B int \fImaxsize\fP);
.B " int \fImaxsize\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
.ti +5n
.B pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);
.PP
.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
.sp
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
.ti +5n
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
.PP
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
.sp
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
.ti +5n
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.PP
.B " const char **\fIerrptr\fP);"
.sp
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.PP
.B " const char **\fIerrptr\fP);"
.sp
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.B " const char **\fIerrptr\fP);"
.fi
.
.SH DESCRIPTION
.rs

View File

@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
.ti +5n
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
.ti +5n
.B int \fIkeep_boms\fP);
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
.B " int \fIkeep_boms\fP);"
.fi
.
.
.SH DESCRIPTION

View File

@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.PP
.SM
.nf
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
.ti +5n
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
.ti +5n
.B int \fIkeep_boms\fP);
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
.B " int \fIkeep_boms\fP);"
.fi
.
.
.SH DESCRIPTION

File diff suppressed because it is too large Load Diff

View File

@ -1,24 +1,54 @@
.TH PCREBUILD 3 "30 October 2012" "PCRE 8.32"
.TH PCREBUILD 3 "12 May 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.
.
.SH "BUILDING PCRE"
.rs
.sp
PCRE is distributed with a \fBconfigure\fP script that can be used to build the
library in Unix-like environments using the applications known as Autotools.
Also in the distribution are files to support building using \fBCMake\fP
instead of \fBconfigure\fP. The text file
.\" HTML <a href="README.txt">
.\" </a>
\fBREADME\fP
.\"
contains general information about building with Autotools (some of which is
repeated below), and also has some comments about building on various operating
systems. There is a lot more information about building PCRE without using
Autotools (including information about using \fBCMake\fP and building "by
hand") in the text file called
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
.\" </a>
\fBNON-AUTOTOOLS-BUILD\fP.
.\"
You should consult this file as well as the
.\" HTML <a href="README.txt">
.\" </a>
\fBREADME\fP
.\"
file if you are building in a non-Unix-like environment.
.
.
.SH "PCRE BUILD-TIME OPTIONS"
.rs
.sp
This document describes the optional features of PCRE that can be selected when
the library is compiled. It assumes use of the \fBconfigure\fP script, where
the optional features are selected or deselected by providing options to
\fBconfigure\fP before running the \fBmake\fP command. However, the same
options can be selected in both Unix-like and non-Unix-like environments using
the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead of
\fBconfigure\fP to build PCRE.
The rest of this document describes the optional features of PCRE that can be
selected when the library is compiled. It assumes use of the \fBconfigure\fP
script, where the optional features are selected or deselected by providing
options to \fBconfigure\fP before running the \fBmake\fP command. However, the
same options can be selected in both Unix-like and non-Unix-like environments
using the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead
of \fBconfigure\fP to build PCRE.
.P
There is a lot more information about building PCRE without using
\fBconfigure\fP (including information about using \fBCMake\fP or building "by
hand") in the file called \fINON-AUTOTOOLS-BUILD\fP, which is part of the PCRE
distribution. You should consult this file as well as the \fIREADME\fP file if
you are building in a non-Unix-like environment.
If you are not using Autotools or \fBCMake\fP, option selection can be done by
editing the \fBconfig.h\fP file, or by passing parameter settings to the
compiler, as described in
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
.\" </a>
\fBNON-AUTOTOOLS-BUILD\fP.
.\"
.P
The complete list of options for \fBconfigure\fP (which includes the standard
ones such as the selection of the installation directory) can be obtained by
@ -45,7 +75,7 @@ strings, by adding
.sp
--enable-pcre16
.sp
to the \fBconfigure\fP command. You can also build a separate
to the \fBconfigure\fP command. You can also build yet another separate
library, called \fBlibpcre32\fP, in which strings are contained in vectors of
32-bit data units and interpreted either as single-unit characters or UTF-32
strings, by adding
@ -65,8 +95,8 @@ an 8-bit program. None of these are built if you select only the 16-bit or
.SH "BUILDING SHARED AND STATIC LIBRARIES"
.rs
.sp
The PCRE building process uses \fBlibtool\fP to build both shared and static
Unix libraries by default. You can suppress one of these by adding one of
The Autotools PCRE building process uses \fBlibtool\fP to build both shared and
static libraries by default. You can suppress one of these by adding one of
.sp
--disable-shared
--disable-static
@ -515,6 +545,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
Last updated: 30 October 2012
Copyright (c) 1997-2012 University of Cambridge.
Last updated: 12 May 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRECALLOUT 3 "24 June 2012" "PCRE 8.30"
.TH PCRECALLOUT 3 "12 November 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@ -41,26 +41,64 @@ it is processed as if it were
(?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
.sp
Notice that there is a callout before and after each parenthesis and
alternation bar. Automatic callouts can be used for tracking the progress of
pattern matching. The
alternation bar. If the pattern contains a conditional group whose condition is
an assertion, an automatic callout is inserted immediately before the
condition. Such a callout may also be inserted explicitly, for example:
.sp
(?(?C9)(?=a)ab|de)
.sp
This applies only to assertion conditions (because they are themselves
independent groups).
.P
Automatic callouts can be used for tracking the progress of pattern matching.
The
.\" HREF
\fBpcretest\fP
.\"
command has an option that sets automatic callouts; when it is used, the output
indicates how the pattern is matched. This is useful information when you are
trying to optimize the performance of a particular pattern.
.P
The use of callouts in a pattern makes it ineligible for optimization by the
just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE
option always fails.
program has a pattern qualifier (/C) that sets automatic callouts; when it is
used, the output indicates how the pattern is being matched. This is useful
information when you are trying to optimize the performance of a particular
pattern.
.
.
.SH "MISSING CALLOUTS"
.rs
.sp
You should be aware that, because of optimizations in the way PCRE matches
patterns by default, callouts sometimes do not happen. For example, if the
pattern is
You should be aware that, because of optimizations in the way PCRE compiles and
matches patterns, callouts sometimes do not happen exactly as you might expect.
.P
At compile time, PCRE "auto-possessifies" repeated items when it knows that
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
if it were a++[bc]. The \fBpcretest\fP output when this pattern is anchored and
then applied with automatic callouts to the string "aaaa" is:
.sp
--->aaaa
+0 ^ ^
+1 ^ a+
+3 ^ ^ [bc]
No match
.sp
This indicates that when matching [bc] fails, there is no backtracking into a+
and therefore the callouts that would be taken for the backtracks do not occur.
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
to \fBpcre_compile()\fP, or starting the pattern with (*NO_AUTO_POSSESS). If
this is done in \fBpcretest\fP (using the /O qualifier), the output changes to
this:
.sp
--->aaaa
+0 ^ ^
+1 ^ a+
+3 ^ ^ [bc]
+3 ^ ^ [bc]
+3 ^ ^ [bc]
+3 ^^ [bc]
No match
.sp
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
again, repeatedly, until a+ itself fails.
.P
Other optimizations that provide fast "no match" results also affect callouts.
For example, if the pattern is
.sp
ab(?C4)cd
.sp
@ -84,11 +122,11 @@ callouts such as the example above are obeyed.
.rs
.sp
During matching, when PCRE reaches a callout point, the external function
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called
(if it is set). This applies to both normal and DFA matching. The only
argument to the callout function is a pointer to a \fBpcre_callout\fP
or \fBpcre[16|32]_callout\fP block.
These structures contains the following fields:
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called (if it is
set). This applies to both normal and DFA matching. The only argument to the
callout function is a pointer to a \fBpcre_callout\fP or
\fBpcre[16|32]_callout\fP block. These structures contains the following
fields:
.sp
int \fIversion\fP;
int \fIcallout_number\fP;
@ -119,10 +157,10 @@ automatically generated callouts).
.P
The \fIoffset_vector\fP field is a pointer to the vector of offsets that was
passed by the caller to the matching function. When \fBpcre_exec()\fP or
\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to extract
substrings that have been matched so far, in the same way as for extracting
substrings after a match has completed. For the DFA matching functions, this
field is not useful.
\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to
extract substrings that have been matched so far, in the same way as for
extracting substrings after a match has completed. For the DFA matching
functions, this field is not useful.
.P
The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values
that were passed to the matching function.
@ -144,8 +182,10 @@ value of \fIcapture_top\fP is one. This is always the case when the DFA
functions are used, because they do not support captured substrings.
.P
The \fIcapture_last\fP field contains the number of the most recently captured
substring. If no substrings have been captured, its value is -1. This is always
the case for the DFA matching functions.
substring. However, when a recursion exits, the value reverts to what it was
outside the recursion, as do the values of all captured substrings. If no
substrings have been captured, the value of \fIcapture_last\fP is -1. This is
always the case for the DFA matching functions.
.P
The \fIcallout_data\fP field contains a value that is passed to a matching
function specifically so that it can be passed back in callouts. It is passed
@ -173,11 +213,12 @@ help in distinguishing between different automatic callouts, which all have the
same callout number. However, they are set for all callouts.
.P
The \fImark\fP field is present from version 2 of the callout structure. In
callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a pointer to
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
(*THEN) item in the match, or NULL if no such items have been passed. Instances
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
callouts from the DFA matching functions this field always contains NULL.
callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a
pointer to the zero-terminated name of the most recently passed (*MARK),
(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
previous (*MARK). In callouts from the DFA matching functions this field always
contains NULL.
.
.
.SH "RETURN VALUES"
@ -209,6 +250,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
Last updated: 24 June 2012
Copyright (c) 1997-2012 University of Cambridge.
Last updated: 12 November 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi

Some files were not shown because too many files have changed in this diff Show More