Regex: Update PCRE to v8.35.
I was über lazy at first, so took libs from SM. But actually it's quite easy to compile, so let's update to latest version \o/.
This commit is contained in:
parent
d1153b8049
commit
d4de0e6f1e
|
@ -108,9 +108,9 @@ while (<STDIN>)
|
|||
|
||||
# Handling .sp is subtle. If it is inside a literal section, do nothing if
|
||||
# the next line is a non literal text line; similarly, if not inside a
|
||||
# literal section, do nothing if a literal follows. The point being that
|
||||
# the <pre> and </pre> that delimit literal sections will do the spacing.
|
||||
# Always skip if no previous output.
|
||||
# literal section, do nothing if a literal follows, unless we are inside
|
||||
# a .nf/.ne section. The point being that the <pre> and </pre> that delimit
|
||||
# literal sections will do the spacing. Always skip if no previous output.
|
||||
|
||||
elsif (/^\.sp/)
|
||||
{
|
||||
|
@ -123,7 +123,7 @@ while (<STDIN>)
|
|||
}
|
||||
else
|
||||
{
|
||||
print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
|
||||
print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
|
||||
}
|
||||
redo; # Now process the lookahead line we just read
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
Copyright (c) 1997-2014 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
|
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2012 Zoltan Herczeg
|
||||
Copyright(c) 2010-2014 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2012 Zoltan Herczeg
|
||||
Copyright(c) 2009-2014 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
|
|
@ -60,6 +60,11 @@
|
|||
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
|
||||
# 2012-09-08 ChPe added PCRE32 support
|
||||
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||
# so it has been removed.
|
||||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
|
||||
PROJECT(PCRE C CXX)
|
||||
|
||||
|
@ -128,6 +133,9 @@ SET(PCRE_EBCDIC_NL25 OFF CACHE BOOL
|
|||
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
||||
|
||||
SET(PCRE_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||
|
||||
|
@ -164,9 +172,6 @@ SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
|
|||
SET(PCRE_SUPPORT_VALGRIND OFF CACHE BOOL
|
||||
"Enable Valgrind support.")
|
||||
|
||||
SET(PCRE_SUPPORT_COVERAGE OFF CACHE BOOL
|
||||
"Enable code coverage support using gcov.")
|
||||
|
||||
OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON)
|
||||
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
|
||||
OPTION(PCRE_BUILD_TESTS "Build the tests" ON)
|
||||
|
@ -181,6 +186,12 @@ IF (MINGW)
|
|||
OFF)
|
||||
ENDIF(MINGW)
|
||||
|
||||
IF(MSVC)
|
||||
OPTION(INSTALL_MSVC_PDB
|
||||
"ON=Install .pdb files built by MSVC, if generated"
|
||||
OFF)
|
||||
ENDIF(MSVC)
|
||||
|
||||
# bzip2 lib
|
||||
IF(BZIP2_FOUND)
|
||||
OPTION (PCRE_SUPPORT_LIBBZ2 "Enable support for linking pcregrep with libbz2." ON)
|
||||
|
@ -296,13 +307,6 @@ IF(PCRE_SUPPORT_VALGRIND)
|
|||
SET(SUPPORT_VALGRIND 1)
|
||||
ENDIF(PCRE_SUPPORT_VALGRIND)
|
||||
|
||||
IF(PCRE_SUPPORT_COVERAGE)
|
||||
SET(SUPPORT_GCOV 1)
|
||||
IF(NOT CMAKE_COMPILER_IS_GNUCC)
|
||||
MESSAGE(FATAL_ERROR "Code coverage reports can only be generated when using GCC")
|
||||
ENDIF(NOT CMAKE_COMPILER_IS_GNUCC)
|
||||
ENDIF(PCRE_SUPPORT_COVERAGE)
|
||||
|
||||
# This next one used to contain
|
||||
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
|
||||
# but I was advised to add the NCURSES test as well, along with
|
||||
|
@ -552,6 +556,17 @@ SET(PCREPOSIX_SOURCES
|
|||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
||||
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||
|
||||
IF(MSVC AND NOT PCRE_STATIC)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
|
||||
SET(PCRE_SOURCES
|
||||
${PCRE_SOURCES} pcre.rc)
|
||||
ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
|
||||
IF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
||||
SET(PCREPOSIX_SOURCES
|
||||
${PCREPOSIX_SOURCES} pcreposix.rc)
|
||||
ENDIF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
|
||||
ENDIF(MSVC AND NOT PCRE_STATIC)
|
||||
|
||||
SET(PCRECPP_HEADERS
|
||||
pcrecpp.h
|
||||
pcre_scanner.h
|
||||
|
@ -570,7 +585,7 @@ SET(PCRECPP_SOURCES
|
|||
ADD_DEFINITIONS(-DHAVE_CONFIG_H)
|
||||
|
||||
IF(MSVC)
|
||||
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE)
|
||||
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS)
|
||||
ENDIF(MSVC)
|
||||
|
||||
SET(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||
|
@ -772,7 +787,7 @@ MESSAGE(\" \")
|
|||
# This is a generated file.
|
||||
srcdir=${PROJECT_SOURCE_DIR}
|
||||
pcretest=${PCRETEST_EXE}
|
||||
source ${PROJECT_SOURCE_DIR}/RunTest
|
||||
. ${PROJECT_SOURCE_DIR}/RunTest
|
||||
if test \"$?\" != \"0\"; then exit 1; fi
|
||||
# End
|
||||
")
|
||||
|
@ -788,7 +803,7 @@ if test \"$?\" != \"0\"; then exit 1; fi
|
|||
srcdir=${PROJECT_SOURCE_DIR}
|
||||
pcregrep=${PCREGREP_EXE}
|
||||
pcretest=${PCRETEST_EXE}
|
||||
source ${PROJECT_SOURCE_DIR}/RunGrepTest
|
||||
. ${PROJECT_SOURCE_DIR}/RunGrepTest
|
||||
if test \"$?\" != \"0\"; then exit 1; fi
|
||||
# End
|
||||
")
|
||||
|
@ -877,6 +892,17 @@ INSTALL(FILES ${man1} DESTINATION man/man1)
|
|||
INSTALL(FILES ${man3} DESTINATION man/man3)
|
||||
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
|
||||
|
||||
IF(MSVC AND INSTALL_MSVC_PDB)
|
||||
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre.pdb
|
||||
${PROJECT_BINARY_DIR}/pcreposix.pdb
|
||||
DESTINATION bin
|
||||
CONFIGURATIONS RelWithDebInfo)
|
||||
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcred.pdb
|
||||
${PROJECT_BINARY_DIR}/pcreposixd.pdb
|
||||
DESTINATION bin
|
||||
CONFIGURATIONS Debug)
|
||||
ENDIF(MSVC AND INSTALL_MSVC_PDB)
|
||||
|
||||
# help, only for nice output
|
||||
IF(BUILD_SHARED_LIBS)
|
||||
SET(BUILD_STATIC_LIBS OFF)
|
||||
|
@ -917,6 +943,7 @@ IF(PCRE_SHOW_REPORT)
|
|||
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
|
||||
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
|
||||
MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
|
||||
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE_PARENS_NEST_LIMIT}")
|
||||
MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
|
||||
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
|
||||
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||
|
@ -953,6 +980,11 @@ IF(PCRE_SHOW_REPORT)
|
|||
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
||||
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")
|
||||
ENDIF(MINGW AND NOT PCRE_STATIC)
|
||||
|
||||
IF(MSVC)
|
||||
MESSAGE(STATUS " Install MSVC .pdb files ..........: ${INSTALL_MSVC_PDB}")
|
||||
ENDIF(MSVC)
|
||||
|
||||
MESSAGE(STATUS "")
|
||||
ENDIF(PCRE_SHOW_REPORT)
|
||||
|
||||
|
|
|
@ -1,6 +1,501 @@
|
|||
ChangeLog for PCRE
|
||||
------------------
|
||||
|
||||
Version 8.35 04-April-2014
|
||||
--------------------------
|
||||
|
||||
1. A new flag is set, when property checks are present in an XCLASS.
|
||||
When this flag is not set, PCRE can perform certain optimizations
|
||||
such as studying these XCLASS-es.
|
||||
|
||||
2. The auto-possessification of character sets was improved: a normal
|
||||
and an extended character set can be compared now. Furthermore
|
||||
the JIT compiler optimizes more character set checks.
|
||||
|
||||
3. Got rid of some compiler warnings for potentially uninitialized variables
|
||||
that show up only when compiled with -O2.
|
||||
|
||||
4. A pattern such as (?=ab\K) that uses \K in an assertion can set the start
|
||||
of a match later than the end of the match. The pcretest program was not
|
||||
handling the case sensibly - it was outputting from the start to the next
|
||||
binary zero. It now reports this situation in a message, and outputs the
|
||||
text from the end to the start.
|
||||
|
||||
5. Fast forward search is improved in JIT. Instead of the first three
|
||||
characters, any three characters with fixed position can be searched.
|
||||
Search order: first, last, middle.
|
||||
|
||||
6. Improve character range checks in JIT. Characters are read by an imprecise
|
||||
function now, which returns with an unknown value if the character code is
|
||||
above a certain threshold (e.g. 256). The only limitation is that the value
|
||||
must be bigger than the threshold as well. This function is useful, when
|
||||
the characters above the threshold are handled in the same way.
|
||||
|
||||
7. The macros whose names start with RAWUCHAR are placeholders for a future
|
||||
mode in which only the bottom 21 bits of 32-bit data items are used. To
|
||||
make this more memorable for those maintaining the code, the names have
|
||||
been changed to start with UCHAR21, and an extensive comment has been added
|
||||
to their definition.
|
||||
|
||||
8. Add missing (new) files sljitNativeTILEGX.c and sljitNativeTILEGX-encoder.c
|
||||
to the export list in Makefile.am (they were accidentally omitted from the
|
||||
8.34 tarball).
|
||||
|
||||
9. The informational output from pcretest used the phrase "starting byte set"
|
||||
which is inappropriate for the 16-bit and 32-bit libraries. As the output
|
||||
for "first char" and "need char" really means "non-UTF-char", I've changed
|
||||
"byte" to "char", and slightly reworded the output. The documentation about
|
||||
these values has also been (I hope) clarified.
|
||||
|
||||
10. Another JIT related optimization: use table jumps for selecting the correct
|
||||
backtracking path, when more than four alternatives are present inside a
|
||||
bracket.
|
||||
|
||||
11. Empty match is not possible, when the minimum length is greater than zero,
|
||||
and there is no \K in the pattern. JIT should avoid empty match checks in
|
||||
such cases.
|
||||
|
||||
12. In a caseless character class with UCP support, when a character with more
|
||||
than one alternative case was not the first character of a range, not all
|
||||
the alternative cases were added to the class. For example, s and \x{17f}
|
||||
are both alternative cases for S: the class [RST] was handled correctly,
|
||||
but [R-T] was not.
|
||||
|
||||
13. The configure.ac file always checked for pthread support when JIT was
|
||||
enabled. This is not used in Windows, so I have put this test inside a
|
||||
check for the presence of windows.h (which was already tested for).
|
||||
|
||||
14. Improve pattern prefix search by a simplified Boyer-Moore algorithm in JIT.
|
||||
The algorithm provides a way to skip certain starting offsets, and is usually
|
||||
faster than linear prefix searches.
|
||||
|
||||
15. Change 13 for 8.20 updated RunTest to check for the 'fr' locale as well
|
||||
as for 'fr_FR' and 'french'. For some reason, however, it then used the
|
||||
Windows-specific input and output files, which have 'french' screwed in.
|
||||
So this could never have worked. One of the problems with locales is that
|
||||
they aren't always the same. I have now updated RunTest so that it checks
|
||||
the output of the locale test (test 3) against three different output
|
||||
files, and it allows the test to pass if any one of them matches. With luck
|
||||
this should make the test pass on some versions of Solaris where it was
|
||||
failing. Because of the uncertainty, the script did not used to stop if
|
||||
test 3 failed; it now does. If further versions of a French locale ever
|
||||
come to light, they can now easily be added.
|
||||
|
||||
16. If --with-pcregrep-bufsize was given a non-integer value such as "50K",
|
||||
there was a message during ./configure, but it did not stop. This now
|
||||
provokes an error. The invalid example in README has been corrected.
|
||||
If a value less than the minimum is given, the minimum value has always
|
||||
been used, but now a warning is given.
|
||||
|
||||
17. If --enable-bsr-anycrlf was set, the special 16/32-bit test failed. This
|
||||
was a bug in the test system, which is now fixed. Also, the list of various
|
||||
configurations that are tested for each release did not have one with both
|
||||
16/32 bits and --enable-bsr-anycrlf. It now does.
|
||||
|
||||
18. pcretest was missing "-C bsr" for displaying the \R default setting.
|
||||
|
||||
19. Little endian PowerPC systems are supported now by the JIT compiler.
|
||||
|
||||
20. The fast forward newline mechanism could enter an infinite loop on
|
||||
certain invalid UTF-8 input. Although we don't support these cases
|
||||
this issue can be fixed by a performance optimization.
|
||||
|
||||
21. Change 33 of 8.34 is not sufficient to ensure stack safety because it does
|
||||
not take account of existing stack usage. There is now a new global
|
||||
variable called pcre_stack_guard that can be set to point to an external
|
||||
function to check stack availability. It is called at the start of
|
||||
processing every parenthesized group.
|
||||
|
||||
22. A typo in the code meant that in ungreedy mode the max/min qualifier
|
||||
behaved like a min-possessive qualifier, and, for example, /a{1,3}b/U did
|
||||
not match "ab".
|
||||
|
||||
23. When UTF was disabled, the JIT program reported some incorrect compile
|
||||
errors. These messages are silenced now.
|
||||
|
||||
24. Experimental support for ARM-64 and MIPS-64 has been added to the JIT
|
||||
compiler.
|
||||
|
||||
25. Change all the temporary files used in RunGrepTest to be different to those
|
||||
used by RunTest so that the tests can be run simultaneously, for example by
|
||||
"make -j check".
|
||||
|
||||
|
||||
Version 8.34 15-December-2013
|
||||
-----------------------------
|
||||
|
||||
1. Add pcre[16|32]_jit_free_unused_memory to forcibly free unused JIT
|
||||
executable memory. Patch inspired by Carsten Klein.
|
||||
|
||||
2. ./configure --enable-coverage defined SUPPORT_GCOV in config.h, although
|
||||
this macro is never tested and has no effect, because the work to support
|
||||
coverage involves only compiling and linking options and special targets in
|
||||
the Makefile. The comment in config.h implied that defining the macro would
|
||||
enable coverage support, which is totally false. There was also support for
|
||||
setting this macro in the CMake files (my fault, I just copied it from
|
||||
configure). SUPPORT_GCOV has now been removed.
|
||||
|
||||
3. Make a small performance improvement in strlen16() and strlen32() in
|
||||
pcretest.
|
||||
|
||||
4. Change 36 for 8.33 left some unreachable statements in pcre_exec.c,
|
||||
detected by the Solaris compiler (gcc doesn't seem to be able to diagnose
|
||||
these cases). There was also one in pcretest.c.
|
||||
|
||||
5. Cleaned up a "may be uninitialized" compiler warning in pcre_exec.c.
|
||||
|
||||
6. In UTF mode, the code for checking whether a group could match an empty
|
||||
string (which is used for indefinitely repeated groups to allow for
|
||||
breaking an infinite loop) was broken when the group contained a repeated
|
||||
negated single-character class with a character that occupied more than one
|
||||
data item and had a minimum repetition of zero (for example, [^\x{100}]* in
|
||||
UTF-8 mode). The effect was undefined: the group might or might not be
|
||||
deemed as matching an empty string, or the program might have crashed.
|
||||
|
||||
7. The code for checking whether a group could match an empty string was not
|
||||
recognizing that \h, \H, \v, \V, and \R must match a character.
|
||||
|
||||
8. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||
an empty string. If it can, pcretest shows this in its information output.
|
||||
|
||||
9. Fixed two related bugs that applied to Unicode extended grapheme clusters
|
||||
that were repeated with a maximizing qualifier (e.g. \X* or \X{2,5}) when
|
||||
matched by pcre_exec() without using JIT:
|
||||
|
||||
(a) If the rest of the pattern did not match after a maximal run of
|
||||
grapheme clusters, the code for backing up to try with fewer of them
|
||||
did not always back up over a full grapheme when characters that do not
|
||||
have the modifier quality were involved, e.g. Hangul syllables.
|
||||
|
||||
(b) If the match point in a subject started with a modifier character, and
|
||||
there was no match, the code could incorrectly back up beyond the match
|
||||
point, and potentially beyond the first character in the subject,
|
||||
leading to a segfault or an incorrect match result.
|
||||
|
||||
10. A conditional group with an assertion condition could lead to PCRE
|
||||
recording an incorrect first data item for a match if no other first data
|
||||
item was recorded. For example, the pattern (?(?=ab)ab) recorded "a" as a
|
||||
first data item, and therefore matched "ca" after "c" instead of at the
|
||||
start.
|
||||
|
||||
11. Change 40 for 8.33 (allowing pcregrep to find empty strings) showed up a
|
||||
bug that caused the command "echo a | ./pcregrep -M '|a'" to loop.
|
||||
|
||||
12. The source of pcregrep now includes z/OS-specific code so that it can be
|
||||
compiled for z/OS as part of the special z/OS distribution.
|
||||
|
||||
13. Added the -T and -TM options to pcretest.
|
||||
|
||||
14. The code in pcre_compile.c for creating the table of named capturing groups
|
||||
has been refactored. Instead of creating the table dynamically during the
|
||||
actual compiling pass, the information is remembered during the pre-compile
|
||||
pass (on the stack unless there are more than 20 named groups, in which
|
||||
case malloc() is used) and the whole table is created before the actual
|
||||
compile happens. This has simplified the code (it is now nearly 150 lines
|
||||
shorter) and prepared the way for better handling of references to groups
|
||||
with duplicate names.
|
||||
|
||||
15. A back reference to a named subpattern when there is more than one of the
|
||||
same name now checks them in the order in which they appear in the pattern.
|
||||
The first one that is set is used for the reference. Previously only the
|
||||
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||
|
||||
16. Unicode character properties were updated from Unicode 6.3.0.
|
||||
|
||||
17. The compile-time code for auto-possessification has been refactored, based
|
||||
on a patch by Zoltan Herczeg. It now happens after instead of during
|
||||
compilation. The code is cleaner, and more cases are handled. The option
|
||||
PCRE_NO_AUTO_POSSESS is added for testing purposes, and the -O and /O
|
||||
options in pcretest are provided to set it. It can also be set by
|
||||
(*NO_AUTO_POSSESS) at the start of a pattern.
|
||||
|
||||
18. The character VT has been added to the default ("C" locale) set of
|
||||
characters that match \s and are generally treated as white space,
|
||||
following this same change in Perl 5.18. There is now no difference between
|
||||
"Perl space" and "POSIX space". Whether VT is treated as white space in
|
||||
other locales depends on the locale.
|
||||
|
||||
19. The code for checking named groups as conditions, either for being set or
|
||||
for being recursed, has been refactored (this is related to 14 and 15
|
||||
above). Processing unduplicated named groups should now be as fast as
|
||||
numerical groups, and processing duplicated groups should be faster than
|
||||
before.
|
||||
|
||||
20. Two patches to the CMake build system, by Alexander Barkov:
|
||||
|
||||
(1) Replace the "source" command by "." in CMakeLists.txt because
|
||||
"source" is a bash-ism.
|
||||
|
||||
(2) Add missing HAVE_STDINT_H and HAVE_INTTYPES_H to config-cmake.h.in;
|
||||
without these the CMake build does not work on Solaris.
|
||||
|
||||
21. Perl has changed its handling of \8 and \9. If there is no previously
|
||||
encountered capturing group of those numbers, they are treated as the
|
||||
literal characters 8 and 9 instead of a binary zero followed by the
|
||||
literals. PCRE now does the same.
|
||||
|
||||
22. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||
possible to specify values greater than 0777 and also making them
|
||||
unambiguous.
|
||||
|
||||
23. Perl now gives an error for missing closing braces after \x{... instead of
|
||||
treating the string as literal. PCRE now does the same.
|
||||
|
||||
24. RunTest used to grumble if an inappropriate test was selected explicitly,
|
||||
but just skip it when running all tests. This made it awkward to run ranges
|
||||
of tests when one of them was inappropriate. Now it just skips any
|
||||
inappropriate tests, as it always did when running all tests.
|
||||
|
||||
25. If PCRE_AUTO_CALLOUT and PCRE_UCP were set for a pattern that contained
|
||||
character types such as \d or \w, too many callouts were inserted, and the
|
||||
data that they returned was rubbish.
|
||||
|
||||
26. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||
were matched by \h. The code has now been refactored so that the lists of
|
||||
the horizontal and vertical whitespace characters used for \h and \v (which
|
||||
are defined only in one place) are now also used for \s.
|
||||
|
||||
27. Add JIT support for the 64 bit TileGX architecture.
|
||||
Patch by Jiong Wang (Tilera Corporation).
|
||||
|
||||
28. Possessive quantifiers for classes (both explicit and automatically
|
||||
generated) now use special opcodes instead of wrapping in ONCE brackets.
|
||||
|
||||
29. Whereas an item such as A{4}+ ignored the possessiveness of the quantifier
|
||||
(because it's meaningless), this was not happening when PCRE_CASELESS was
|
||||
set. Not wrong, but inefficient.
|
||||
|
||||
30. Updated perltest.pl to add /u (force Unicode mode) when /W (use Unicode
|
||||
properties for \w, \d, etc) is present in a test regex. Otherwise if the
|
||||
test contains no characters greater than 255, Perl doesn't realise it
|
||||
should be using Unicode semantics.
|
||||
|
||||
31. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||
does in Unicode mode.
|
||||
|
||||
32. Added the "forbid" facility to pcretest so that putting tests into the
|
||||
wrong test files can sometimes be quickly detected.
|
||||
|
||||
33. There is now a limit (default 250) on the depth of nesting of parentheses.
|
||||
This limit is imposed to control the amount of system stack used at compile
|
||||
time. It can be changed at build time by --with-parens-nest-limit=xxx or
|
||||
the equivalent in CMake.
|
||||
|
||||
34. Character classes such as [A-\d] or [a-[:digit:]] now cause compile-time
|
||||
errors. Perl warns for these when in warning mode, but PCRE has no facility
|
||||
for giving warnings.
|
||||
|
||||
35. Change 34 for 8.13 allowed quantifiers on assertions, because Perl does.
|
||||
However, this was not working for (?!) because it is optimized to (*FAIL),
|
||||
for which PCRE does not allow quantifiers. The optimization is now disabled
|
||||
when a quantifier follows (?!). I can't see any use for this, but it makes
|
||||
things uniform.
|
||||
|
||||
36. Perl no longer allows group names to start with digits, so I have made this
|
||||
change also in PCRE. It simplifies the code a bit.
|
||||
|
||||
37. In extended mode, Perl ignores spaces before a + that indicates a
|
||||
possessive quantifier. PCRE allowed a space before the quantifier, but not
|
||||
before the possessive +. It now does.
|
||||
|
||||
38. The use of \K (reset reported match start) within a repeated possessive
|
||||
group such as (a\Kb)*+ was not working.
|
||||
|
||||
40. Document that the same character tables must be used at compile time and
|
||||
run time, and that the facility to pass tables to pcre_exec() and
|
||||
pcre_dfa_exec() is for use only with saved/restored patterns.
|
||||
|
||||
41. Applied Jeff Trawick's patch CMakeLists.txt, which "provides two new
|
||||
features for Builds with MSVC:
|
||||
|
||||
1. Support pcre.rc and/or pcreposix.rc (as is already done for MinGW
|
||||
builds). The .rc files can be used to set FileDescription and many other
|
||||
attributes.
|
||||
|
||||
2. Add an option (-DINSTALL_MSVC_PDB) to enable installation of .pdb files.
|
||||
This allows higher-level build scripts which want .pdb files to avoid
|
||||
hard-coding the exact files needed."
|
||||
|
||||
42. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||
|
||||
43. A minimizing repeat of a class containing codepoints greater than 255 in
|
||||
non-UTF 16-bit or 32-bit modes caused an internal error when PCRE was
|
||||
compiled to use the heap for recursion.
|
||||
|
||||
44. Got rid of some compiler warnings for unused variables when UTF but not UCP
|
||||
is configured.
|
||||
|
||||
|
||||
Version 8.33 28-May-2013
|
||||
------------------------
|
||||
|
||||
1. Added 'U' to some constants that are compared to unsigned integers, to
|
||||
avoid compiler signed/unsigned warnings. Added (int) casts to unsigned
|
||||
variables that are added to signed variables, to ensure the result is
|
||||
signed and can be negated.
|
||||
|
||||
2. Applied patch by Daniel Richard G for quashing MSVC warnings to the
|
||||
CMake config files.
|
||||
|
||||
3. Revise the creation of config.h.generic so that all boolean macros are
|
||||
#undefined, whereas non-boolean macros are #ifndef/#endif-ed. This makes
|
||||
overriding via -D on the command line possible.
|
||||
|
||||
4. Changing the definition of the variable "op" in pcre_exec.c from pcre_uchar
|
||||
to unsigned int is reported to make a quite noticeable speed difference in
|
||||
a specific Windows environment. Testing on Linux did also appear to show
|
||||
some benefit (and it is clearly not harmful). Also fixed the definition of
|
||||
Xop which should be unsigned.
|
||||
|
||||
5. Related to (4), changing the definition of the intermediate variable cc
|
||||
in repeated character loops from pcre_uchar to pcre_uint32 also gave speed
|
||||
improvements.
|
||||
|
||||
6. Fix forward search in JIT when link size is 3 or greater. Also removed some
|
||||
unnecessary spaces.
|
||||
|
||||
7. Adjust autogen.sh and configure.ac to lose warnings given by automake 1.12
|
||||
and later.
|
||||
|
||||
8. Fix two buffer over read issues in 16 and 32 bit modes. Affects JIT only.
|
||||
|
||||
9. Optimizing fast_forward_start_bits in JIT.
|
||||
|
||||
10. Adding support for callouts in JIT, and fixing some issues revealed
|
||||
during this work. Namely:
|
||||
|
||||
(a) Unoptimized capturing brackets incorrectly reset on backtrack.
|
||||
|
||||
(b) Minimum length was not checked before the matching is started.
|
||||
|
||||
11. The value of capture_last that is passed to callouts was incorrect in some
|
||||
cases when there was a capture on one path that was subsequently abandoned
|
||||
after a backtrack. Also, the capture_last value is now reset after a
|
||||
recursion, since all captures are also reset in this case.
|
||||
|
||||
12. The interpreter no longer returns the "too many substrings" error in the
|
||||
case when an overflowing capture is in a branch that is subsequently
|
||||
abandoned after a backtrack.
|
||||
|
||||
13. In the pathological case when an offset vector of size 2 is used, pcretest
|
||||
now prints out the matched string after a yield of 0 or 1.
|
||||
|
||||
14. Inlining subpatterns in recursions, when certain conditions are fulfilled.
|
||||
Only supported by the JIT compiler at the moment.
|
||||
|
||||
15. JIT compiler now supports 32 bit Macs thanks to Lawrence Velazquez.
|
||||
|
||||
16. Partial matches now set offsets[2] to the "bumpalong" value, that is, the
|
||||
offset of the starting point of the matching process, provided the offsets
|
||||
vector is large enough.
|
||||
|
||||
17. The \A escape now records a lookbehind value of 1, though its execution
|
||||
does not actually inspect the previous character. This is to ensure that,
|
||||
in partial multi-segment matching, at least one character from the old
|
||||
segment is retained when a new segment is processed. Otherwise, if there
|
||||
are no lookbehinds in the pattern, \A might match incorrectly at the start
|
||||
of a new segment.
|
||||
|
||||
18. Added some #ifdef __VMS code into pcretest.c to help VMS implementations.
|
||||
|
||||
19. Redefined some pcre_uchar variables in pcre_exec.c as pcre_uint32; this
|
||||
gives some modest performance improvement in 8-bit mode.
|
||||
|
||||
20. Added the PCRE-specific property \p{Xuc} for matching characters that can
|
||||
be expressed in certain programming languages using Universal Character
|
||||
Names.
|
||||
|
||||
21. Unicode validation has been updated in the light of Unicode Corrigendum #9,
|
||||
which points out that "non characters" are not "characters that may not
|
||||
appear in Unicode strings" but rather "characters that are reserved for
|
||||
internal use and have only local meaning".
|
||||
|
||||
22. When a pattern was compiled with automatic callouts (PCRE_AUTO_CALLOUT) and
|
||||
there was a conditional group that depended on an assertion, if the
|
||||
assertion was false, the callout that immediately followed the alternation
|
||||
in the condition was skipped when pcre_exec() was used for matching.
|
||||
|
||||
23. Allow an explicit callout to be inserted before an assertion that is the
|
||||
condition for a conditional group, for compatibility with automatic
|
||||
callouts, which always insert a callout at this point.
|
||||
|
||||
24. In 8.31, (*COMMIT) was confined to within a recursive subpattern. Perl also
|
||||
confines (*SKIP) and (*PRUNE) in the same way, and this has now been done.
|
||||
|
||||
25. (*PRUNE) is now supported by the JIT compiler.
|
||||
|
||||
26. Fix infinite loop when /(?<=(*SKIP)ac)a/ is matched against aa.
|
||||
|
||||
27. Fix the case where there are two or more SKIPs with arguments that may be
|
||||
ignored.
|
||||
|
||||
28. (*SKIP) is now supported by the JIT compiler.
|
||||
|
||||
29. (*THEN) is now supported by the JIT compiler.
|
||||
|
||||
30. Update RunTest with additional test selector options.
|
||||
|
||||
31. The way PCRE handles backtracking verbs has been changed in two ways.
|
||||
|
||||
(1) Previously, in something like (*COMMIT)(*SKIP), COMMIT would override
|
||||
SKIP. Now, PCRE acts on whichever backtracking verb is reached first by
|
||||
backtracking. In some cases this makes it more Perl-compatible, but Perl's
|
||||
rather obscure rules do not always do the same thing.
|
||||
|
||||
(2) Previously, backtracking verbs were confined within assertions. This is
|
||||
no longer the case for positive assertions, except for (*ACCEPT). Again,
|
||||
this sometimes improves Perl compatibility, and sometimes does not.
|
||||
|
||||
32. A number of tests that were in test 2 because Perl did things differently
|
||||
have been moved to test 1, because either Perl or PCRE has changed, and
|
||||
these tests are now compatible.
|
||||
|
||||
32. Backtracking control verbs are now handled in the same way in JIT and
|
||||
interpreter.
|
||||
|
||||
33. An opening parenthesis in a MARK/PRUNE/SKIP/THEN name in a pattern that
|
||||
contained a forward subroutine reference caused a compile error.
|
||||
|
||||
34. Auto-detect and optimize limited repetitions in JIT.
|
||||
|
||||
35. Implement PCRE_NEVER_UTF to lock out the use of UTF, in particular,
|
||||
blocking (*UTF) etc.
|
||||
|
||||
36. In the interpreter, maximizing pattern repetitions for characters and
|
||||
character types now use tail recursion, which reduces stack usage.
|
||||
|
||||
37. The value of the max lookbehind was not correctly preserved if a compiled
|
||||
and saved regex was reloaded on a host of different endianness.
|
||||
|
||||
38. Implemented (*LIMIT_MATCH) and (*LIMIT_RECURSION). As part of the extension
|
||||
of the compiled pattern block, expand the flags field from 16 to 32 bits
|
||||
because it was almost full.
|
||||
|
||||
39. Try madvise first before posix_madvise.
|
||||
|
||||
40. Change 7 for PCRE 7.9 made it impossible for pcregrep to find empty lines
|
||||
with a pattern such as ^$. It has taken 4 years for anybody to notice! The
|
||||
original change locked out all matches of empty strings. This has been
|
||||
changed so that one match of an empty string per line is recognized.
|
||||
Subsequent searches on the same line (for colouring or for --only-matching,
|
||||
for example) do not recognize empty strings.
|
||||
|
||||
41. Applied a user patch to fix a number of spelling mistakes in comments.
|
||||
|
||||
42. Data lines longer than 65536 caused pcretest to crash.
|
||||
|
||||
43. Clarified the data type for length and startoffset arguments for pcre_exec
|
||||
and pcre_dfa_exec in the function-specific man pages, where they were
|
||||
explicitly stated to be in bytes, never having been updated. I also added
|
||||
some clarification to the pcreapi man page.
|
||||
|
||||
44. A call to pcre_dfa_exec() with an output vector size less than 2 caused
|
||||
a segmentation fault.
|
||||
|
||||
|
||||
Version 8.32 30-November-2012
|
||||
-----------------------------
|
||||
|
||||
|
@ -1508,7 +2003,8 @@ Version 7.9 11-Apr-09
|
|||
7. A pattern that could match an empty string could cause pcregrep to loop; it
|
||||
doesn't make sense to accept an empty string match in pcregrep, so I have
|
||||
locked it out (using PCRE's PCRE_NOTEMPTY option). By experiment, this
|
||||
seems to be how GNU grep behaves.
|
||||
seems to be how GNU grep behaves. [But see later change 40 for release
|
||||
8.33.]
|
||||
|
||||
8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
|
||||
start or after a newline", because the conditional assertion was not being
|
||||
|
@ -1751,7 +2247,7 @@ Version 7.7 07-May-08
|
|||
containing () gave an internal compiling error instead of "reference to
|
||||
non-existent subpattern". Fortunately, when the pattern did exist, the
|
||||
compiled code was correct. (When scanning forwards to check for the
|
||||
existencd of the subpattern, it was treating the data ']' as terminating
|
||||
existence of the subpattern, it was treating the data ']' as terminating
|
||||
the class, so got the count wrong. When actually compiling, the reference
|
||||
was subsequently set up correctly.)
|
||||
|
||||
|
|
|
@ -29,9 +29,9 @@ while (scalar(@ARGV) > 0)
|
|||
^\.TH\s\S|
|
||||
^\.SH\s\S|
|
||||
^\.SS\s\S|
|
||||
^\.TP(?:\s\d+)?\s*$|
|
||||
^\.ti\s\S|
|
||||
^\.TP(?:\s?\d+)?\s*$|
|
||||
^\.SM\s*$|
|
||||
^\.br\s*$|
|
||||
^\.rs\s*$|
|
||||
^\.sp\s*$|
|
||||
^\.nf\s*$|
|
||||
|
|
|
@ -54,12 +54,12 @@ Support for 16-bit and 32-bit data strings
|
|||
|
||||
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
||||
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating different
|
||||
libraries. In the description that follows, the word "short" is
|
||||
used for a 16-bit data quantity, and the word "unit" is used for a quantity
|
||||
that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
|
||||
integer in 32-bit mode. However, so as not to over-complicate the text, the
|
||||
names of PCRE functions are given in 8-bit form only.
|
||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
|
||||
different libraries. In the description that follows, the word "short" is used
|
||||
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
|
||||
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
|
||||
However, so as not to over-complicate the text, the names of PCRE functions are
|
||||
given in 8-bit form only.
|
||||
|
||||
|
||||
Computing the memory requirement: how it was
|
||||
|
@ -94,6 +94,11 @@ runs more slowly than before (30% or more, depending on the pattern) because it
|
|||
is doing a full analysis of the pattern. My hope was that this would not be a
|
||||
big issue, and in the event, nobody has commented on it.
|
||||
|
||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||
(default 250, settable at build time) so as to put a limit on the amount of
|
||||
system stack used by pcre_compile(). This is a safety feature for environments
|
||||
with small stacks where the patterns are provided by users.
|
||||
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
|
@ -122,27 +127,28 @@ same way. See the user documentation for details.
|
|||
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
||||
because it may have a number of states active at one time. More work would be
|
||||
needed at compile time to produce a traditional FSM where only one state is
|
||||
ever active at once. I believe some other regex matchers work this way.
|
||||
ever active at once. I believe some other regex matchers work this way. JIT
|
||||
support is not available for this kind of matching.
|
||||
|
||||
|
||||
Changeable options
|
||||
------------------
|
||||
|
||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may
|
||||
change in the middle of patterns. From PCRE 8.13, their processing is handled
|
||||
entirely at compile time by generating different opcodes for the different
|
||||
settings. The runtime functions do not need to keep track of an options state
|
||||
any more.
|
||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
|
||||
others) may change in the middle of patterns. From PCRE 8.13, their processing
|
||||
is handled entirely at compile time by generating different opcodes for the
|
||||
different settings. The runtime functions do not need to keep track of an
|
||||
options state any more.
|
||||
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
|
||||
shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
|
||||
items of variable length. The first unit in an item contains an opcode, and
|
||||
the length of the item is either implicit in the opcode or contained in the
|
||||
data that follows it.
|
||||
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
|
||||
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
|
||||
variable length. The first unit in an item contains an opcode, and the length
|
||||
of the item is either implicit in the opcode or contained in the data that
|
||||
follows it.
|
||||
|
||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||
|
@ -151,8 +157,10 @@ default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
|
|||
LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
|
||||
larger than 2 is necessary only when patterns whose compiled length is greater
|
||||
than 64K are going to be processed. In this description, we assume the "normal"
|
||||
compilation options. Data values that are counts (e.g. for quantifiers) are
|
||||
always just two bytes long (one short in 16-bit mode).
|
||||
compilation options. Data values that are counts (e.g. quantifiers) are two
|
||||
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
|
||||
and 32-bit modes.
|
||||
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
@ -162,7 +170,7 @@ These items are all just one unit long
|
|||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
OP_ALLANY match any one character, including newline
|
||||
OP_ANYBYTE match any single byte, even in UTF-8 mode
|
||||
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM, start of match (subject + offset): \G
|
||||
OP_SET_SOM, set start of match (\K)
|
||||
|
@ -180,28 +188,33 @@ These items are all just one unit long
|
|||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or \n at end: \Z
|
||||
OP_EODN match end of data or newline at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before final newline)
|
||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||
OP_EXTUNI match an extended Unicode character
|
||||
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ASSERT_ACCEPT )
|
||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||
OP_PRUNE ) OP_CLOSE, followed by a 2-byte number,
|
||||
OP_SKIP ) indicating which parentheses must be closed.
|
||||
OP_PRUNE ) OP_CLOSE, each followed by a count that
|
||||
OP_SKIP ) indicates which parentheses must be closed.
|
||||
OP_THEN )
|
||||
|
||||
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||
This ends the assertion, not the entire pattern match.
|
||||
|
||||
|
||||
Backtracking control verbs with (optional) data
|
||||
-----------------------------------------------
|
||||
Backtracking control verbs with optional data
|
||||
---------------------------------------------
|
||||
|
||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
||||
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
||||
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
||||
following in the same format.
|
||||
following in the same format as OP_MARK.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
|
@ -212,6 +225,10 @@ casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
|||
the character may be more than one unit long. In UTF-32 mode, characters
|
||||
are always exactly one unit long.
|
||||
|
||||
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||
for something like [^a]).
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
|
@ -232,10 +249,9 @@ following opcodes, which come in caseful and caseless versions:
|
|||
|
||||
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
||||
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
||||
UTF-32 mode these are one-unit items.
|
||||
Those with "MIN" in their names are the minimizing versions. Those with "POS"
|
||||
in their names are possessive versions. Other repeats make use of these
|
||||
opcodes:
|
||||
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
|
||||
minimizing versions. Those with "POS" in their names are possessive versions.
|
||||
Other repeats make use of these opcodes:
|
||||
|
||||
Caseful Caseless
|
||||
OP_UPTO OP_UPTOI
|
||||
|
@ -243,10 +259,15 @@ opcodes:
|
|||
OP_POSUPTO OP_POSUPTOI
|
||||
OP_EXACT OP_EXACTI
|
||||
|
||||
Each of these is followed by a two-byte (one short) count (most significant
|
||||
byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from
|
||||
0 to the given number. A repeat with a non-zero minimum and a fixed maximum is
|
||||
coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OPT_POSUPTO).
|
||||
Each of these is followed by a count and then the repeated character. OP_UPTO
|
||||
matches from 0 to the given number. A repeat with a non-zero minimum and a
|
||||
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
|
||||
OP_POSUPTO).
|
||||
|
||||
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||
positive single-character classes.
|
||||
|
||||
|
||||
Repeating character types
|
||||
|
@ -277,7 +298,10 @@ Match by Unicode property
|
|||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two units that encode the desired property as a type and a
|
||||
value.
|
||||
value. The types are a set of #defines of the form PT_xxx, and the values are
|
||||
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
|
||||
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
|
||||
PT_SC (Script).
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||
|
@ -287,67 +311,88 @@ value.
|
|||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character in the class, OP_CHAR or OP_CHARI is used for a
|
||||
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||
something like [^a]).
|
||||
|
||||
Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for
|
||||
repeated, negated, single-character classes. The normal single-character
|
||||
opcodes (OP_STAR, etc.) are used for repeated positive single-character
|
||||
classes.
|
||||
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||
negated, single-character classes. The normal single-character opcodes
|
||||
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||
|
||||
When there is more than one character in a class and all the characters are
|
||||
When there is more than one character in a class, and all the code points are
|
||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short)
|
||||
bit map containing a 1 bit for every character that is acceptable. The bits are
|
||||
counted from the least significant end of each unit. In caseless mode, bits for
|
||||
both cases are set.
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||
bits are counted from the least significant end of each unit. In caseless mode,
|
||||
bits for both cases are set.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
|
||||
subject characters with values greater than 255 can be handled correctly. For
|
||||
OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
|
||||
mode, subject characters with values greater than 255 can be handled correctly.
|
||||
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||
|
||||
For classes containing characters with values greater than 255, OP_XCLASS is
|
||||
used. It optionally uses a bit map (if any characters lie within it), followed
|
||||
by a list of pairs (for a range) and single characters. In caseless mode, both
|
||||
cases are explicitly listed. There is a flag character than indicates whether
|
||||
it is a positive or a negative class.
|
||||
For classes containing characters with values greater than 255 or that contain
|
||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
|
||||
are less than 256, followed by a list of pairs (for a range) and single
|
||||
characters. In caseless mode, both cases are explicitly listed.
|
||||
|
||||
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
|
||||
this is a negative class, and XCL_MAP indicates that a bit map is present.
|
||||
There follows the bit map, if XCL_MAP is set, and then a sequence of items
|
||||
coded as follows:
|
||||
|
||||
XCL_END marks the end of the list
|
||||
XCL_SINGLE one character follows
|
||||
XCL_RANGE two characters follow
|
||||
XCL_PROP a Unicode property (type, value) follows
|
||||
XCL_NOTPROP a Unicode property (type, value) follows
|
||||
|
||||
If a range starts with a code point less than 256 and ends with one greater
|
||||
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
|
||||
This means that if no other items in the class set bits in the map, a map is
|
||||
not needed.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short)
|
||||
containing the reference number.
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||
reference number if the reference is to a unique capturing group (either by
|
||||
number or by name). When named groups are used, there may be more than one
|
||||
group with the same name. In this case, a reference by name generates OP_DNREF
|
||||
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
|
||||
in the group name table of the first entry for the required name, followed by
|
||||
the number of groups with the same name.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This section
|
||||
applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information
|
||||
follows the base item. The matching code looks at the following opcode to see
|
||||
if it is one of
|
||||
applies to other classes and also to back references. In both cases, the repeat
|
||||
information follows the base item. The matching code looks at the following
|
||||
opcode to see if it is one of
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPOSSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRPOSPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRPOSQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
OP_CRPOSRANGE
|
||||
|
||||
All but the last two are just single-unit items. The others are followed by
|
||||
four bytes (two shorts) of data, comprising the minimum and maximum repeat
|
||||
counts. There are no special possessive opcodes for these repeats; a possessive
|
||||
repeat is compiled into an atomic group.
|
||||
All but the last three are single-unit items, with no data. The others are
|
||||
followed by the minimum and maximum repeat counts.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing (round) brackets is wrapped round each expression at
|
||||
A pair of non-capturing round brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
[Note for North Americans: "bracket" to some English speakers, including
|
||||
|
@ -364,13 +409,13 @@ A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
|||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||
number immediately follows the offset, always as a 2-byte (one short) item.
|
||||
number is a count that immediately follows the offset.
|
||||
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, and
|
||||
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
|
||||
maximally respectively (see below for possessive repetitions). All three are
|
||||
followed by LINK_SIZE bytes giving (as a positive number) the offset back to
|
||||
the matching bracket opcode.
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
|
||||
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||
respectively (see below for possessive repetitions). All three are followed by
|
||||
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
|
||||
bracket opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||
|
@ -397,6 +442,7 @@ final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
|||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||
OP_KETRMAX, and if so, to break the loop.
|
||||
|
||||
|
||||
Possessive brackets
|
||||
-------------------
|
||||
|
||||
|
@ -407,26 +453,34 @@ of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
|||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||
|
||||
|
||||
Once-only (atomic) groups
|
||||
-------------------------
|
||||
|
||||
These are just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
|
||||
within the atomic group; the latter when there are. The distinction is needed
|
||||
for when there is a backtrack to before the group - any captures within the
|
||||
group must be reset, so it is necessary to retain backtracking points inside
|
||||
the group even after it is complete in order to do this. When there are no
|
||||
captures in an atomic group, all the backtracking can be discarded when it is
|
||||
complete. This is more efficient, and also uses less stack.
|
||||
|
||||
The check for matching an empty string in an unbounded repeat is handled
|
||||
entirely at runtime, so there are just these two opcodes for atomic groups.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are just like other subpatterns, but starting with one of
|
||||
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
Forward assertions are also just like other subpatterns, but starting with one
|
||||
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||
is OP_REVERSE, followed by a two byte (one short) count of the number of
|
||||
characters to move back the pointer in the subject string. In ASCII mode, the
|
||||
count is a number of units, but in UTF-8/16 mode each character may occupy more
|
||||
than one unit; in UTF-32 mode each character occupies exactly one unit.
|
||||
A separate count is present in each alternative of a lookbehind
|
||||
assertion, allowing them to have different fixed lengths.
|
||||
|
||||
|
||||
Once-only (atomic) subpatterns
|
||||
------------------------------
|
||||
|
||||
These are also just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE. The check for matching an empty string in an unbounded repeat is
|
||||
handled entirely at runtime, so there is just this one opcode.
|
||||
is OP_REVERSE, followed by a count of the number of characters to move back the
|
||||
pointer in the subject string. In ASCII mode, the count is a number of units,
|
||||
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
|
||||
mode each character occupies exactly one unit. A separate count is present in
|
||||
each alternative of a lookbehind assertion, allowing them to have different
|
||||
fixed lengths.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
|
@ -435,28 +489,29 @@ Conditional subpatterns
|
|||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
||||
the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by two bytes (one short)
|
||||
containing the reference number. OP_NCREF is used instead if the reference was
|
||||
generated by name (so that the runtime code knows to check for duplicate
|
||||
names).
|
||||
subpattern using the opcode OP_CREF followed by a count containing the
|
||||
reference number, provided that the reference is to a unique capturing group.
|
||||
If the reference was by name and there is more than one group with that name,
|
||||
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||
names table, and the number of groups with the same name.
|
||||
|
||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||
subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
|
||||
zero for "the whole pattern". For a DEFINE condition, just the single unit
|
||||
OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
|
||||
always starts with one of the assertions.
|
||||
subpattern using the opcode OP_RREF (with a value of zero for "the whole
|
||||
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
|
||||
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
|
||||
conditional subpattern always starts with one of the assertions.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current regex, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by an value which is the offset to the starting bracket
|
||||
from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||
automatically wrapped inside OP_ONCE brackets (because otherwise some patterns
|
||||
broke it). OP_RECURSE is also used for "subroutine" calls, even though they
|
||||
are not strictly a recursion.
|
||||
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
|
||||
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
|
||||
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
|
||||
not strictly a recursion.
|
||||
|
||||
|
||||
Callout
|
||||
|
@ -464,10 +519,10 @@ Callout
|
|||
|
||||
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||
cases there follows a two-byte (one short) value giving the offset in the
|
||||
pattern to the start of the following item, and another two-byte (one short)
|
||||
item giving the length of the next item.
|
||||
|
||||
cases there follows a count giving the offset in the pattern string to the
|
||||
start of the following item, and another count giving the length of this item.
|
||||
These values make it possible for pcretest to output useful tracing information
|
||||
using automatic callouts.
|
||||
|
||||
Philip Hazel
|
||||
February 2012
|
||||
November 2013
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
Installation Instructions
|
||||
*************************
|
||||
|
||||
Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
|
||||
Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
|
@ -12,8 +12,8 @@ without warranty of any kind.
|
|||
Basic Installation
|
||||
==================
|
||||
|
||||
Briefly, the shell commands `./configure; make; make install' should
|
||||
configure, build, and install this package. The following
|
||||
Briefly, the shell command `./configure && make && make install'
|
||||
should configure, build, and install this package. The following
|
||||
more-detailed instructions are generic; see the `README' file for
|
||||
instructions specific to this package. Some packages provide this
|
||||
`INSTALL' file but do not implement all of the features documented
|
||||
|
@ -309,9 +309,10 @@ causes the specified `gcc' to be used as the C compiler (unless it is
|
|||
overridden in the site shell script).
|
||||
|
||||
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
||||
an Autoconf bug. Until the bug is fixed you can use this workaround:
|
||||
an Autoconf limitation. Until the limitation is lifted, you can use
|
||||
this workaround:
|
||||
|
||||
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
|
||||
`configure' Invocation
|
||||
======================
|
||||
|
@ -367,4 +368,3 @@ operates.
|
|||
|
||||
`configure' also accepts some other, not widely useful, options. Run
|
||||
`configure --help' for more details.
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
Copyright (c) 1997-2014 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -35,7 +35,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2012 Zoltan Herczeg
|
||||
Copyright(c) 2010-2014 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -46,7 +46,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2012 Zoltan Herczeg
|
||||
Copyright(c) 2009-2014 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
|
|
@ -14,11 +14,16 @@ dist_doc_DATA = \
|
|||
NEWS \
|
||||
README
|
||||
|
||||
# Note that pcrecpp.html is not in this list; it is listed separately below.
|
||||
|
||||
dist_html_DATA = \
|
||||
doc/html/NON-AUTOTOOLS-BUILD.txt \
|
||||
doc/html/README.txt \
|
||||
doc/html/index.html \
|
||||
doc/html/pcre-config.html \
|
||||
doc/html/pcre.html \
|
||||
doc/html/pcre16.html \
|
||||
doc/html/pcre-config.html \
|
||||
doc/html/pcre32.html \
|
||||
doc/html/pcre_assign_jit_stack.html \
|
||||
doc/html/pcre_compile.html \
|
||||
doc/html/pcre_compile2.html \
|
||||
|
@ -44,6 +49,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre_refcount.html \
|
||||
doc/html/pcre_study.html \
|
||||
doc/html/pcre_utf16_to_host_byte_order.html \
|
||||
doc/html/pcre_utf32_to_host_byte_order.html \
|
||||
doc/html/pcre_version.html \
|
||||
doc/html/pcreapi.html \
|
||||
doc/html/pcrebuild.html \
|
||||
|
@ -65,10 +71,6 @@ dist_html_DATA = \
|
|||
doc/html/pcretest.html \
|
||||
doc/html/pcreunicode.html
|
||||
|
||||
# doc/html/pcre32.html \
|
||||
# doc/html/pcre_utf32_to_host_byte_order.html \
|
||||
#
|
||||
|
||||
pcrecpp_html = doc/html/pcrecpp.html
|
||||
dist_noinst_DATA = $(pcrecpp_html)
|
||||
|
||||
|
@ -140,14 +142,16 @@ pcre.h.generic: pcre.h.in configure.ac
|
|||
cp -p pcre.h $@
|
||||
|
||||
# It is more complicated for config.h.generic. We need the version that results
|
||||
# from a default configuration. We can get this by doing a configure in a
|
||||
# temporary directory. However, some trickery is needed,
|
||||
# because the source directory may already be configured. If you
|
||||
# just try running configure in a new directory, it complains. For this reason,
|
||||
# we move config.status out of the way while doing the default configuration.
|
||||
# The resulting config.h is munged by perl to put #ifdefs round any #defines
|
||||
# and to get rid of any gcc-specific visibility settings. Make sure that
|
||||
# PCRE_EXP_DEFN is unset (in case it has visibility settings).
|
||||
# from a default configuration so as to get all the default values for PCRE
|
||||
# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by
|
||||
# doing a configure in a temporary directory. However, some trickery is needed,
|
||||
# because the source directory may already be configured. If you just try
|
||||
# running configure in a new directory, it complains. For this reason, we move
|
||||
# config.status out of the way while doing the default configuration. The
|
||||
# resulting config.h is munged by perl to put #ifdefs round any #defines for
|
||||
# macros with values, and to #undef all boolean macros such as HAVE_xxx and
|
||||
# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
|
||||
# sure that PCRE_EXP_DEFN is unset (in case it has visibility settings).
|
||||
config.h.generic: configure.ac
|
||||
rm -rf $@ _generic
|
||||
mkdir _generic
|
||||
|
@ -160,8 +164,10 @@ config.h.generic: configure.ac
|
|||
-e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
|
||||
-e 'if(/to make a symbol visible/){next;}' \
|
||||
-e 'if(/__attribute__ \(\(visibility/){next;}' \
|
||||
-e 'if(/^#define\s(?!PACKAGE)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;}' \
|
||||
-e 'else {if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}}' \
|
||||
-e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
|
||||
-e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
|
||||
-e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
|
||||
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
|
||||
_generic/config.h >$@
|
||||
rm -rf _generic
|
||||
|
||||
|
@ -344,15 +350,19 @@ EXTRA_DIST += \
|
|||
sljit/sljitExecAllocator.c \
|
||||
sljit/sljitLir.c \
|
||||
sljit/sljitLir.h \
|
||||
sljit/sljitNativeARM_Thumb2.c \
|
||||
sljit/sljitNativeARM_v5.c \
|
||||
sljit/sljitNativeARM_32.c \
|
||||
sljit/sljitNativeARM_64.c \
|
||||
sljit/sljitNativeARM_T2_32.c \
|
||||
sljit/sljitNativeMIPS_32.c \
|
||||
sljit/sljitNativeMIPS_64.c \
|
||||
sljit/sljitNativeMIPS_common.c \
|
||||
sljit/sljitNativePPC_32.c \
|
||||
sljit/sljitNativePPC_64.c \
|
||||
sljit/sljitNativePPC_common.c \
|
||||
sljit/sljitNativeSPARC_32.c \
|
||||
sljit/sljitNativeSPARC_common.c \
|
||||
sljit/sljitNativeTILEGX_64.c \
|
||||
sljit/sljitNativeTILEGX-encoder.c \
|
||||
sljit/sljitNativeX86_32.c \
|
||||
sljit/sljitNativeX86_64.c \
|
||||
sljit/sljitNativeX86_common.c \
|
||||
|
@ -572,6 +582,8 @@ EXTRA_DIST += \
|
|||
testdata/testoutput1 \
|
||||
testdata/testoutput2 \
|
||||
testdata/testoutput3 \
|
||||
testdata/testoutput3A \
|
||||
testdata/testoutput3B \
|
||||
testdata/testoutput4 \
|
||||
testdata/testoutput5 \
|
||||
testdata/testoutput6 \
|
||||
|
@ -610,8 +622,10 @@ CLEANFILES += \
|
|||
teststderr \
|
||||
testtemp* \
|
||||
testtry \
|
||||
testNinput
|
||||
|
||||
testNinput \
|
||||
testtrygrep \
|
||||
teststderrgrep \
|
||||
testNinputgrep
|
||||
|
||||
# PCRE demonstration program. No longer built automatically. The point is that
|
||||
# the users should build it themselves. So just distribute the source.
|
||||
|
@ -659,11 +673,13 @@ if WITH_PCRE_CPP
|
|||
pkgconfig_DATA += libpcrecpp.pc
|
||||
endif
|
||||
|
||||
# Note that pcrecpp.3 is not in this list, but is included separately below.
|
||||
|
||||
dist_man_MANS = \
|
||||
doc/pcre-config.1 \
|
||||
doc/pcre.3 \
|
||||
doc/pcre16.3 \
|
||||
doc/pcre32.3 \
|
||||
doc/pcre-config.1 \
|
||||
doc/pcre_assign_jit_stack.3 \
|
||||
doc/pcre_compile.3 \
|
||||
doc/pcre_compile2.3 \
|
||||
|
@ -695,6 +711,7 @@ dist_man_MANS = \
|
|||
doc/pcrebuild.3 \
|
||||
doc/pcrecallout.3 \
|
||||
doc/pcrecompat.3 \
|
||||
doc/pcredemo.3 \
|
||||
doc/pcregrep.1 \
|
||||
doc/pcrejit.3 \
|
||||
doc/pcrelimits.3 \
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,88 @@
|
|||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Release 8.35 04-April-2014
|
||||
--------------------------
|
||||
|
||||
There have been performance improvements for classes containing non-ASCII
|
||||
characters and the "auto-possessification" feature has been extended. Other
|
||||
minor improvements have been implemented and bugs fixed. There is a new callout
|
||||
feature to enable applications to do detailed stack checks at compile time, to
|
||||
avoid running out of stack for deeply nested parentheses. The JIT compiler has
|
||||
been extended with experimental support for ARM-64, MIPS-64, and PPC-LE.
|
||||
|
||||
|
||||
Release 8.34 15-December-2013
|
||||
-----------------------------
|
||||
|
||||
As well as fixing the inevitable bugs, performance has been improved by
|
||||
refactoring and extending the amount of "auto-possessification" that PCRE does.
|
||||
Other notable changes:
|
||||
|
||||
. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||
an empty string. If it can, pcretest shows this in its information output.
|
||||
|
||||
. A back reference to a named subpattern when there is more than one of the
|
||||
same name now checks them in the order in which they appear in the pattern.
|
||||
The first one that is set is used for the reference. Previously only the
|
||||
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||
|
||||
. Unicode character properties were updated from Unicode 6.3.0.
|
||||
|
||||
. The character VT has been added to the set of characters that match \s and
|
||||
are generally treated as white space, following this same change in Perl
|
||||
5.18. There is now no difference between "Perl space" and "POSIX space".
|
||||
|
||||
. Perl has changed its handling of \8 and \9. If there is no previously
|
||||
encountered capturing group of those numbers, they are treated as the
|
||||
literal characters 8 and 9 instead of a binary zero followed by the
|
||||
literals. PCRE now does the same.
|
||||
|
||||
. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||
possible to specify values greater than 0777 and also making them
|
||||
unambiguous.
|
||||
|
||||
. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||
were matched by \h.
|
||||
|
||||
. Add JIT support for the 64 bit TileGX architecture.
|
||||
|
||||
. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||
does in Unicode mode.
|
||||
|
||||
. Perl no longer allows group names to start with digits, so I have made this
|
||||
change also in PCRE.
|
||||
|
||||
. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||
|
||||
|
||||
Release 8.33 28-May-2013
|
||||
--------------------------
|
||||
|
||||
A number of bugs are fixed, and some performance improvements have been made.
|
||||
There are also some new features, of which these are the most important:
|
||||
|
||||
. The behaviour of the backtracking verbs has been rationalized and
|
||||
documented in more detail.
|
||||
|
||||
. JIT now supports callouts and all of the backtracking verbs.
|
||||
|
||||
. Unicode validation has been updated in the light of Unicode Corrigendum #9,
|
||||
which points out that "non characters" are not "characters that may not
|
||||
appear in Unicode strings" but rather "characters that are reserved for
|
||||
internal use and have only local meaning".
|
||||
|
||||
. (*LIMIT_MATCH=d) and (*LIMIT_RECURSION=d) have been added so that the
|
||||
creator of a pattern can specify lower (but not higher) limits for the
|
||||
matching process.
|
||||
|
||||
. The PCRE_NEVER_UTF option is available to prevent pattern-writers from using
|
||||
the (*UTF) feature, as this could be a security issue.
|
||||
|
||||
|
||||
Release 8.32 30-November-2012
|
||||
-----------------------------
|
||||
|
||||
|
@ -591,7 +673,7 @@ some of the new functionality in Perl 5.005.
|
|||
Another (I hope this is the last!) change has been made to the API for the
|
||||
pcre_compile() function. An additional argument has been added to make it
|
||||
possible to pass over a pointer to character tables built in the current
|
||||
locale by pcre_maketables(). To use the default tables, this new arguement
|
||||
locale by pcre_maketables(). To use the default tables, this new argument
|
||||
should be passed as NULL.
|
||||
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
|
||||
|
|
|
@ -9,11 +9,14 @@ This document contains the following sections:
|
|||
Building for virtual Pascal
|
||||
Stack size in Windows environments
|
||||
Linking programs in Windows environments
|
||||
Calling conventions in Windows environments
|
||||
Comments about Win32 builds
|
||||
Building PCRE on Windows with CMake
|
||||
Use of relative paths with CMake on Windows
|
||||
Testing with RunTest.bat
|
||||
Building under Windows CE with Visual Studio 200x
|
||||
Building under Windows with BCC5.5
|
||||
Building using Borland C++ Builder 2007 (CB2007) and higher
|
||||
Building PCRE on OpenVMS
|
||||
Building PCRE on Stratus OpenVOS
|
||||
Building PCRE on native z/OS and z/VM
|
||||
|
@ -168,8 +171,8 @@ can skip ahead to the CMake section.
|
|||
pcre16_version.c
|
||||
pcre16_xclass.c
|
||||
|
||||
(7') If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
||||
or 32-bit libraries) repeat steps 5-6 with the following files:
|
||||
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||
|
||||
pcre32_byte_order.c
|
||||
pcre32_chartables.c
|
||||
|
@ -194,30 +197,31 @@ can skip ahead to the CMake section.
|
|||
pcre32_version.c
|
||||
pcre32_xclass.c
|
||||
|
||||
(8) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
(9) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
8-bit library), ensure that you have the pcreposix.h file and then compile
|
||||
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
|
||||
(on its own) as the pcreposix library.
|
||||
|
||||
(9) The pcretest program can be linked with any combination of the 8-bit, 16-bit
|
||||
and 32-bit libraries (depending on what you selected in config.h). Compile
|
||||
pcretest.c and pcre_printint.c (again, don't forget -DHAVE_CONFIG_H) and
|
||||
link them together with the appropriate library/ies. If you compiled an
|
||||
8-bit library, pcretest also needs the pcreposix wrapper library unless
|
||||
you compiled it with -DNOPOSIX.
|
||||
(10) The pcretest program can be linked with any combination of the 8-bit,
|
||||
16-bit and 32-bit libraries (depending on what you selected in config.h).
|
||||
Compile pcretest.c and pcre_printint.c (again, don't forget
|
||||
-DHAVE_CONFIG_H) and link them together with the appropriate library/ies.
|
||||
If you compiled an 8-bit library, pcretest also needs the pcreposix
|
||||
wrapper library unless you compiled it with -DNOPOSIX.
|
||||
|
||||
(10) Run pcretest on the testinput files in the testdata directory, and check
|
||||
(11) Run pcretest on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. There are
|
||||
comments about what each test does in the section entitled "Testing PCRE"
|
||||
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||
32-bit libraries, you need to run pcretest with the -16 option to do 16-bit
|
||||
tests and with the -32 option to do 32-bit tests.
|
||||
32-bit libraries, you need to run pcretest with the -16 option to do
|
||||
16-bit tests and with the -32 option to do 32-bit tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options are selected.
|
||||
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run if
|
||||
you have built PCRE without it. See the comments at the start of each
|
||||
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run
|
||||
if you have built PCRE without it. See the comments at the start of each
|
||||
testinput file. If you have a suitable Unix-like shell, the RunTest script
|
||||
will run the appropriate tests for you.
|
||||
will run the appropriate tests for you. The command "RunTest list" will
|
||||
output a list of all the tests.
|
||||
|
||||
Note that the supplied files are in Unix format, with just LF characters
|
||||
as line terminators. You may need to edit them to change this if your
|
||||
|
@ -227,11 +231,11 @@ can skip ahead to the CMake section.
|
|||
locale to "french" rather than "fr_FR", and there are some minor output
|
||||
differences.
|
||||
|
||||
(11) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
|
||||
(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
|
||||
by the testdata files. However, you might also like to build and run
|
||||
the JIT test program, pcre_jit_test.c.
|
||||
the freestanding JIT test program, pcre_jit_test.c.
|
||||
|
||||
(12) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
(13) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
uses only the basic 8-bit PCRE library (it does not need the pcreposix
|
||||
library).
|
||||
|
||||
|
@ -428,16 +432,13 @@ CMake build process. In the CMake GUI, the cache can be deleted by selecting
|
|||
|
||||
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
|
||||
|
||||
A PCRE user comments as follows:
|
||||
A PCRE user comments as follows: I thought that others may want to know the
|
||||
current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is:
|
||||
|
||||
I thought that others may want to know the current state of
|
||||
CMAKE_USE_RELATIVE_PATHS support on Windows.
|
||||
|
||||
Here it is:
|
||||
-- AdditionalIncludeDirectories is only partially modified (only the
|
||||
first path - see below)
|
||||
first path - see below)
|
||||
-- Only some of the contained file paths are modified - shown below for
|
||||
pcre.vcproj
|
||||
pcre.vcproj
|
||||
-- It properly modifies
|
||||
|
||||
I am sure CMake people can fix that if they want to. Until then one will
|
||||
|
@ -449,9 +450,9 @@ deal.
|
|||
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
|
||||
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
|
||||
|
||||
RelativePath="pcre.h">
|
||||
RelativePath="pcre_chartables.c">
|
||||
RelativePath="pcre_chartables.c.rule">
|
||||
RelativePath="pcre.h"
|
||||
RelativePath="pcre_chartables.c"
|
||||
RelativePath="pcre_chartables.c.rule"
|
||||
|
||||
|
||||
TESTING WITH RUNTEST.BAT
|
||||
|
@ -489,20 +490,6 @@ To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
|
|||
pcre_scanner_unittest.exe.
|
||||
|
||||
|
||||
BUILDING UNDER WINDOWS WITH BCC5.5
|
||||
|
||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||
|
||||
Some of the core BCC libraries have a version of PCRE from 1998 built in,
|
||||
which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
|
||||
version mismatch. I'm including an easy workaround below, if you'd like to
|
||||
include it in the non-unix instructions:
|
||||
|
||||
When linking a project with BCC5.5, pcre.lib must be included before any of
|
||||
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
|
||||
line.
|
||||
|
||||
|
||||
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
|
||||
|
||||
Vincent Richomme sent a zip archive of files to help with this process. They
|
||||
|
@ -510,11 +497,149 @@ can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
|
|||
site.
|
||||
|
||||
|
||||
BUILDING UNDER WINDOWS WITH BCC5.5
|
||||
|
||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||
|
||||
Some of the core BCC libraries have a version of PCRE from 1998 built in, which
|
||||
can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version
|
||||
mismatch. I'm including an easy workaround below, if you'd like to include it
|
||||
in the non-unix instructions:
|
||||
|
||||
When linking a project with BCC5.5, pcre.lib must be included before any of the
|
||||
libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line.
|
||||
|
||||
|
||||
BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER
|
||||
|
||||
A PCRE user sent these comments about this environment (see also the comment
|
||||
from another user that follows them):
|
||||
|
||||
The XE versions of C++ Builder come with a RegularExpressionsCore class which
|
||||
contains a version of TPerlRegEx. However, direct use of the C PCRE library may
|
||||
be desirable.
|
||||
|
||||
The default makevp.bat, however, supplied with PCRE builds a version of PCRE
|
||||
that is not usable with any version of C++ Builder because the compiler ships
|
||||
with an embedded version of PCRE, version 2.01 from 1998! [See also the note
|
||||
about BCC5.5 above.] If you want to use PCRE you'll need to rename the
|
||||
functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just
|
||||
use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the
|
||||
embedded version of PCRE does not have the 16 bit function names, there is no
|
||||
conflict.
|
||||
|
||||
Building PCRE using a C++ Builder static library project file (recommended):
|
||||
|
||||
1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder
|
||||
original include path.
|
||||
|
||||
2. Download PCRE from pcre.org and extract to a directory.
|
||||
|
||||
3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to
|
||||
pcre.h, and config.h.generic to config.h.
|
||||
|
||||
4. Edit pcre.h and pcre_config.c so that they include config.h.
|
||||
|
||||
5. Edit config.h like so:
|
||||
|
||||
Comment out the following lines:
|
||||
#define PACKAGE "pcre"
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
#define PACKAGE_STRING "PCRE 8.32"
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
#define PACKAGE_URL ""
|
||||
#define PACKAGE_VERSION "8.32"
|
||||
|
||||
Add the following lines:
|
||||
#ifndef SUPPORT_UTF
|
||||
#define SUPPORT_UTF 100 // any value is fine
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_UCP
|
||||
#define SUPPORT_UCP 101 // any value is fine
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_PCRE16
|
||||
#define SUPPORT_PCRE16 102 // any value is fine
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define SUPPORT_UTF8 103 // any value is fine
|
||||
#endif
|
||||
|
||||
6. Build a C++ Builder project using the IDE. Go to File / New / Other and
|
||||
choose Static Library. You can name it pcre.cbproj or whatever. Now set your
|
||||
paths by going to Project / Options. Set the Include path. Do this from the
|
||||
"Base" option to apply to both Release and Debug builds. Now add the following
|
||||
files to the project:
|
||||
|
||||
pcre.h
|
||||
pcre16_byte_order.c
|
||||
pcre16_chartables.c
|
||||
pcre16_compile.c
|
||||
pcre16_config.c
|
||||
pcre16_dfa_exec.c
|
||||
pcre16_exec.c
|
||||
pcre16_fullinfo.c
|
||||
pcre16_get.c
|
||||
pcre16_globals.c
|
||||
pcre16_maketables.c
|
||||
pcre16_newline.c
|
||||
pcre16_ord2utf16.c
|
||||
pcre16_printint.c
|
||||
pcre16_refcount.c
|
||||
pcre16_string_utils.c
|
||||
pcre16_study.c
|
||||
pcre16_tables.c
|
||||
pcre16_ucd.c
|
||||
pcre16_utf16_utils.c
|
||||
pcre16_valid_utf16.c
|
||||
pcre16_version.c
|
||||
pcre16_xclass.c
|
||||
|
||||
//Optional
|
||||
pcre_version.c
|
||||
|
||||
7. After compiling the .lib file, copy the .lib and header files to a project
|
||||
you want to use PCRE with. Enjoy.
|
||||
|
||||
Optional ... Building PCRE using the makevp.bat file:
|
||||
|
||||
1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit
|
||||
versions.
|
||||
|
||||
2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat.
|
||||
|
||||
Another PCRE user added this comment:
|
||||
|
||||
Another approach I successfully used for some years with BCB 5 and 6 was to
|
||||
make sure that include and library paths of PCRE are configured before the
|
||||
default paths of the IDE in the dialogs where one can manage those paths.
|
||||
Afterwards one can open the project files using a text editor and manually add
|
||||
the self created library for pcre itself, pcrecpp doesn't ship with the IDE, in
|
||||
the library nodes where the IDE manages its own libraries to link against in
|
||||
front of the IDE-own libraries. This way one can use the default PCRE function
|
||||
names without getting access violations on runtime.
|
||||
|
||||
<ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/>
|
||||
|
||||
|
||||
BUILDING PCRE ON OPENVMS
|
||||
|
||||
Dan Mooney sent the following comments about building PCRE on OpenVMS. They
|
||||
relate to an older version of PCRE that used fewer source files, so the exact
|
||||
commands will need changing. See the current list of source files above.
|
||||
Stephen Hoffman sent the following, in December 2012:
|
||||
|
||||
"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the
|
||||
OpenVMS port and here
|
||||
|
||||
<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip>
|
||||
|
||||
is a zip with the OpenVMS files, and with one modified testing-related PCRE
|
||||
file." This is a port of PCRE 8.32.
|
||||
|
||||
Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS.
|
||||
They relate to an older version of PCRE that used fewer source files, so the
|
||||
exact commands will need changing. See the current list of source files above.
|
||||
|
||||
"It was quite easy to compile and link the library. I don't have a formal
|
||||
make file but the attached file [reproduced below] contains the OpenVMS DCL
|
||||
|
@ -636,4 +761,4 @@ There is also a mirror here:
|
|||
http://www.vsoft-software.com/downloads.html
|
||||
|
||||
==========================
|
||||
Last Updated: 21 November 2012
|
||||
Last Updated: 14 May 2013
|
||||
|
|
|
@ -25,6 +25,12 @@
|
|||
# when the HTML documentation is built. It works like this so that
|
||||
# doc/html can be deleted and re-created from scratch.
|
||||
|
||||
# README & NON-AUTOTOOLS-BUILD
|
||||
# These files are copied into the doc/html directory, with .txt
|
||||
# extensions so that they can be hyperlinked from the HTML
|
||||
# documentation, because some people just go to the HTML without
|
||||
# looking for text files.
|
||||
|
||||
|
||||
# First, sort out the documentation. Remove pcredemo.3 first because it won't
|
||||
# pass the markup check (it is created below, using markup that none of the
|
||||
|
@ -122,6 +128,8 @@ if [ $? != 0 ] ; then exit 1; fi
|
|||
echo "Making HTML documentation"
|
||||
/bin/rm html/*
|
||||
cp index.html.src html/index.html
|
||||
cp ../README html/README.txt
|
||||
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt
|
||||
|
||||
for file in *.1 ; do
|
||||
base=`basename $file .1`
|
||||
|
@ -218,7 +226,6 @@ files="\
|
|||
pcre_string_utils.c \
|
||||
pcre_study.c \
|
||||
pcre_tables.c \
|
||||
pcre_ucp_searchfuncs.c \
|
||||
pcre_valid_utf8.c \
|
||||
pcre_version.c \
|
||||
pcre_xclass.c \
|
||||
|
@ -238,8 +245,6 @@ files="\
|
|||
pcre_stringpiece_unittest.cc \
|
||||
perltest.pl \
|
||||
ucp.h \
|
||||
ucpinternal.h \
|
||||
ucptable.h \
|
||||
makevp.bat \
|
||||
pcre.def \
|
||||
libpcre.def \
|
||||
|
|
|
@ -9,8 +9,10 @@ from:
|
|||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE at
|
||||
pcre-dev@exim.org. You can access the archives and subscribe or manage your
|
||||
subscription here:
|
||||
|
||||
pcre-dev@exim.org
|
||||
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
|
@ -25,6 +27,8 @@ The contents of this README file are:
|
|||
Shared libraries
|
||||
Cross-compiling using autotools
|
||||
Using HP's ANSI C++ compiler (aCC)
|
||||
Compiling in Tru64 using native compilers
|
||||
Using Sun's compilers for Solaris
|
||||
Using PCRE from MySQL
|
||||
Making new tarballs
|
||||
Testing PCRE
|
||||
|
@ -35,10 +39,10 @@ The contents of this README file are:
|
|||
The PCRE APIs
|
||||
-------------
|
||||
|
||||
PCRE is written in C, and it has its own API. There are three sets of functions,
|
||||
one for the 8-bit library, which processes strings of bytes, one for the
|
||||
16-bit library, which processes strings of 16-bit values, and one for the 32-bit
|
||||
library, which processes strings of 32-bit values. The distribution also
|
||||
PCRE is written in C, and it has its own API. There are three sets of
|
||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||
32-bit library, which processes strings of 32-bit values. The distribution also
|
||||
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
|
||||
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
|
||||
C++.
|
||||
|
@ -81,11 +85,12 @@ documentation is supplied in two other forms:
|
|||
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
|
||||
doc/pcretest.txt in the source distribution. The first of these is a
|
||||
concatenation of the text forms of all the section 3 man pages except
|
||||
those that summarize individual functions. The other two are the text
|
||||
forms of the section 1 man pages for the pcregrep and pcretest commands.
|
||||
These text forms are provided for ease of scanning with text editors or
|
||||
similar tools. They are installed in <prefix>/share/doc/pcre, where
|
||||
<prefix> is the installation prefix (defaulting to /usr/local).
|
||||
the listing of pcredemo.c and those that summarize individual functions.
|
||||
The other two are the text forms of the section 1 man pages for the
|
||||
pcregrep and pcretest commands. These text forms are provided for ease of
|
||||
scanning with text editors or similar tools. They are installed in
|
||||
<prefix>/share/doc/pcre, where <prefix> is the installation prefix
|
||||
(defaulting to /usr/local).
|
||||
|
||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||
in various ways, and rooted in a file called index.html, is distributed in
|
||||
|
@ -110,6 +115,11 @@ contributions provided support for compiling PCRE on various flavours of
|
|||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||
in the standard distribution, so these contributions have been archived.
|
||||
|
||||
A PCRE user maintains downloadable Windows binaries of the pcregrep and
|
||||
pcretest programs here:
|
||||
|
||||
http://www.rexegg.com/pcregrep-pcretest.html
|
||||
|
||||
|
||||
Building PCRE on non-Unix-like systems
|
||||
--------------------------------------
|
||||
|
@ -260,9 +270,17 @@ library. They are also documented in the pcrebuild man page.
|
|||
|
||||
on the "configure" command.
|
||||
|
||||
. PCRE has a counter that can be set to limit the amount of resources it uses.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
. PCRE has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
example,
|
||||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
|
@ -342,7 +360,8 @@ library. They are also documented in the pcrebuild man page.
|
|||
report is generated by running "make coverage". If ccache is installed on
|
||||
your system, it must be disabled when building PCRE for coverage reporting.
|
||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||
running "make" to build PCRE.
|
||||
running "make" to build PCRE. There is more information about coverage
|
||||
reporting in the "pcrebuild" documentation.
|
||||
|
||||
. The pcregrep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||
|
@ -354,12 +373,12 @@ library. They are also documented in the pcrebuild man page.
|
|||
|
||||
Of course, the relevant libraries must be installed on your system.
|
||||
|
||||
. The default size of internal buffer used by pcregrep can be set by, for
|
||||
example:
|
||||
. The default size (in bytes) of the internal buffer used by pcregrep can be
|
||||
set by, for example:
|
||||
|
||||
--with-pcregrep-bufsize=50K
|
||||
--with-pcregrep-bufsize=51200
|
||||
|
||||
The default value is 20K.
|
||||
The value must be a plain integer. The default is 20480.
|
||||
|
||||
. It is possible to compile pcretest so that it links with the libreadline
|
||||
or libedit libraries, by specifying, respectively,
|
||||
|
@ -575,6 +594,27 @@ running the "configure" script:
|
|||
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
||||
|
||||
|
||||
Compiling in Tru64 using native compilers
|
||||
-----------------------------------------
|
||||
|
||||
The following error may occur when compiling with native compilers in the Tru64
|
||||
operating system:
|
||||
|
||||
CXX libpcrecpp_la-pcrecpp.lo
|
||||
cxx: Error: /usr/lib/cmplrs/cxx/V7.1-006/include/cxx/iosfwd, line 58: #error
|
||||
directive: "cannot include iosfwd -- define __USE_STD_IOSTREAM to
|
||||
override default - see section 7.1.2 of the C++ Using Guide"
|
||||
#error "cannot include iosfwd -- define __USE_STD_IOSTREAM to override default
|
||||
- see section 7.1.2 of the C++ Using Guide"
|
||||
|
||||
This may be followed by other errors, complaining that 'namespace "std" has no
|
||||
member'. The solution to this is to add the line
|
||||
|
||||
#define __USE_STD_IOSTREAM 1
|
||||
|
||||
to the config.h file.
|
||||
|
||||
|
||||
Using Sun's compilers for Solaris
|
||||
---------------------------------
|
||||
|
||||
|
@ -624,27 +664,40 @@ NON-AUTOTOOLS-BUILD.
|
|||
The RunTest script runs the pcretest test program (which is documented in its
|
||||
own man page) on each of the relevant testinput files in the testdata
|
||||
directory, and compares the output with the contents of the corresponding
|
||||
testoutput files. Some tests are relevant only when certain build-time options
|
||||
were selected. For example, the tests for UTF-8/16/32 support are run only if
|
||||
--enable-utf was used. RunTest outputs a comment when it skips a test.
|
||||
testoutput files. RunTest uses a file called testtry to hold the main output
|
||||
from pcretest. Other files whose names begin with "test" are used as working
|
||||
files in some tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options were selected. For
|
||||
example, the tests for UTF-8/16/32 support are run only if --enable-utf was
|
||||
used. RunTest outputs a comment when it skips a test.
|
||||
|
||||
Many of the tests that are not skipped are run up to three times. The second
|
||||
run forces pcre_study() to be called for all patterns except for a few in some
|
||||
tests that are marked "never study" (see the pcretest program for how this is
|
||||
done). If JIT support is available, the non-DFA tests are run a third time,
|
||||
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
|
||||
This testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||
|
||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||
libraries that are enabled. If you want to run just one set of tests, call
|
||||
RunTest with either the -8, -16 or -32 option.
|
||||
|
||||
RunTest uses a file called testtry to hold the main output from pcretest.
|
||||
Other files whose names begin with "test" are used as working files in some
|
||||
tests. To run pcretest on just one or more specific test files, give their
|
||||
numbers as arguments to RunTest, for example:
|
||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||
on the RunTest command line. To run pcretest on just one or more specific test
|
||||
files, give their numbers as arguments to RunTest, for example:
|
||||
|
||||
RunTest 2 7 11
|
||||
|
||||
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
end), or a number preceded by ~ to exclude a test. For example:
|
||||
|
||||
RunTest 3-15 ~10
|
||||
|
||||
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
|
||||
except test 13. Whatever order the arguments are in, the tests are always run
|
||||
in numerical order.
|
||||
|
||||
You can also call RunTest with the single argument "list" to cause it to output
|
||||
a list of tests.
|
||||
|
||||
|
@ -704,21 +757,24 @@ test is run only when JIT support is not available. They test some JIT-specific
|
|||
features such as information output from pcretest about JIT compilation.
|
||||
|
||||
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
|
||||
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit mode.
|
||||
These are tests that generate different output in the two modes. They are for
|
||||
general cases, UTF-8/16/32 support, and Unicode property support, respectively.
|
||||
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit
|
||||
mode. These are tests that generate different output in the two modes. They are
|
||||
for general cases, UTF-8/16/32 support, and Unicode property support,
|
||||
respectively.
|
||||
|
||||
The twentieth test is run only in 16/32-bit mode. It tests some specific
|
||||
16/32-bit features of the DFA matching engine.
|
||||
|
||||
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when the
|
||||
link size is set to 2 for the 16-bit library. They test reloading pre-compiled patterns.
|
||||
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when
|
||||
the link size is set to 2 for the 16-bit library. They test reloading
|
||||
pre-compiled patterns.
|
||||
|
||||
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are for
|
||||
general cases, and UTF-16 support, respectively.
|
||||
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are
|
||||
for general cases, and UTF-16 support, respectively.
|
||||
|
||||
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are
|
||||
for general cases, and UTF-32 support, respectively.
|
||||
|
||||
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are for
|
||||
general cases, and UTF-32 support, respectively.
|
||||
|
||||
Character tables
|
||||
----------------
|
||||
|
@ -932,4 +988,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 27 October 2012
|
||||
Last updated: 17 January 2014
|
||||
|
|
|
@ -69,427 +69,447 @@ utf8=$?
|
|||
|
||||
echo "Testing pcregrep main features"
|
||||
|
||||
echo "---------------------------- Test 1 ------------------------------" >testtry
|
||||
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 1 ------------------------------" >testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 2 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 2 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 3 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 3 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 4 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 4 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 5 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 5 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 6 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 6 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 7 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 7 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 8 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 8 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 9 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 9 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 10 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 10 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 11 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 11 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 12 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 12 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 13 -----------------------------" >>testtry
|
||||
echo seventeen >testtemp1
|
||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1 ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 13 -----------------------------" >>testtrygrep
|
||||
echo seventeen >testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 14 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 14 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 15 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtry >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 15 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 16 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtry >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 16 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 17 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 17 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 18 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 18 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 19 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 19 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 20 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 20 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 21 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 21 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 22 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 22 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 23 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 23 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 24 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 24 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 25 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 25 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 26 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 26 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 27 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 27 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 28 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 28 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 29 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 29 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 30 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 30 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 31 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 31 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 32 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 32 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 33 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 33 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 34 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -s 'fox' ./testdata/grepnonexist) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 34 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 35 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 35 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 36 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 37 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep '^(a+)*\d' ./testdata/grepinput) >>testtry 2>teststderr
|
||||
echo "RC=$?" >>testtry
|
||||
echo "======== STDERR ========" >>testtry
|
||||
cat teststderr >>testtry
|
||||
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
echo "======== STDERR ========" >>testtrygrep
|
||||
cat teststderrgrep >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 38 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 38 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 39 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 39 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 40 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 40 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 41 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 41 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 42 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 42 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 43 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 43 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 44 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 44 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 45 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 45 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 46 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtry >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtrygrep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 47 ------------------------------" >>testtry
|
||||
echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 48 ------------------------------" >>testtry
|
||||
echo "---------------------------- Test 48 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -F "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 49 ------------------------------" >>testtry
|
||||
echo "---------------------------- Test 49 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
|
||||
elephant" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
elephant" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 50 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 50 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 51 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 51 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 52 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 52 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 53 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 53 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 54 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 54 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 55 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 55 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 56 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 56 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 57 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 57 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 58 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 58 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 59 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 59 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 60 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 60 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 61 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 61 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 62 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 62 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 63 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 63 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 64 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 64 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 65 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 65 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 66 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 66 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 67 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 67 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 68 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 68 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 69 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 69 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 70 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 71 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 72 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 72 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 73 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 73 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 74 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 74 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 75 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 75 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 76 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 76 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 77 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 77 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 78 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 78 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 79 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 79 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 80 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 80 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 81 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 81 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 82 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 82 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 83 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 84 -----------------------------" >>testtry
|
||||
echo testdata/grepinput3 >testtemp1
|
||||
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1 "fox|complete|t7") >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 84 -----------------------------" >>testtrygrep
|
||||
echo testdata/grepinput3 >testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 85 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 85 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 86 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 86 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 87 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 87 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 88 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 88 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 89 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 89 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 90 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 90 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 91 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 91 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 92 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 92 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 93 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 93 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 94 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 94 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 95 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 95 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 96 -----------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 97 -----------------------------" >>testtry
|
||||
echo "grepinput$" >testtemp1
|
||||
echo "grepinput8" >>testtemp1
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 98 -----------------------------" >>testtry
|
||||
echo "grepinput$" >testtemp1
|
||||
echo "grepinput8" >>testtemp1
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >>testtemp1grep
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 99 -----------------------------" >>testtry
|
||||
echo "grepinput$" >testtemp1
|
||||
echo "grepinput8" >testtemp2
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1 --exclude-from=$builddir/testtemp2 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
|
||||
echo "grepinput$" >testtemp1grep
|
||||
echo "grepinput8" >testtemp2grep
|
||||
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 100 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 101 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test 101 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 102 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 103 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 104 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 105 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 106 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
$cf $srcdir/testdata/grepoutput testtry
|
||||
$cf $srcdir/testdata/grepoutput testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
|
@ -498,15 +518,15 @@ if [ $? != 0 ] ; then exit 1; fi
|
|||
if [ $utf8 -ne 0 ] ; then
|
||||
echo "Testing pcregrep UTF-8 features"
|
||||
|
||||
echo "---------------------------- Test U1 ------------------------------" >testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test U1 ------------------------------" >testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U2 ------------------------------" >>testtry
|
||||
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtry
|
||||
echo "RC=$?" >>testtry
|
||||
echo "---------------------------- Test U2 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutput8 testtry
|
||||
$cf $srcdir/testdata/grepoutput8 testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
else
|
||||
|
@ -522,28 +542,28 @@ fi
|
|||
# starts with a hyphen. These tests are run in the build directory.
|
||||
|
||||
echo "Testing pcregrep newline settings"
|
||||
printf "abc\rdef\r\nghi\njkl" >testNinput
|
||||
printf "abc\rdef\r\nghi\njkl" >testNinputgrep
|
||||
|
||||
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtry
|
||||
$valgrind $pcregrep -n -N CR "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||
printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
|
||||
$valgrind $pcregrep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtry
|
||||
$valgrind $pcregrep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||
printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcregrep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtry
|
||||
printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
|
||||
pattern=`printf 'def\rjkl'`
|
||||
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinput >>testtry
|
||||
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtry
|
||||
$valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinput >>testtry
|
||||
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtry
|
||||
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtry
|
||||
$valgrind $pcregrep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinput >>testtry
|
||||
printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
|
||||
$valgrind $pcregrep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutputN testtry
|
||||
$cf $srcdir/testdata/grepoutputN testtrygrep
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
exit 0
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#! /bin/sh
|
||||
|
||||
###############################################################################
|
||||
# Run the PCRE tests using the pcretest program. The appropriate tests are
|
||||
# selected, depending on which build-time options were used.
|
||||
#
|
||||
|
@ -13,21 +14,38 @@
|
|||
# UTF-8 with the UTF-8 check turned off; for this, studying must also be
|
||||
# disabled with /SS.
|
||||
#
|
||||
# When JIT support is available, all the tests are also run with -s+ to test
|
||||
# (again, almost) everything with studying and the JIT option. There are also
|
||||
# two tests for JIT-specific features, one to be run when JIT support is
|
||||
# available, and one when it is not.
|
||||
# When JIT support is available, all appropriate tests are also run with -s+ to
|
||||
# test (again, almost) everything with studying and the JIT option, unless
|
||||
# "nojit" is given on the command line. There are also two tests for
|
||||
# JIT-specific features, one to be run when JIT support is available (unless
|
||||
# "nojit" is specified), and one when it is not.
|
||||
#
|
||||
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
||||
# possible to select which to test by the arguments -8, -16 or -32.
|
||||
# possible to select which to test by giving "-8", "-16" or "-32" on the
|
||||
# command line.
|
||||
#
|
||||
# Other arguments for this script can be individual test numbers, or the word
|
||||
# "valgrind", "valgrind-log" or "sim" followed by an argument to run cross-
|
||||
# compiled executables under a simulator, for example:
|
||||
# As well as "nojit", "-8", "-16", and "-32", arguments for this script are
|
||||
# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
|
||||
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
|
||||
# except test 10. Whatever order the arguments are in, the tests are always run
|
||||
# in numerical order.
|
||||
#
|
||||
# The special argument "3S" runs test 3, stopping if it fails. Test 3 is the
|
||||
# locale test, and failure usually means there's an issue with the locale
|
||||
# rather than a bug in PCRE, so normally subsequent tests are run. "3S" is
|
||||
# useful when you want to debug or update the test.
|
||||
#
|
||||
# Inappropriate tests are automatically skipped (with a comment to say so): for
|
||||
# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
|
||||
# support is compiled, test 13 is skipped.
|
||||
#
|
||||
# Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
|
||||
# followed by an argument to run cross-compiled executables under a simulator,
|
||||
# for example:
|
||||
#
|
||||
# RunTest 3 sim "qemu-arm -s 8388608"
|
||||
#
|
||||
#
|
||||
# There are two special cases where only one argument is allowed:
|
||||
#
|
||||
# If the first and only argument is "ebcdic", the script runs the special
|
||||
|
@ -36,7 +54,7 @@
|
|||
#
|
||||
# If the script is obeyed as "RunTest list", a list of available tests is
|
||||
# output, but none of them are run.
|
||||
|
||||
###############################################################################
|
||||
|
||||
# Define test titles in variables so that they can be output as a list. Some
|
||||
# of them are modified (e.g. with -8 or -16) when used in the actual tests.
|
||||
|
@ -53,8 +71,8 @@ title8="Test 8: DFA matching main functionality"
|
|||
title9="Test 9: DFA matching with UTF"
|
||||
title10="Test 10: DFA matching with Unicode properties"
|
||||
title11="Test 11: Internal offsets and code size tests"
|
||||
title12="Test 12: JIT-specific features (JIT available)"
|
||||
title13="Test 13: JIT-specific features (JIT not available)"
|
||||
title12="Test 12: JIT-specific features (when JIT is available)"
|
||||
title13="Test 13: JIT-specific features (when JIT is not available)"
|
||||
title14="Test 14: Specials for the basic 8-bit library"
|
||||
title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
||||
# Title for test 16; like the other titleN variables it can be output as a list.
title16="Test 16: Specials for the 8-bit library with Unicode property support"
|
||||
|
@ -69,6 +87,8 @@ title24="Test 24: Specials for the 16-bit library with UTF-16 support"
|
|||
title25="Test 25: Specials for the 32-bit library"
|
||||
title26="Test 26: Specials for the 32-bit library with UTF-32 support"
|
||||
|
||||
maxtest=26
|
||||
|
||||
if [ $# -eq 1 -a "$1" = "list" ]; then
|
||||
echo $title1
|
||||
echo $title2 "(not UTF)"
|
||||
|
@ -151,17 +171,19 @@ fi
|
|||
|
||||
# Default values
|
||||
|
||||
valgrind=
|
||||
sim=
|
||||
arg8=
|
||||
arg16=
|
||||
arg32=
|
||||
nojit=
|
||||
sim=
|
||||
skip=
|
||||
valgrind=
|
||||
|
||||
# This is in case the caller has set aliases (as I do - PH)
|
||||
unset cp ls mv rm
|
||||
|
||||
# Select which tests to run; for those that are explicitly requested, check
|
||||
# that the necessary optional facilities are available.
|
||||
# Process options and select which tests to run; for those that are explicitly
|
||||
# requested, check that the necessary optional facilities are available.
|
||||
|
||||
do1=no
|
||||
do2=no
|
||||
|
@ -221,10 +243,34 @@ while [ $# -gt 0 ] ; do
|
|||
-8) arg8=yes;;
|
||||
-16) arg16=yes;;
|
||||
-32) arg32=yes;;
|
||||
nojit) nojit=yes;;
|
||||
sim) shift; sim=$1;;
|
||||
valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
|
||||
valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all --log-file=report.%p ";;
|
||||
sim) shift; sim=$1;;
|
||||
*) echo "Unknown test number '$1'"; exit 1;;
|
||||
# Exclusion selector: an argument of the form "~N" adds test number N to the
# space-separated $skip list.
~*)
|
||||
# Accept only "~" followed by one or more decimal digits.
if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
|
||||
# expr prints the captured digits after "~"; append them to $skip.
skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
|
||||
else
|
||||
# Anything else starting with "~" is rejected and the script exits.
echo "Unknown option or test selector '$1'"; exit 1
|
||||
fi
|
||||
;;
|
||||
# Range selector: "A-B" selects tests A through B inclusive; "A-" selects
# tests A through $maxtest.
*-*)
|
||||
# Require leading digits, a hyphen, then optional trailing digits.
if expr "$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then
|
||||
# tf = first test of the range (the digits before the hyphen).
tf=`expr "$1" : '\([0-9]*\)'`
|
||||
# tt = last test of the range (the digits after the hyphen; may be empty).
tt=`expr "$1" : '.*-\([0-9]*\)'`
|
||||
# An open-ended range runs up to the highest test number.
if [ "$tt" = "" ] ; then tt=$maxtest; fi
|
||||
# Reject a range that starts below 1 or ends beyond $maxtest.
if expr \( "$tf" "<" 1 \) \| \( "$tt" ">" "$maxtest" \) >/dev/null; then
|
||||
echo "Invalid test range '$1'"; exit 1
|
||||
fi
|
||||
# Set do<N>=yes for every N from tf to tt inclusive.
while expr "$tf" "<=" "$tt" >/dev/null; do
|
||||
eval do${tf}=yes
|
||||
tf=`expr $tf + 1`
|
||||
done
|
||||
else
|
||||
# Contains a hyphen but is not a well-formed numeric range.
echo "Invalid test range '$1'"; exit 1
|
||||
fi
|
||||
;;
|
||||
*) echo "Unknown option or test selector '$1'"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
@ -309,79 +355,12 @@ ucp=$?
|
|||
jitopt=
|
||||
$sim ./pcretest -C jit >/dev/null
|
||||
jit=$?
|
||||
if [ $jit -ne 0 ] ; then
|
||||
if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
|
||||
jitopt=-s+
|
||||
fi
|
||||
|
||||
if [ $utf -eq 0 ] ; then
|
||||
if [ $do4 = yes ] ; then
|
||||
echo "Can't run test 4 because UTF support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do5 = yes ] ; then
|
||||
echo "Can't run test 5 because UTF support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do9 = yes ] ; then
|
||||
echo "Can't run test 8 because UTF support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do15 = yes ] ; then
|
||||
echo "Can't run test 15 because UTF support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do18 = yes ] ; then
|
||||
echo "Can't run test 18 because UTF support is not configured"
|
||||
fi
|
||||
if [ $do22 = yes ] ; then
|
||||
echo "Can't run test 22 because UTF support is not configured"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $ucp -eq 0 ] ; then
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Can't run test 6 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do7 = yes ] ; then
|
||||
echo "Can't run test 7 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do10 = yes ] ; then
|
||||
echo "Can't run test 10 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do16 = yes ] ; then
|
||||
echo "Can't run test 16 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do19 = yes ] ; then
|
||||
echo "Can't run test 19 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $link_size -ne 2 ] ; then
|
||||
if [ $do11 = yes ] ; then
|
||||
echo "Can't run test 11 because the link size ($link_size) is not 2"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $jit -eq 0 ] ; then
|
||||
if [ $do12 = "yes" ] ; then
|
||||
echo "Can't run test 12 because JIT support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
if [ $do13 = "yes" ] ; then
|
||||
echo "Can't run test 13 because JIT support is configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# If no specific tests were requested, select all. Those that are not
|
||||
# relevant will be skipped.
|
||||
# relevant will be automatically skipped.
|
||||
|
||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||
$do5 = no -a $do6 = no -a $do7 = no -a $do8 = no -a \
|
||||
|
@ -418,6 +397,11 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
|||
do26=yes
|
||||
fi
|
||||
|
||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||
# only of explicit skips.
|
||||
|
||||
for i in $skip; do eval do$i=no; done
|
||||
|
||||
# Show which release and which test data
|
||||
|
||||
echo ""
|
||||
|
@ -479,8 +463,9 @@ fi
|
|||
|
||||
# Locale-specific tests, provided that either the "fr_FR" or the "french"
|
||||
# locale is available. The former is the Unix-like standard; the latter is
|
||||
# for Windows. Another possibility is "fr", which needs to be run against
|
||||
# the Windows-specific input and output files.
|
||||
# for Windows. Another possibility is "fr". Unfortunately, different versions
|
||||
# of the French locale give different outputs for some items. This test passes
|
||||
# if the output matches any one of the alternative output files.
|
||||
|
||||
if [ $do3 = yes ] ; then
|
||||
locale -a | grep '^fr_FR$' >/dev/null
|
||||
|
@ -488,20 +473,28 @@ if [ $do3 = yes ] ; then
|
|||
locale=fr_FR
|
||||
infile=$testdata/testinput3
|
||||
outfile=$testdata/testoutput3
|
||||
outfile2=$testdata/testoutput3A
|
||||
outfile3=$testdata/testoutput3B
|
||||
else
|
||||
infile=test3input
|
||||
outfile=test3output
|
||||
outfile2=test3outputA
|
||||
outfile3=test3outputB
|
||||
locale -a | grep '^french$' >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
locale=french
|
||||
sed 's/fr_FR/french/' $testdata/testinput3 >test3input
|
||||
sed 's/fr_FR/french/' $testdata/testoutput3 >test3output
|
||||
sed 's/fr_FR/french/' $testdata/testoutput3A >test3outputA
|
||||
sed 's/fr_FR/french/' $testdata/testoutput3B >test3outputB
|
||||
else
|
||||
locale -a | grep '^fr$' >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
locale=fr
|
||||
sed 's/fr_FR/fr/' $testdata/wintestinput3 >test3input
|
||||
sed 's/fr_FR/fr/' $testdata/wintestoutput3 >test3output
|
||||
sed 's/fr_FR/fr/' $testdata/intestinput3 >test3input
|
||||
sed 's/fr_FR/fr/' $testdata/intestoutput3 >test3output
|
||||
sed 's/fr_FR/fr/' $testdata/intestoutput3A >test3outputA
|
||||
sed 's/fr_FR/fr/' $testdata/intestoutput3B >test3outputB
|
||||
else
|
||||
locale=
|
||||
fi
|
||||
|
@ -513,18 +506,20 @@ if [ $do3 = yes ] ; then
|
|||
for opt in "" "-s" $jitopt; do
|
||||
$sim $valgrind ./pcretest -q $bmode $opt $infile testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf $outfile testtry
|
||||
if [ $? != 0 ] ; then
|
||||
echo " "
|
||||
echo "Locale test did not run entirely successfully."
|
||||
echo "This usually means that there is a problem with the locale"
|
||||
echo "settings rather than a bug in PCRE."
|
||||
break;
|
||||
else
|
||||
if $cf $outfile testtry >teststdout || \
|
||||
$cf $outfile2 testtry >teststdout || \
|
||||
$cf $outfile3 testtry >teststdout
|
||||
then
|
||||
if [ "$opt" = "-s" ] ; then echo " OK with study"
|
||||
elif [ "$opt" = "-s+" ] ; then echo " OK with JIT study"
|
||||
else echo " OK"
|
||||
fi
|
||||
else
|
||||
echo "** Locale test did not run successfully. The output did not match"
|
||||
echo " $outfile, $outfile2 or $outfile3."
|
||||
echo " This may mean that there is a problem with the locale settings rather"
|
||||
echo " than a bug in PCRE."
|
||||
exit 1
|
||||
fi
|
||||
else exit 1
|
||||
fi
|
||||
|
@ -700,7 +695,7 @@ fi
|
|||
|
||||
if [ $do12 = yes ] ; then
|
||||
echo $title12
|
||||
if [ $jit -eq 0 ] ; then
|
||||
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
|
||||
echo " Skipped because JIT is not available or not usable"
|
||||
else
|
||||
$sim $valgrind ./pcretest -q $bmode $testdata/testinput12 testtry
|
||||
|
@ -1010,6 +1005,6 @@ fi
|
|||
done
|
||||
|
||||
# Clean up local working files
|
||||
rm -f test3input test3output testNinput testsaved* teststderr teststdout testtry
|
||||
# test3outputB is generated by the locale test alongside test3outputA, so it
# has to be removed here as well.
rm -f test3input test3output test3outputA test3outputB testNinput testsaved* teststderr teststdout testtry
|
||||
|
||||
# End
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
@rem tests 4 5 9 15 and 18 require utf support
|
||||
@rem tests 6 7 10 16 and 19 require ucp support
|
||||
@rem 11 requires ucp and link size 2
|
||||
@rem 12 requires presense of jit support
|
||||
@rem 12 requires presence of jit support
|
||||
@rem 13 requires absence of jit support
|
||||
@rem Sheri P also added override tests for study and jit testing
|
||||
@rem Zoltan Herczeg added libpcre16 support
|
||||
|
|
649
tools/pcre/aclocal.m4
vendored
649
tools/pcre/aclocal.m4
vendored
File diff suppressed because it is too large
Load Diff
270
tools/pcre/ar-lib
Normal file
270
tools/pcre/ar-lib
Normal file
|
@ -0,0 +1,270 @@
|
|||
#! /bin/sh
|
||||
# Wrapper for Microsoft lib.exe
|
||||
|
||||
me=ar-lib
|
||||
scriptversion=2012-03-01.08; # UTC
|
||||
|
||||
# Copyright (C) 2010-2013 Free Software Foundation, Inc.
|
||||
# Written by Peter Rosin <peda@lysator.liu.se>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
|
||||
# func_error message
# Report MESSAGE on stderr, prefixed with the script name held in $me,
# and terminate the script with exit status 1.
func_error ()
{
  echo "$me: $1" >&2
  exit 1
}
|
||||
|
||||
# Conversion flavour (mingw, cygwin or wine); empty until first needed.
file_conv=

# func_file_conv build_file
# Convert a $build file name to $host form and leave the result in $file.
# Only absolute, non-UNC names are rewritten; any other name is stored in
# $file unchanged.
# Currently only supports Windows hosts.
func_file_conv ()
{
  file=$1
  case $file in
    / | /[!/]*) ;;      # absolute file, and not a UNC file: convert below
    *) return 0 ;;      # relative or UNC name: nothing to convert
  esac

  # Lazily determine how to convert absolute names, then remember the
  # answer in $file_conv for later calls.
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv in
    mingw)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin)
      # Fall back to the unconverted name if cygpath fails.
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine)
      # Fall back to the unconverted name if winepath fails.
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
|
||||
|
||||
# func_at_file at_file operation archive
# Run OPERATION on ARCHIVE once for every member listed in AT_FILE,
# stopping with the tool's exit status as soon as one invocation fails.
# The names read from the @FILE are deliberately NOT passed through
# func_file_conv, since the user would need to supply preconverted file
# names to binutils ar, at least for MinGW.
func_at_file ()
{
  at_file_contents=`cat "$1"`
  operation=$2
  archive=$3

  # Re-parse the file contents as shell words so that the member list
  # becomes the positional parameters; the leading "x" is a placeholder
  # that is dropped again by the shift.
  eval set x "$at_file_contents"
  shift

  for member in "$@"; do
    $AR -NOLOGO $operation:"$member" "$archive" || exit $?
  done
}
|
||||
|
||||
# Deal with the invocations that need no archive: --version, --help and a
# missing command.  Any other first argument falls through to the checks
# below.
case $1 in
  -v | --v*)
    echo "$me, version $scriptversion"
    exit $?
    ;;
  -h | --h*)
    cat <<EOF
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]

Members may be specified in a file named with @FILE.
EOF
    exit $?
    ;;
  '')
    func_error "no command. Try '$0 --help' for more information."
    ;;
esac

# A program, an action and an archive are the minimum needed to go on.
test $# -ge 3 ||
  func_error "you must specify a program, an action and an archive"
|
||||
|
||||
# The first argument is the librarian program to run.
AR=$1
shift

# Fold any recognised switches that follow into $AR; the first word that
# is not one of them is taken as the ar-style action string.
while :; do
  test $# -ge 2 ||
    func_error "you must specify a program, an action and an archive"
  case $1 in
    -lib | -LIB | -ltcg | -LTCG | -machine* | -MACHINE* \
    | -subsystem* | -SUBSYSTEM* | -verbose | -VERBOSE | -wx* | -WX*)
      AR="$AR $1"
      shift
      ;;
    *)
      action=$1
      shift
      break
      ;;
  esac
done

# Next comes the archive; keep the name as given (used for the existence
# tests) as well as the converted name that is handed to $AR.
orig_archive=$1
shift
func_file_conv "$orig_archive"
archive=$file
|
||||
|
||||
# strip leading dash in $action
action=${action#-}

# Mode flags; each one becomes "yes" when its letter occurs in $action.
delete= extract= list= quick= replace= index= create=

# Consume $action one character at a time.
until test -z "$action"; do
  case $action in
    d*) delete=yes ;;
    x*) extract=yes ;;
    t*) list=yes ;;
    q*) quick=yes ;;
    r*) replace=yes ;;
    s*) index=yes ;;
    S*) ;; # the index is always updated implicitly
    c*) create=yes ;;
    u*) ;; # TODO: don't ignore the update modifier
    v*) ;; # TODO: don't ignore the verbose modifier
    *)
      func_error "unknown action specified"
      ;;
  esac
  action=${action#?}
done

# Exactly one of d/x/t/q/r must have been given, or none of them together
# with "s"; every other combination is rejected.
case $delete$extract$list$quick$replace,$index in
  yesyes*)
    func_error "more than one action specified"
    ;;
  yes,* | ,yes)
    ;;
  *)
    func_error "no action specified"
    ;;
esac
|
||||
|
||||
# Carry out the requested operation.  One of the mode flags set while the
# action string was decoded selects the branch; the positional parameters
# that remain are the archive members.
if test -n "$delete"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  for member
  do
    # Use the loop variable, not $1: nothing shifts the positional
    # parameters in this loop, so $1 would name the first member on every
    # pass and the remaining members would never be removed.
    case $member in
      @*)
        func_at_file "${member#@}" -REMOVE "$archive"
        ;;
      *)
        func_file_conv "$member"
        $AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
        ;;
    esac
  done

elif test -n "$extract"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  if test $# -gt 0; then
    for member
    do
      # As in the delete branch, $member (not $1) is the current member.
      case $member in
        @*)
          func_at_file "${member#@}" -EXTRACT "$archive"
          ;;
        *)
          func_file_conv "$member"
          $AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
          ;;
      esac
    done
  else
    # No members named: extract everything the archive lists.  The sed
    # doubles backslashes so that "read" hands the names back unchanged.
    $AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
    do
      $AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
    done
  fi

elif test -n "$quick$replace"; then
  if test ! -f "$orig_archive"; then
    if test -z "$create"; then
      echo "$me: creating $orig_archive"
    fi
    orig_archive=
  else
    orig_archive=$archive
  fi

  # Rotate the positional parameters: each pass appends the converted
  # form of $1 to the end of the list and drops the original, so that
  # after the loop "$@" holds only converted names.  $1 is correct here
  # because the list is shifted on every pass.
  for member
  do
    case $1 in
      @*)
        func_file_conv "${1#@}"
        set x "$@" "@$file"
        ;;
      *)
        func_file_conv "$1"
        set x "$@" "$file"
        ;;
    esac
    shift
    shift
  done

  # An existing archive is passed as the first input so that its current
  # contents are carried over into the output.
  if test -n "$orig_archive"; then
    $AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
  else
    $AR -NOLOGO -OUT:"$archive" "$@" || exit $?
  fi

elif test -n "$list"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  $AR -NOLOGO -LIST "$archive" || exit $?
fi
|
|
@ -1,10 +1,9 @@
|
|||
#! /bin/sh
|
||||
# Wrapper for compilers which do not understand '-c -o'.
|
||||
|
||||
scriptversion=2012-03-05.13; # UTC
|
||||
scriptversion=2012-10-14.11; # UTC
|
||||
|
||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009, 2010, 2012 Free
|
||||
# Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
|
@ -113,6 +112,11 @@ func_cl_dashl ()
|
|||
lib=$dir/$lib.lib
|
||||
break
|
||||
fi
|
||||
if test -f "$dir/lib$lib.a"; then
|
||||
found=yes
|
||||
lib=$dir/lib$lib.a
|
||||
break
|
||||
fi
|
||||
done
|
||||
IFS=$save_IFS
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#cmakedefine HAVE_SYS_TYPES_H 1
|
||||
#cmakedefine HAVE_UNISTD_H 1
|
||||
#cmakedefine HAVE_WINDOWS_H 1
|
||||
#cmakedefine HAVE_STDINT_H 1
|
||||
#cmakedefine HAVE_INTTYPES_H 1
|
||||
|
||||
#cmakedefine HAVE_TYPE_TRAITS_H 1
|
||||
#cmakedefine HAVE_BITS_TYPE_TRAITS_H 1
|
||||
|
@ -44,6 +46,7 @@
|
|||
#define NEWLINE @NEWLINE@
|
||||
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
||||
#define LINK_SIZE @PCRE_LINK_SIZE@
|
||||
#define PARENS_NEST_LIMIT @PCRE_PARENS_NEST_LIMIT@
|
||||
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
||||
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
|
||||
|
|
86
tools/pcre/config.guess
vendored
86
tools/pcre/config.guess
vendored
|
@ -1,14 +1,12 @@
|
|||
#! /bin/sh
|
||||
# Attempt to guess a canonical system name.
|
||||
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
||||
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||
# 2011, 2012 Free Software Foundation, Inc.
|
||||
# Copyright 1992-2013 Free Software Foundation, Inc.
|
||||
|
||||
timestamp='2012-08-14'
|
||||
timestamp='2013-11-29'
|
||||
|
||||
# This file is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
|
@ -22,19 +20,17 @@ timestamp='2012-08-14'
|
|||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
|
||||
# Originally written by Per Bothner. Please send patches (context
|
||||
# diff format) to <config-patches@gnu.org> and include a ChangeLog
|
||||
# entry.
|
||||
# the same distribution terms that you use for the rest of that
|
||||
# program. This Exception is an additional permission under section 7
|
||||
# of the GNU General Public License, version 3 ("GPLv3").
|
||||
#
|
||||
# This script attempts to guess a canonical system name similar to
|
||||
# config.sub. If it succeeds, it prints the system name on stdout, and
|
||||
# exits with 0. Otherwise, it exits with 1.
|
||||
# Originally written by Per Bothner.
|
||||
#
|
||||
# You can get the latest version of this script from:
|
||||
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
|
||||
#
|
||||
# Please send patches with a ChangeLog entry to config-patches@gnu.org.
|
||||
|
||||
|
||||
me=`echo "$0" | sed -e 's,.*/,,'`
|
||||
|
||||
|
@ -54,9 +50,7 @@ version="\
|
|||
GNU config.guess ($timestamp)
|
||||
|
||||
Originally written by Per Bothner.
|
||||
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
||||
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
|
||||
Free Software Foundation, Inc.
|
||||
Copyright 1992-2013 Free Software Foundation, Inc.
|
||||
|
||||
This is free software; see the source for copying conditions. There is NO
|
||||
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||
|
@ -139,22 +133,20 @@ UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
|
|||
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
|
||||
|
||||
case "${UNAME_SYSTEM}" in
|
||||
Linux|GNU/*)
|
||||
Linux|GNU|GNU/*)
|
||||
# If the system lacks a compiler, then just pick glibc.
|
||||
# We could probably try harder.
|
||||
LIBC=gnu
|
||||
|
||||
eval $set_cc_for_build
|
||||
cat <<-EOF > $dummy.c
|
||||
#include <features.h>
|
||||
#ifdef __UCLIBC__
|
||||
# ifdef __UCLIBC_CONFIG_VERSION__
|
||||
LIBC=uclibc __UCLIBC_CONFIG_VERSION__
|
||||
# else
|
||||
#if defined(__UCLIBC__)
|
||||
LIBC=uclibc
|
||||
# endif
|
||||
#else
|
||||
# ifdef __dietlibc__
|
||||
#elif defined(__dietlibc__)
|
||||
LIBC=dietlibc
|
||||
# else
|
||||
#else
|
||||
LIBC=gnu
|
||||
# endif
|
||||
#endif
|
||||
EOF
|
||||
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
|
||||
|
@ -329,7 +321,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
|
|||
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
|
||||
echo arm-acorn-riscix${UNAME_RELEASE}
|
||||
exit ;;
|
||||
arm:riscos:*:*|arm:RISCOS:*:*)
|
||||
arm*:riscos:*:*|arm*:RISCOS:*:*)
|
||||
echo arm-unknown-riscos
|
||||
exit ;;
|
||||
SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
|
||||
|
@ -912,6 +904,9 @@ EOF
|
|||
if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
|
||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||
exit ;;
|
||||
arc:Linux:*:* | arceb:Linux:*:*)
|
||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||
exit ;;
|
||||
arm*:Linux:*:*)
|
||||
eval $set_cc_for_build
|
||||
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
|
||||
|
@ -974,6 +969,9 @@ EOF
|
|||
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
|
||||
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
|
||||
;;
|
||||
or1k:Linux:*:*)
|
||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||
exit ;;
|
||||
or32:Linux:*:*)
|
||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||
exit ;;
|
||||
|
@ -997,8 +995,14 @@ EOF
|
|||
ppc:Linux:*:*)
|
||||
echo powerpc-unknown-linux-${LIBC}
|
||||
exit ;;
|
||||
ppc64le:Linux:*:*)
|
||||
echo powerpc64le-unknown-linux-${LIBC}
|
||||
exit ;;
|
||||
ppcle:Linux:*:*)
|
||||
echo powerpcle-unknown-linux-${LIBC}
|
||||
exit ;;
|
||||
s390:Linux:*:* | s390x:Linux:*:*)
|
||||
echo ${UNAME_MACHINE}-ibm-linux
|
||||
echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
|
||||
exit ;;
|
||||
sh64*:Linux:*:*)
|
||||
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
|
||||
|
@ -1252,19 +1256,31 @@ EOF
|
|||
exit ;;
|
||||
*:Darwin:*:*)
|
||||
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
|
||||
case $UNAME_PROCESSOR in
|
||||
i386)
|
||||
eval $set_cc_for_build
|
||||
if test "$UNAME_PROCESSOR" = unknown ; then
|
||||
UNAME_PROCESSOR=powerpc
|
||||
fi
|
||||
if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
|
||||
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
|
||||
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
|
||||
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
|
||||
grep IS_64BIT_ARCH >/dev/null
|
||||
then
|
||||
UNAME_PROCESSOR="x86_64"
|
||||
fi
|
||||
fi ;;
|
||||
unknown) UNAME_PROCESSOR=powerpc ;;
|
||||
case $UNAME_PROCESSOR in
|
||||
i386) UNAME_PROCESSOR=x86_64 ;;
|
||||
powerpc) UNAME_PROCESSOR=powerpc64 ;;
|
||||
esac
|
||||
fi
|
||||
fi
|
||||
elif test "$UNAME_PROCESSOR" = i386 ; then
|
||||
# Avoid executing cc on OS X 10.9, as it ships with a stub
|
||||
# that puts up a graphical alert prompting to install
|
||||
# developer tools. Any system running Mac OS X 10.7 or
|
||||
# later (Darwin 11 and later) is required to have a 64-bit
|
||||
# processor. This is not true of the ARM version of Darwin
|
||||
# that Apple uses in portable devices.
|
||||
UNAME_PROCESSOR=x86_64
|
||||
fi
|
||||
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
|
||||
exit ;;
|
||||
*:procnto*:*:* | *:QNX:[0123456789]*:*)
|
||||
|
|
|
@ -5,20 +5,28 @@
|
|||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the facilities, config.h.in is converted by
|
||||
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
||||
are going to build PCRE "by hand" without using "configure" or CMake, you
|
||||
should copy the distributed config.h.generic to config.h, and then edit the
|
||||
macro definitions to be the way you need them. You must then add
|
||||
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
||||
at the start of every source.
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.h.in is converted into config.h. If you are going to build PCRE "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
|
||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
|
@ -44,27 +52,19 @@ them both to 0; an emulation function will be used. */
|
|||
/* #undef EBCDIC_NL25 */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#ifndef HAVE_BCOPY
|
||||
#define HAVE_BCOPY 1
|
||||
#endif
|
||||
/* #undef HAVE_BCOPY */
|
||||
|
||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
||||
/* #undef HAVE_BITS_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
#ifndef HAVE_BZLIB_H
|
||||
#define HAVE_BZLIB_H 1
|
||||
#endif
|
||||
/* #undef HAVE_BZLIB_H */
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#ifndef HAVE_DIRENT_H
|
||||
#define HAVE_DIRENT_H 1
|
||||
#endif
|
||||
/* #undef HAVE_DIRENT_H */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#ifndef HAVE_DLFCN_H
|
||||
#define HAVE_DLFCN_H 1
|
||||
#endif
|
||||
/* #undef HAVE_DLFCN_H */
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||
|
@ -73,29 +73,19 @@ them both to 0; an emulation function will be used. */
|
|||
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#ifndef HAVE_INTTYPES_H
|
||||
#define HAVE_INTTYPES_H 1
|
||||
#endif
|
||||
/* #undef HAVE_INTTYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#ifndef HAVE_LIMITS_H
|
||||
#define HAVE_LIMITS_H 1
|
||||
#endif
|
||||
/* #undef HAVE_LIMITS_H */
|
||||
|
||||
/* Define to 1 if the system has the type `long long'. */
|
||||
#ifndef HAVE_LONG_LONG
|
||||
#define HAVE_LONG_LONG 1
|
||||
#endif
|
||||
/* #undef HAVE_LONG_LONG */
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#define HAVE_MEMMOVE 1
|
||||
#endif
|
||||
/* #undef HAVE_MEMMOVE */
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#ifndef HAVE_MEMORY_H
|
||||
#define HAVE_MEMORY_H 1
|
||||
#endif
|
||||
/* #undef HAVE_MEMORY_H */
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
/* #undef HAVE_PTHREAD */
|
||||
|
@ -110,34 +100,22 @@ them both to 0; an emulation function will be used. */
|
|||
/* #undef HAVE_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#ifndef HAVE_STDINT_H
|
||||
#define HAVE_STDINT_H 1
|
||||
#endif
|
||||
/* #undef HAVE_STDINT_H */
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#ifndef HAVE_STDLIB_H
|
||||
#define HAVE_STDLIB_H 1
|
||||
#endif
|
||||
/* #undef HAVE_STDLIB_H */
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#ifndef HAVE_STRERROR
|
||||
#define HAVE_STRERROR 1
|
||||
#endif
|
||||
/* #undef HAVE_STRERROR */
|
||||
|
||||
/* Define to 1 if you have the <string> header file. */
|
||||
#ifndef HAVE_STRING
|
||||
#define HAVE_STRING 1
|
||||
#endif
|
||||
/* #undef HAVE_STRING */
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#ifndef HAVE_STRINGS_H
|
||||
#define HAVE_STRINGS_H 1
|
||||
#endif
|
||||
/* #undef HAVE_STRINGS_H */
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#ifndef HAVE_STRING_H
|
||||
#define HAVE_STRING_H 1
|
||||
#endif
|
||||
/* #undef HAVE_STRING_H */
|
||||
|
||||
/* Define to 1 if you have `strtoimax'. */
|
||||
/* #undef HAVE_STRTOIMAX */
|
||||
|
@ -146,46 +124,31 @@ them both to 0; an emulation function will be used. */
|
|||
/* #undef HAVE_STRTOLL */
|
||||
|
||||
/* Define to 1 if you have `strtoq'. */
|
||||
#ifndef HAVE_STRTOQ
|
||||
#define HAVE_STRTOQ 1
|
||||
#endif
|
||||
/* #undef HAVE_STRTOQ */
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#ifndef HAVE_SYS_STAT_H
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
#endif
|
||||
/* #undef HAVE_SYS_STAT_H */
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#ifndef HAVE_SYS_TYPES_H
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
#endif
|
||||
/* #undef HAVE_SYS_TYPES_H */
|
||||
|
||||
/* Define to 1 if you have the <type_traits.h> header file. */
|
||||
/* #undef HAVE_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#ifndef HAVE_UNISTD_H
|
||||
#define HAVE_UNISTD_H 1
|
||||
#endif
|
||||
/* #undef HAVE_UNISTD_H */
|
||||
|
||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||
#ifndef HAVE_UNSIGNED_LONG_LONG
|
||||
#define HAVE_UNSIGNED_LONG_LONG 1
|
||||
#endif
|
||||
/* #undef HAVE_UNSIGNED_LONG_LONG */
|
||||
|
||||
/* Define to 1 or 0, depending whether the compiler supports simple visibility
|
||||
declarations. */
|
||||
#ifndef HAVE_VISIBILITY
|
||||
#define HAVE_VISIBILITY 1
|
||||
#endif
|
||||
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||
/* #undef HAVE_VISIBILITY */
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#ifndef HAVE_ZLIB_H
|
||||
#define HAVE_ZLIB_H 1
|
||||
#endif
|
||||
/* #undef HAVE_ZLIB_H */
|
||||
|
||||
/* Define to 1 if you have `_strtoi64'. */
|
||||
/* #undef HAVE__STRTOI64 */
|
||||
|
@ -201,6 +164,7 @@ them both to 0; an emulation function will be used. */
|
|||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
/* This is ignored unless you are using libtool. */
|
||||
#ifndef LT_OBJDIR
|
||||
#define LT_OBJDIR ".libs/"
|
||||
#endif
|
||||
|
@ -253,9 +217,6 @@ them both to 0; an emulation function will be used. */
|
|||
#define NEWLINE 10
|
||||
#endif
|
||||
|
||||
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
|
||||
/* #undef NO_MINUS_C_MINUS_O */
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||
|
@ -275,7 +236,7 @@ them both to 0; an emulation function will be used. */
|
|||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 8.32"
|
||||
#define PACKAGE_STRING "PCRE 8.35"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
@ -284,7 +245,14 @@ them both to 0; an emulation function will be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "8.32"
|
||||
#define PACKAGE_VERSION "8.35"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#ifndef PARENS_NEST_LIMIT
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
#endif
|
||||
|
||||
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||
pcregrep to hold parts of the file it is searching. This is also the
|
||||
|
@ -325,13 +293,7 @@ them both to 0; an emulation function will be used. */
|
|||
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#ifndef STDC_HEADERS
|
||||
#define STDC_HEADERS 1
|
||||
#endif
|
||||
|
||||
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
||||
are able to generate code coverage reports. */
|
||||
/* #undef SUPPORT_GCOV */
|
||||
/* #undef STDC_HEADERS */
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
/* #undef SUPPORT_JIT */
|
||||
|
@ -357,9 +319,7 @@ them both to 0; an emulation function will be used. */
|
|||
/* #undef SUPPORT_PCRE32 */
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE library. */
|
||||
#ifndef SUPPORT_PCRE8
|
||||
#define SUPPORT_PCRE8 /**/
|
||||
#endif
|
||||
/* #undef SUPPORT_PCRE8 */
|
||||
|
||||
/* Define to any value to enable JIT support in pcregrep. */
|
||||
/* #undef SUPPORT_PCREGREP_JIT */
|
||||
|
@ -373,13 +333,11 @@ them both to 0; an emulation function will be used. */
|
|||
ASCII/UTF-8/16/32, but not both at once. */
|
||||
/* #undef SUPPORT_UTF */
|
||||
|
||||
/* Valgrind support to find invalid memory reads. */
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "8.32"
|
||||
#endif
|
||||
#define VERSION "8.35"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
|
|
@ -5,20 +5,28 @@
|
|||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the facilities, config.h.in is converted by
|
||||
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
||||
are going to build PCRE "by hand" without using "configure" or CMake, you
|
||||
should copy the distributed config.h.generic to config.h, and then edit the
|
||||
macro definitions to be the way you need them. You must then add
|
||||
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
||||
at the start of every source.
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.h.in is converted into config.h. If you are going to build PCRE "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
|
||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
|
@ -133,8 +141,7 @@ them both to 0; an emulation function will be used. */
|
|||
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||
#undef HAVE_UNSIGNED_LONG_LONG
|
||||
|
||||
/* Define to 1 or 0, depending whether the compiler supports simple visibility
|
||||
declarations. */
|
||||
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||
#undef HAVE_VISIBILITY
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
|
@ -195,9 +202,6 @@ them both to 0; an emulation function will be used. */
|
|||
or -2 (ANYCRLF). */
|
||||
#undef NEWLINE
|
||||
|
||||
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
|
||||
#undef NO_MINUS_C_MINUS_O
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||
|
@ -228,6 +232,11 @@ them both to 0; an emulation function will be used. */
|
|||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#undef PARENS_NEST_LIMIT
|
||||
|
||||
/* to make a symbol visible */
|
||||
#undef PCRECPP_EXP_DECL
|
||||
|
||||
|
@ -284,10 +293,6 @@ them both to 0; an emulation function will be used. */
|
|||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
||||
are able to generate code coverage reports. */
|
||||
#undef SUPPORT_GCOV
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
#undef SUPPORT_JIT
|
||||
|
||||
|
@ -326,7 +331,7 @@ them both to 0; an emulation function will be used. */
|
|||
ASCII/UTF-8/16/32, but not both at once. */
|
||||
#undef SUPPORT_UTF
|
||||
|
||||
/* Valgrind support to find invalid memory reads. */
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
#undef SUPPORT_VALGRIND
|
||||
|
||||
/* Version number of package */
|
||||
|
|
109
tools/pcre/config.sub
vendored
109
tools/pcre/config.sub
vendored
|
@ -1,24 +1,18 @@
|
|||
#! /bin/sh
|
||||
# Configuration validation subroutine script.
|
||||
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
||||
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||
# 2011, 2012 Free Software Foundation, Inc.
|
||||
# Copyright 1992-2013 Free Software Foundation, Inc.
|
||||
|
||||
timestamp='2012-08-18'
|
||||
timestamp='2013-10-01'
|
||||
|
||||
# This file is (in principle) common to ALL GNU software.
|
||||
# The presence of a machine in this file suggests that SOME GNU software
|
||||
# can handle that machine. It does not imply ALL GNU software can.
|
||||
#
|
||||
# This file is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# This file is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
|
@ -26,11 +20,12 @@ timestamp='2012-08-18'
|
|||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
# the same distribution terms that you use for the rest of that
|
||||
# program. This Exception is an additional permission under section 7
|
||||
# of the GNU General Public License, version 3 ("GPLv3").
|
||||
|
||||
|
||||
# Please send patches to <config-patches@gnu.org>. Submit a context
|
||||
# diff and a properly formatted GNU ChangeLog entry.
|
||||
# Please send patches with a ChangeLog entry to config-patches@gnu.org.
|
||||
#
|
||||
# Configuration subroutine to validate and canonicalize a configuration type.
|
||||
# Supply the specified configuration type as an argument.
|
||||
|
@ -73,9 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>."
|
|||
version="\
|
||||
GNU config.sub ($timestamp)
|
||||
|
||||
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
||||
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
|
||||
Free Software Foundation, Inc.
|
||||
Copyright 1992-2013 Free Software Foundation, Inc.
|
||||
|
||||
This is free software; see the source for copying conditions. There is NO
|
||||
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||
|
@ -156,7 +149,7 @@ case $os in
|
|||
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
|
||||
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
|
||||
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
|
||||
-apple | -axis | -knuth | -cray | -microblaze)
|
||||
-apple | -axis | -knuth | -cray | -microblaze*)
|
||||
os=
|
||||
basic_machine=$1
|
||||
;;
|
||||
|
@ -259,21 +252,24 @@ case $basic_machine in
|
|||
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
|
||||
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
|
||||
| am33_2.0 \
|
||||
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
|
||||
| arc | arceb \
|
||||
| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
|
||||
| avr | avr32 \
|
||||
| be32 | be64 \
|
||||
| bfin \
|
||||
| c4x | clipper \
|
||||
| d10v | d30v | dlx | dsp16xx | dvp \
|
||||
| c4x | c8051 | clipper \
|
||||
| d10v | d30v | dlx | dsp16xx \
|
||||
| epiphany \
|
||||
| fido | fr30 | frv \
|
||||
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
|
||||
| hexagon \
|
||||
| i370 | i860 | i960 | ia64 \
|
||||
| ip2k | iq2000 \
|
||||
| k1om \
|
||||
| le32 | le64 \
|
||||
| lm32 \
|
||||
| m32c | m32r | m32rle | m68000 | m68k | m88k \
|
||||
| maxq | mb | microblaze | mcore | mep | metag \
|
||||
| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
|
||||
| mips | mipsbe | mipseb | mipsel | mipsle \
|
||||
| mips16 \
|
||||
| mips64 | mips64el \
|
||||
|
@ -291,16 +287,17 @@ case $basic_machine in
|
|||
| mipsisa64r2 | mipsisa64r2el \
|
||||
| mipsisa64sb1 | mipsisa64sb1el \
|
||||
| mipsisa64sr71k | mipsisa64sr71kel \
|
||||
| mipsr5900 | mipsr5900el \
|
||||
| mipstx39 | mipstx39el \
|
||||
| mn10200 | mn10300 \
|
||||
| moxie \
|
||||
| mt \
|
||||
| msp430 \
|
||||
| nds32 | nds32le | nds32be \
|
||||
| nios | nios2 \
|
||||
| nios | nios2 | nios2eb | nios2el \
|
||||
| ns16k | ns32k \
|
||||
| open8 \
|
||||
| or32 \
|
||||
| or1k | or32 \
|
||||
| pdp10 | pdp11 | pj | pjl \
|
||||
| powerpc | powerpc64 | powerpc64le | powerpcle \
|
||||
| pyramid \
|
||||
|
@ -328,7 +325,7 @@ case $basic_machine in
|
|||
c6x)
|
||||
basic_machine=tic6x-unknown
|
||||
;;
|
||||
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
|
||||
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
|
||||
basic_machine=$basic_machine-unknown
|
||||
os=-none
|
||||
;;
|
||||
|
@ -370,13 +367,13 @@ case $basic_machine in
|
|||
| aarch64-* | aarch64_be-* \
|
||||
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
|
||||
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
|
||||
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
|
||||
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
|
||||
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
|
||||
| avr-* | avr32-* \
|
||||
| be32-* | be64-* \
|
||||
| bfin-* | bs2000-* \
|
||||
| c[123]* | c30-* | [cjt]90-* | c4x-* \
|
||||
| clipper-* | craynv-* | cydra-* \
|
||||
| c8051-* | clipper-* | craynv-* | cydra-* \
|
||||
| d10v-* | d30v-* | dlx-* \
|
||||
| elxsi-* \
|
||||
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
|
||||
|
@ -385,11 +382,13 @@ case $basic_machine in
|
|||
| hexagon-* \
|
||||
| i*86-* | i860-* | i960-* | ia64-* \
|
||||
| ip2k-* | iq2000-* \
|
||||
| k1om-* \
|
||||
| le32-* | le64-* \
|
||||
| lm32-* \
|
||||
| m32c-* | m32r-* | m32rle-* \
|
||||
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
|
||||
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
|
||||
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
|
||||
| microblaze-* | microblazeel-* \
|
||||
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
|
||||
| mips16-* \
|
||||
| mips64-* | mips64el-* \
|
||||
|
@ -407,12 +406,13 @@ case $basic_machine in
|
|||
| mipsisa64r2-* | mipsisa64r2el-* \
|
||||
| mipsisa64sb1-* | mipsisa64sb1el-* \
|
||||
| mipsisa64sr71k-* | mipsisa64sr71kel-* \
|
||||
| mipsr5900-* | mipsr5900el-* \
|
||||
| mipstx39-* | mipstx39el-* \
|
||||
| mmix-* \
|
||||
| mt-* \
|
||||
| msp430-* \
|
||||
| nds32-* | nds32le-* | nds32be-* \
|
||||
| nios-* | nios2-* \
|
||||
| nios-* | nios2-* | nios2eb-* | nios2el-* \
|
||||
| none-* | np1-* | ns16k-* | ns32k-* \
|
||||
| open8-* \
|
||||
| orion-* \
|
||||
|
@ -788,7 +788,7 @@ case $basic_machine in
|
|||
basic_machine=ns32k-utek
|
||||
os=-sysv
|
||||
;;
|
||||
microblaze)
|
||||
microblaze*)
|
||||
basic_machine=microblaze-xilinx
|
||||
;;
|
||||
mingw64)
|
||||
|
@ -796,7 +796,7 @@ case $basic_machine in
|
|||
os=-mingw64
|
||||
;;
|
||||
mingw32)
|
||||
basic_machine=i386-pc
|
||||
basic_machine=i686-pc
|
||||
os=-mingw32
|
||||
;;
|
||||
mingw32ce)
|
||||
|
@ -810,24 +810,6 @@ case $basic_machine in
|
|||
basic_machine=m68k-atari
|
||||
os=-mint
|
||||
;;
|
||||
mipsEE* | ee | ps2)
|
||||
basic_machine=mips64r5900el-scei
|
||||
case $os in
|
||||
-linux*)
|
||||
;;
|
||||
*)
|
||||
os=-elf
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
iop)
|
||||
basic_machine=mipsel-scei
|
||||
os=-irx
|
||||
;;
|
||||
dvp)
|
||||
basic_machine=dvp-scei
|
||||
os=-elf
|
||||
;;
|
||||
mips3*-*)
|
||||
basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
|
||||
;;
|
||||
|
@ -850,7 +832,7 @@ case $basic_machine in
|
|||
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
|
||||
;;
|
||||
msys)
|
||||
basic_machine=i386-pc
|
||||
basic_machine=i686-pc
|
||||
os=-msys
|
||||
;;
|
||||
mvs)
|
||||
|
@ -1041,7 +1023,11 @@ case $basic_machine in
|
|||
basic_machine=i586-unknown
|
||||
os=-pw32
|
||||
;;
|
||||
rdos)
|
||||
rdos | rdos64)
|
||||
basic_machine=x86_64-pc
|
||||
os=-rdos
|
||||
;;
|
||||
rdos32)
|
||||
basic_machine=i386-pc
|
||||
os=-rdos
|
||||
;;
|
||||
|
@ -1368,7 +1354,7 @@ case $os in
|
|||
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
|
||||
| -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
|
||||
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
|
||||
| -sym* | -kopensolaris* \
|
||||
| -sym* | -kopensolaris* | -plan9* \
|
||||
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
|
||||
| -aos* | -aros* \
|
||||
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
|
||||
|
@ -1386,7 +1372,7 @@ case $os in
|
|||
| -uxpv* | -beos* | -mpeix* | -udk* \
|
||||
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
|
||||
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
|
||||
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -irx* \
|
||||
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
|
||||
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
|
||||
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
|
||||
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
|
||||
|
@ -1514,9 +1500,6 @@ case $os in
|
|||
-aros*)
|
||||
os=-aros
|
||||
;;
|
||||
-kaos*)
|
||||
os=-kaos
|
||||
;;
|
||||
-zvmoe)
|
||||
os=-zvmoe
|
||||
;;
|
||||
|
@ -1565,6 +1548,9 @@ case $basic_machine in
|
|||
c4x-* | tic4x-*)
|
||||
os=-coff
|
||||
;;
|
||||
c8051-*)
|
||||
os=-elf
|
||||
;;
|
||||
hexagon-*)
|
||||
os=-elf
|
||||
;;
|
||||
|
@ -1608,6 +1594,9 @@ case $basic_machine in
|
|||
mips*-*)
|
||||
os=-elf
|
||||
;;
|
||||
or1k-*)
|
||||
os=-elf
|
||||
;;
|
||||
or32-*)
|
||||
os=-coff
|
||||
;;
|
||||
|
|
2102
tools/pcre/configure
vendored
2102
tools/pcre/configure
vendored
File diff suppressed because it is too large
Load Diff
|
@ -9,18 +9,18 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
|
|||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre_major, [8])
|
||||
m4_define(pcre_minor, [32])
|
||||
m4_define(pcre_minor, [35])
|
||||
m4_define(pcre_prerelease, [])
|
||||
m4_define(pcre_date, [2012-11-30])
|
||||
m4_define(pcre_date, [2014-04-04])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre_version, [3:0:2])
|
||||
m4_define(libpcre16_version, [2:0:2])
|
||||
m4_define(libpcre32_version, [0:0:0])
|
||||
m4_define(libpcreposix_version, [0:1:0])
|
||||
m4_define(libpcre_version, [3:3:2])
|
||||
m4_define(libpcre16_version, [2:3:2])
|
||||
m4_define(libpcre32_version, [0:3:0])
|
||||
m4_define(libpcreposix_version, [0:2:0])
|
||||
m4_define(libpcrecpp_version, [0:0:0])
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
|
@ -30,6 +30,9 @@ AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
|
|||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
|
||||
# This is a new thing required to stop a warning from automake 1.12
|
||||
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
|
||||
|
||||
# This was added at the suggestion of libtoolize (03-Jan-10)
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
|
@ -245,7 +248,7 @@ AC_ARG_ENABLE(pcregrep-libbz2,
|
|||
# Handle --with-pcregrep-bufsize=N
|
||||
AC_ARG_WITH(pcregrep-bufsize,
|
||||
AS_HELP_STRING([--with-pcregrep-bufsize=N],
|
||||
[pcregrep buffer size (default=20480)]),
|
||||
[pcregrep buffer size (default=20480, minimum=8192)]),
|
||||
, with_pcregrep_bufsize=20480)
|
||||
|
||||
# Handle --enable-pcretest-libedit
|
||||
|
@ -272,6 +275,12 @@ AC_ARG_WITH(link-size,
|
|||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||
, with_link_size=2)
|
||||
|
||||
# Handle --with-parens-nest-limit=N
|
||||
AC_ARG_WITH(parens-nest-limit,
|
||||
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||
[nested parentheses limit (default=250)]),
|
||||
, with_parens_nest_limit=250)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
|
@ -427,24 +436,33 @@ AH_TOP([
|
|||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the facilities, config.h.in is converted by
|
||||
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
||||
are going to build PCRE "by hand" without using "configure" or CMake, you
|
||||
should copy the distributed config.h.generic to config.h, and then edit the
|
||||
macro definitions to be the way you need them. You must then add
|
||||
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
||||
at the start of every source.
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.h.in is converted into config.h. If you are going to build PCRE "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
so that config.h is included at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */])
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
|
||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */])
|
||||
|
||||
# Checks for header files.
|
||||
AC_HEADER_STDC
|
||||
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h windows.h)
|
||||
AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
|
||||
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
|
||||
|
||||
# The files below are C++ header files.
|
||||
pcre_have_type_traits="0"
|
||||
|
@ -669,11 +687,15 @@ if test "$enable_pcre32" = "yes"; then
|
|||
Define to any value to enable the 32 bit PCRE library.])
|
||||
fi
|
||||
|
||||
# Unless running under Windows, JIT support requires pthreads.
|
||||
|
||||
if test "$enable_jit" = "yes"; then
|
||||
if test "$HAVE_WINDOWS_H" != "1"; then
|
||||
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
|
||||
CC="$PTHREAD_CC"
|
||||
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_JIT], [], [
|
||||
Define to any value to enable support for Just-In-Time compiling.])
|
||||
else
|
||||
|
@ -722,7 +744,12 @@ if test "$enable_pcregrep_libbz2" = "yes"; then
|
|||
fi
|
||||
|
||||
if test $with_pcregrep_bufsize -lt 8192 ; then
|
||||
AC_MSG_WARN([$with_pcregrep_bufsize is too small for --with-pcregrep-bufsize; using 8192])
|
||||
with_pcregrep_bufsize="8192"
|
||||
else
|
||||
if test $? -gt 1 ; then
|
||||
AC_MSG_ERROR([Bad value for --with-pcregrep-bufsize])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
|
||||
|
@ -773,6 +800,11 @@ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
|
|||
faster than using malloc() for each call. The threshold above which
|
||||
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
|
@ -838,7 +870,7 @@ fi
|
|||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
|
||||
Valgrind support to find invalid memory reads.])
|
||||
Define to any value for valgrind support to find invalid memory reads.])
|
||||
fi
|
||||
|
||||
# Platform specific issues
|
||||
|
@ -946,7 +978,7 @@ if test "$enable_pcretest_libreadline" = "yes"; then
|
|||
fi
|
||||
fi
|
||||
|
||||
# Check for valgrind
|
||||
# Handle valgrind support
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
m4_ifdef([PKG_CHECK_MODULES],
|
||||
|
@ -954,7 +986,7 @@ if test "$enable_valgrind" = "yes"; then
|
|||
[AC_MSG_ERROR([pkg-config not supported])])
|
||||
fi
|
||||
|
||||
# test code coverage reporting
|
||||
# Handle code coverage reporting support
|
||||
if test "$enable_coverage" = "yes"; then
|
||||
if test "x$GCC" != "xyes"; then
|
||||
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||
|
@ -985,11 +1017,7 @@ if test "$enable_coverage" = "yes"; then
|
|||
AC_MSG_ERROR([genhtml not found])
|
||||
fi
|
||||
|
||||
AC_DEFINE([SUPPORT_GCOV],[1], [
|
||||
Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
||||
are able to generate code coverage reports.])
|
||||
|
||||
# And add flags needed for gcov
|
||||
# Set flags needed for gcov
|
||||
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_LIBS="-lgcov"
|
||||
|
@ -1064,6 +1092,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
||||
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
||||
Internal link size .............. : ${with_link_size}
|
||||
Nested parentheses limit ........ : ${with_parens_nest_limit}
|
||||
Match limit ..................... : ${with_match_limit}
|
||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||
Build shared libs ............... : ${enable_shared}
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
#! /bin/sh
|
||||
# depcomp - compile a program generating dependencies as side-effects
|
||||
|
||||
scriptversion=2012-03-27.16; # UTC
|
||||
scriptversion=2013-05-30.07; # UTC
|
||||
|
||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
|
||||
# 2011, 2012 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
@ -57,11 +56,65 @@ EOF
|
|||
;;
|
||||
esac
|
||||
|
||||
# Get the directory component of the given path, and save it in the
|
||||
# global variable '$dir'. Note that this directory component will
|
||||
# be either empty or ending with a '/' character. This is deliberate.
|
||||
set_dir_from ()
|
||||
{
|
||||
case $1 in
|
||||
*/*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;;
|
||||
*) dir=;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Get the suffix-stripped basename of the given path, and save it in the
|
||||
# global variable '$base'.
|
||||
set_base_from ()
|
||||
{
|
||||
base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'`
|
||||
}
|
||||
|
||||
# If no dependency file was actually created by the compiler invocation,
|
||||
# we still have to create a dummy depfile, to avoid errors with the
|
||||
# Makefile "include basename.Plo" scheme.
|
||||
make_dummy_depfile ()
|
||||
{
|
||||
echo "#dummy" > "$depfile"
|
||||
}
|
||||
|
||||
# Factor out some common post-processing of the generated depfile.
|
||||
# Requires the auxiliary global variable '$tmpdepfile' to be set.
|
||||
aix_post_process_depfile ()
|
||||
{
|
||||
# If the compiler actually managed to produce a dependency file,
|
||||
# post-process it.
|
||||
if test -f "$tmpdepfile"; then
|
||||
# Each line is of the form 'foo.o: dependency.h'.
|
||||
# Do two passes, one to just change these to
|
||||
# $object: dependency.h
|
||||
# and one to simply output
|
||||
# dependency.h:
|
||||
# which is needed to avoid the deleted-header problem.
|
||||
{ sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile"
|
||||
sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile"
|
||||
} > "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
else
|
||||
make_dummy_depfile
|
||||
fi
|
||||
}
|
||||
|
||||
# A tabulation character.
|
||||
tab=' '
|
||||
# A newline character.
|
||||
nl='
|
||||
'
|
||||
# Character ranges might be problematic outside the C locale.
|
||||
# These definitions help.
|
||||
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
lower=abcdefghijklmnopqrstuvwxyz
|
||||
digits=0123456789
|
||||
alpha=${upper}${lower}
|
||||
|
||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||
|
@ -75,6 +128,9 @@ tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
|||
|
||||
rm -f "$tmpdepfile"
|
||||
|
||||
# Avoid interferences from the environment.
|
||||
gccflag= dashmflag=
|
||||
|
||||
# Some modes work just like other modes, but use different flags. We
|
||||
# parameterize here, but still list the modes in the big case below,
|
||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||
|
@ -109,7 +165,7 @@ if test "$depmode" = msvc7msys; then
|
|||
fi
|
||||
|
||||
if test "$depmode" = xlc; then
|
||||
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency informations.
|
||||
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||
gccflag=-qmakedep=gcc,-MF
|
||||
depmode=gcc
|
||||
fi
|
||||
|
@ -134,8 +190,7 @@ gcc3)
|
|||
done
|
||||
"$@"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
|
@ -143,13 +198,17 @@ gcc3)
|
|||
;;
|
||||
|
||||
gcc)
|
||||
## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
|
||||
## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
|
||||
## (see the conditional assignment to $gccflag above).
|
||||
## There are various ways to get dependency output from gcc. Here's
|
||||
## why we pick this rather obscure method:
|
||||
## - Don't want to use -MD because we'd like the dependencies to end
|
||||
## up in a subdir. Having to rename by hand is ugly.
|
||||
## (We might end up doing this anyway to support other compilers.)
|
||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||
## -MM, not -M (despite what the docs say).
|
||||
## -MM, not -M (despite what the docs say). Also, it might not be
|
||||
## supported by the other compilers which use the 'gcc' depmode.
|
||||
## - Using -M directly means running the compiler twice (even worse
|
||||
## than renaming).
|
||||
if test -z "$gccflag"; then
|
||||
|
@ -157,15 +216,14 @@ gcc)
|
|||
fi
|
||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
||||
## The second -e expression handles DOS-style file names with drive letters.
|
||||
# The second -e expression handles DOS-style file names with drive
|
||||
# letters.
|
||||
sed -e 's/^[^:]*: / /' \
|
||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||
## This next piece of magic avoids the "deleted header file" problem.
|
||||
|
@ -174,14 +232,14 @@ gcc)
|
|||
## typically no way to rebuild the header). We avoid this by adding
|
||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||
## this for us directly.
|
||||
tr ' ' "$nl" < "$tmpdepfile" |
|
||||
## Some versions of gcc put a space before the ':'. On the theory
|
||||
## that the space means something, we add a space to the output as
|
||||
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||
## to the object. Take care to not repeat it in the output.
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
@ -200,8 +258,7 @@ sgi)
|
|||
"$@" -MDupdate "$tmpdepfile"
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
|
@ -209,7 +266,6 @@ sgi)
|
|||
|
||||
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||
echo "$object : \\" > "$depfile"
|
||||
|
||||
# Clip off the initial element (the dependent). Don't try to be
|
||||
# clever and replace this with sed code, as IRIX sed won't handle
|
||||
# lines with more than a fixed number of characters (4096 in
|
||||
|
@ -217,19 +273,15 @@ sgi)
|
|||
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||
# dependency line.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
|
||||
tr "$nl" ' ' >> "$depfile"
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
|
||||
| tr "$nl" ' ' >> "$depfile"
|
||||
echo >> "$depfile"
|
||||
|
||||
# The second pass generates a dummy entry for each header file.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||
>> "$depfile"
|
||||
else
|
||||
# The sourcefile does not contain any dependencies, so just
|
||||
# store a dummy comment line, to avoid errors with the Makefile
|
||||
# "include basename.Plo" scheme.
|
||||
echo "#dummy" > "$depfile"
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
@ -247,9 +299,8 @@ aix)
|
|||
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||
# start of each line; $object doesn't have directory information.
|
||||
# Version 6 uses the directory in both cases.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$base.u
|
||||
|
@ -262,9 +313,7 @@ aix)
|
|||
"$@" -M
|
||||
fi
|
||||
stat=$?
|
||||
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
@ -273,65 +322,113 @@ aix)
|
|||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
# Each line is of the form 'foo.o: dependent.h'.
|
||||
# Do two passes, one to just change these to
|
||||
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
# The sourcefile does not contain any dependencies, so just
|
||||
# store a dummy comment line, to avoid errors with the Makefile
|
||||
# "include basename.Plo" scheme.
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
icc)
|
||||
# Intel's C compiler anf tcc (Tiny C Compiler) understand '-MD -MF file'.
|
||||
# However on
|
||||
# $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
|
||||
# ICC 7.0 will fill foo.d with something like
|
||||
# foo.o: sub/foo.c
|
||||
# foo.o: sub/foo.h
|
||||
# which is wrong. We want
|
||||
# sub/foo.o: sub/foo.c
|
||||
# sub/foo.o: sub/foo.h
|
||||
# sub/foo.c:
|
||||
# sub/foo.h:
|
||||
# ICC 7.1 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using '\':
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
# tcc 0.9.26 (FIXME still under development at the moment of writing)
|
||||
# will emit a similar output, but also prepend the continuation lines
|
||||
# with horizontal tabulation characters.
|
||||
tcc)
|
||||
# tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
|
||||
# FIXME: That version is still under development at the moment of writing.
|
||||
# Make sure that this statement remains true also for stable, released
|
||||
# versions.
|
||||
# It will wrap lines (doesn't matter whether long or short) with a
|
||||
# trailing '\', as in:
|
||||
#
|
||||
# foo.o : \
|
||||
# foo.c \
|
||||
# foo.h \
|
||||
#
|
||||
# It will put a trailing '\' even on the last line, and will use leading
|
||||
# spaces rather than leading tabs (at least since its commit 0394caf7
|
||||
# "Emit spaces for -MD").
|
||||
"$@" -MD -MF "$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form 'foo.o: dependent.h',
|
||||
# or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
|
||||
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
|
||||
# We have to change lines of the first kind to '$object: \'.
|
||||
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
|
||||
# And for each line of the second kind, we have to emit a 'dep.h:'
|
||||
# dummy dependency, to avoid the deleted-header problem.
|
||||
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
## The order of this option in the case statement is important, since the
|
||||
## shell code in configure will try each of these formats in the order
|
||||
## listed in this file. A plain '-MD' option would be understood by many
|
||||
## compilers, so we must ensure this comes after the gcc and icc options.
|
||||
pgcc)
|
||||
# Portland's C compiler understands '-MD'.
|
||||
# Will always output deps to 'file.d' where file is the root name of the
|
||||
# source file under compilation, even if file resides in a subdirectory.
|
||||
# The object file name does not affect the name of the '.d' file.
|
||||
# pgcc 10.2 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using '\' :
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
set_dir_from "$object"
|
||||
# Use the source, not the object, to determine the base name, since
|
||||
# that's sadly what pgcc will do too.
|
||||
set_base_from "$source"
|
||||
tmpdepfile=$base.d
|
||||
|
||||
# For projects that build the same source file twice into different object
|
||||
# files, the pgcc approach of using the *source* file root name can cause
|
||||
# problems in parallel builds. Use a locking strategy to avoid stomping on
|
||||
# the same $tmpdepfile.
|
||||
lockdir=$base.d-lock
|
||||
trap "
|
||||
echo '$0: caught signal, cleaning up...' >&2
|
||||
rmdir '$lockdir'
|
||||
exit 1
|
||||
" 1 2 13 15
|
||||
numtries=100
|
||||
i=$numtries
|
||||
while test $i -gt 0; do
|
||||
# mkdir is a portable test-and-set.
|
||||
if mkdir "$lockdir" 2>/dev/null; then
|
||||
# This process acquired the lock.
|
||||
"$@" -MD
|
||||
stat=$?
|
||||
# Release the lock.
|
||||
rmdir "$lockdir"
|
||||
break
|
||||
else
|
||||
# If the lock is being held by a different process, wait
|
||||
# until the winning process is done or we timeout.
|
||||
while test -d "$lockdir" && test $i -gt 0; do
|
||||
sleep 1
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
fi
|
||||
i=`expr $i - 1`
|
||||
done
|
||||
trap - 1 2 13 15
|
||||
if test $i -le 0; then
|
||||
echo "$0: failed to acquire lock after $numtries attempts" >&2
|
||||
echo "$0: check lockdir '$lockdir'" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form `foo.o: dependent.h',
|
||||
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||
# Do two passes, one to just change these to
|
||||
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
||||
sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \
|
||||
< "$tmpdepfile" > "$depfile"
|
||||
sed '
|
||||
s/[ '"$tab"'][ '"$tab"']*/ /g
|
||||
s/^ *//
|
||||
s/ *\\*$//
|
||||
s/^[^:]*: *//
|
||||
/^$/d
|
||||
/:$/d
|
||||
s/$/ :/
|
||||
' < "$tmpdepfile" >> "$depfile"
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
|
@ -342,9 +439,8 @@ hp2)
|
|||
# 'foo.d', which lands next to the object file, wherever that
|
||||
# happens to be.
|
||||
# Much of this is similar to the tru64 case; see comments there.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir.libs/$base.d
|
||||
|
@ -355,8 +451,7 @@ hp2)
|
|||
"$@" +Maked
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||
exit $stat
|
||||
fi
|
||||
|
@ -366,7 +461,7 @@ hp2)
|
|||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
# Add 'dependent.h:' lines.
|
||||
sed -ne '2,${
|
||||
s/^ *//
|
||||
|
@ -375,7 +470,7 @@ hp2)
|
|||
p
|
||||
}' "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
echo "#dummy" > "$depfile"
|
||||
make_dummy_depfile
|
||||
fi
|
||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||
;;
|
||||
|
@ -386,55 +481,40 @@ tru64)
|
|||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||
# dependencies in 'foo.d' instead, so we check for that too.
|
||||
# Subdirectories are respected.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
set_dir_from "$object"
|
||||
set_base_from "$object"
|
||||
|
||||
if test "$libtool" = yes; then
|
||||
# With Tru64 cc, shared objects can also be used to make a
|
||||
# static library. This mechanism is used in libtool 1.4 series to
|
||||
# handle both shared and static libraries in a single compilation.
|
||||
# With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
|
||||
#
|
||||
# With libtool 1.5 this exception was removed, and libtool now
|
||||
# generates 2 separate objects for the 2 libraries. These two
|
||||
# compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# Libtool generates 2 separate objects for the 2 libraries. These
|
||||
# two compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# in $dir$base.o.d. We have to check for both files, because
|
||||
# one of the two compilations can be disabled. We should prefer
|
||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||
# the former would cause a distcleancheck panic.
|
||||
tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
|
||||
tmpdepfile2=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
|
||||
tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
tmpdepfile1=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
|
||||
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
"$@" -Wc,-MD
|
||||
else
|
||||
tmpdepfile1=$dir$base.o.d
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
tmpdepfile3=$dir$base.d
|
||||
tmpdepfile4=$dir$base.d
|
||||
"$@" -MD
|
||||
fi
|
||||
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
# Same post-processing that is required for AIX mode.
|
||||
aix_post_process_depfile
|
||||
;;
|
||||
|
||||
msvc7)
|
||||
|
@ -446,8 +526,7 @@ msvc7)
|
|||
"$@" $showIncludes > "$tmpdepfile"
|
||||
stat=$?
|
||||
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||
if test "$stat" = 0; then :
|
||||
else
|
||||
if test $stat -ne 0; then
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
|
@ -473,6 +552,7 @@ $ {
|
|||
G
|
||||
p
|
||||
}' >> "$depfile"
|
||||
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
|
@ -524,13 +604,14 @@ dashmstdout)
|
|||
# in the target name. This is to cope with DOS-style filenames:
|
||||
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||
"$@" $dashmflag |
|
||||
sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
|
||||
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
tr ' ' "$nl" < "$tmpdepfile" | \
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this sed invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
|
@ -583,10 +664,12 @@ makedepend)
|
|||
# makedepend may prepend the VPATH from the source file name to the object.
|
||||
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||
sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process the last invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed '1,2d' "$tmpdepfile" \
|
||||
| tr ' ' "$nl" \
|
||||
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||
;;
|
||||
|
||||
|
@ -622,10 +705,10 @@ cpp)
|
|||
esac
|
||||
done
|
||||
|
||||
"$@" -E |
|
||||
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
|
||||
sed '$ s: \\$::' > "$tmpdepfile"
|
||||
"$@" -E \
|
||||
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
| sed '$ s: \\$::' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
cat < "$tmpdepfile" >> "$depfile"
|
||||
|
|
764
tools/pcre/doc/html/NON-AUTOTOOLS-BUILD.txt
Normal file
764
tools/pcre/doc/html/NON-AUTOTOOLS-BUILD.txt
Normal file
|
@ -0,0 +1,764 @@
|
|||
Building PCRE without using autotools
|
||||
-------------------------------------
|
||||
|
||||
This document contains the following sections:
|
||||
|
||||
General
|
||||
Generic instructions for the PCRE C library
|
||||
The C++ wrapper functions
|
||||
Building for virtual Pascal
|
||||
Stack size in Windows environments
|
||||
Linking programs in Windows environments
|
||||
Calling conventions in Windows environments
|
||||
Comments about Win32 builds
|
||||
Building PCRE on Windows with CMake
|
||||
Use of relative paths with CMake on Windows
|
||||
Testing with RunTest.bat
|
||||
Building under Windows CE with Visual Studio 200x
|
||||
Building under Windows with BCC5.5
|
||||
Building using Borland C++ Builder 2007 (CB2007) and higher
|
||||
Building PCRE on OpenVMS
|
||||
Building PCRE on Stratus OpenVOS
|
||||
Building PCRE on native z/OS and z/VM
|
||||
|
||||
|
||||
GENERAL
|
||||
|
||||
I (Philip Hazel) have no experience of Windows or VMS systems and how their
|
||||
libraries work. The items in the PCRE distribution and Makefile that relate to
|
||||
anything other than Linux systems are untested by me.
|
||||
|
||||
There are some other comments and files (including some documentation in CHM
|
||||
format) in the Contrib directory on the FTP site:
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||
|
||||
The basic PCRE library consists entirely of code written in Standard C, and so
|
||||
should compile successfully on any system that has a Standard C compiler and
|
||||
library. The C++ wrapper functions are a separate issue (see below).
|
||||
|
||||
The PCRE distribution includes a "configure" file for use by the configure/make
|
||||
(autotools) build system, as found in many Unix-like environments. The README
|
||||
file contains information about the options for "configure".
|
||||
|
||||
There is also support for CMake, which some users prefer, especially in Windows
|
||||
environments, though it can also be run in Unix-like environments. See the
|
||||
section entitled "Building PCRE on Windows with CMake" below.
|
||||
|
||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
|
||||
names config.h.generic and pcre.h.generic. These are provided for those who
|
||||
build PCRE without using "configure" or CMake. If you use "configure" or CMake,
|
||||
the .generic versions are not used.
|
||||
|
||||
|
||||
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
|
||||
|
||||
The following are generic instructions for building the PCRE C library "by
|
||||
hand". If you are going to use CMake, this section does not apply to you; you
|
||||
can skip ahead to the CMake section.
|
||||
|
||||
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
|
||||
settings that it contains to whatever is appropriate for your environment.
|
||||
|
||||
In particular, you can alter the definition of the NEWLINE macro to
|
||||
specify what character(s) you want to be interpreted as line terminators.
|
||||
In an EBCDIC environment, you MUST change NEWLINE, because its default
|
||||
value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15,
|
||||
NL), though in some cases it may be 37 (0x25).
|
||||
|
||||
When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H
|
||||
to your compiler so that config.h is included in the sources.
|
||||
|
||||
An alternative approach is not to edit config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need to the
|
||||
configuration options. In this case -DHAVE_CONFIG_H must not be set.
|
||||
|
||||
NOTE: There have been occasions when the way in which certain parameters
|
||||
in config.h are used has changed between releases. (In the configure/make
|
||||
world, this is handled automatically.) When upgrading to a new release,
|
||||
you are strongly advised to review config.h.generic before re-using what
|
||||
you had previously.
|
||||
|
||||
(2) Copy or rename the file pcre.h.generic as pcre.h.
|
||||
|
||||
(3) EITHER:
|
||||
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
|
||||
you have set up config.h), and then run it with the single argument
|
||||
"pcre_chartables.c". This generates a set of standard character tables
|
||||
and writes them to that file. The tables are generated using the default
|
||||
C locale for your system. If you want to use a locale that is specified
|
||||
by LC_xxx environment variables, add the -L option to the dftables
|
||||
command. You must use this method if you are building on a system that
|
||||
uses EBCDIC code.
|
||||
|
||||
The tables in pcre_chartables.c are defaults. The caller of PCRE can
|
||||
specify alternative tables at run time.
|
||||
|
||||
(4) Ensure that you have the following header files:
|
||||
|
||||
pcre_internal.h
|
||||
ucp.h
|
||||
|
||||
(5) For an 8-bit library, compile the following source files, setting
|
||||
-DHAVE_CONFIG_H as a compiler option if you have set up config.h with your
|
||||
configuration, or else use other -D settings to change the configuration
|
||||
as required.
|
||||
|
||||
pcre_byte_order.c
|
||||
pcre_chartables.c
|
||||
pcre_compile.c
|
||||
pcre_config.c
|
||||
pcre_dfa_exec.c
|
||||
pcre_exec.c
|
||||
pcre_fullinfo.c
|
||||
pcre_get.c
|
||||
pcre_globals.c
|
||||
pcre_jit_compile.c
|
||||
pcre_maketables.c
|
||||
pcre_newline.c
|
||||
pcre_ord2utf8.c
|
||||
pcre_refcount.c
|
||||
pcre_string_utils.c
|
||||
pcre_study.c
|
||||
pcre_tables.c
|
||||
pcre_ucd.c
|
||||
pcre_valid_utf8.c
|
||||
pcre_version.c
|
||||
pcre_xclass.c
|
||||
|
||||
Make sure that you include -I. in the compiler command (or equivalent for
|
||||
an unusual compiler) so that all included PCRE header files are first
|
||||
sought in the current directory. Otherwise you run the risk of picking up
|
||||
a previously-installed file from somewhere else.
|
||||
|
||||
Note that you must still compile pcre_jit_compile.c, even if you have not
|
||||
defined SUPPORT_JIT in config.h, because when JIT support is not
|
||||
configured, dummy functions are compiled. When JIT support IS configured,
|
||||
pcre_jit_compile.c #includes sources from the sljit subdirectory, where
|
||||
there should be 16 files, all of whose names begin with "sljit".
|
||||
|
||||
(6) Now link all the compiled code into an object library in whichever form
|
||||
your system keeps such libraries. This is the basic PCRE C 8-bit library.
|
||||
If your system has static and shared libraries, you may have to do this
|
||||
once for each type.
|
||||
|
||||
(7) If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
||||
or 32-bit libraries) repeat steps 5-6 with the following files:
|
||||
|
||||
pcre16_byte_order.c
|
||||
pcre16_chartables.c
|
||||
pcre16_compile.c
|
||||
pcre16_config.c
|
||||
pcre16_dfa_exec.c
|
||||
pcre16_exec.c
|
||||
pcre16_fullinfo.c
|
||||
pcre16_get.c
|
||||
pcre16_globals.c
|
||||
pcre16_jit_compile.c
|
||||
pcre16_maketables.c
|
||||
pcre16_newline.c
|
||||
pcre16_ord2utf16.c
|
||||
pcre16_refcount.c
|
||||
pcre16_string_utils.c
|
||||
pcre16_study.c
|
||||
pcre16_tables.c
|
||||
pcre16_ucd.c
|
||||
pcre16_utf16_utils.c
|
||||
pcre16_valid_utf16.c
|
||||
pcre16_version.c
|
||||
pcre16_xclass.c
|
||||
|
||||
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||
|
||||
pcre32_byte_order.c
|
||||
pcre32_chartables.c
|
||||
pcre32_compile.c
|
||||
pcre32_config.c
|
||||
pcre32_dfa_exec.c
|
||||
pcre32_exec.c
|
||||
pcre32_fullinfo.c
|
||||
pcre32_get.c
|
||||
pcre32_globals.c
|
||||
pcre32_jit_compile.c
|
||||
pcre32_maketables.c
|
||||
pcre32_newline.c
|
||||
pcre32_ord2utf32.c
|
||||
pcre32_refcount.c
|
||||
pcre32_string_utils.c
|
||||
pcre32_study.c
|
||||
pcre32_tables.c
|
||||
pcre32_ucd.c
|
||||
pcre32_utf32_utils.c
|
||||
pcre32_valid_utf32.c
|
||||
pcre32_version.c
|
||||
pcre32_xclass.c
|
||||
|
||||
(9) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
8-bit library), ensure that you have the pcreposix.h file and then compile
|
||||
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
|
||||
(on its own) as the pcreposix library.
|
||||
|
||||
(10) The pcretest program can be linked with any combination of the 8-bit,
|
||||
16-bit and 32-bit libraries (depending on what you selected in config.h).
|
||||
Compile pcretest.c and pcre_printint.c (again, don't forget
|
||||
-DHAVE_CONFIG_H) and link them together with the appropriate library/ies.
|
||||
If you compiled an 8-bit library, pcretest also needs the pcreposix
|
||||
wrapper library unless you compiled it with -DNOPOSIX.
|
||||
|
||||
(11) Run pcretest on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. There are
|
||||
comments about what each test does in the section entitled "Testing PCRE"
|
||||
in the README file. If you compiled more than one of the 8-bit, 16-bit and
|
||||
32-bit libraries, you need to run pcretest with the -16 option to do
|
||||
16-bit tests and with the -32 option to do 32-bit tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options are selected.
|
||||
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run
|
||||
if you have built PCRE without it. See the comments at the start of each
|
||||
testinput file. If you have a suitable Unix-like shell, the RunTest script
|
||||
will run the appropriate tests for you. The command "RunTest list" will
|
||||
output a list of all the tests.
|
||||
|
||||
Note that the supplied files are in Unix format, with just LF characters
|
||||
as line terminators. You may need to edit them to change this if your
|
||||
system uses a different convention. If you are using Windows, you probably
|
||||
should use the wintestinput3 file instead of testinput3 (and the
|
||||
corresponding output file). This is a locale test; wintestinput3 sets the
|
||||
locale to "french" rather than "fr_FR", and there are some minor output
|
||||
differences.
|
||||
|
||||
(12) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
|
||||
by the testdata files. However, you might also like to build and run
|
||||
the freestanding JIT test program, pcre_jit_test.c.
|
||||
|
||||
(13) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
uses only the basic 8-bit PCRE library (it does not need the pcreposix
|
||||
library).
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
|
||||
The PCRE distribution also contains some C++ wrapper functions and tests,
|
||||
applicable to the 8-bit library, which were contributed by Google Inc. On a
|
||||
system that can use "configure" and "make", the functions are automatically
|
||||
built into a library called pcrecpp. It should be straightforward to compile
|
||||
the .cc files manually on other systems. The files called xxx_unittest.cc are
|
||||
test programs for each of the corresponding xxx.cc files.
|
||||
|
||||
|
||||
BUILDING FOR VIRTUAL PASCAL
|
||||
|
||||
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
|
||||
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
|
||||
additional files. The following files in the distribution are for building PCRE
|
||||
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
|
||||
|
||||
|
||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||
|
||||
The default processor stack size of 1Mb in some Windows environments is too
|
||||
small for matching patterns that need much recursion. In particular, test 2 may
|
||||
fail because of this. Normally, running out of stack causes a crash, but there
|
||||
have been cases where the test program has just died silently. See your linker
|
||||
documentation for how to increase stack size if you experience problems. The
|
||||
Linux default of 8Mb is a reasonable choice for the stack, though even that can
|
||||
be too small for some pattern/subject combinations.
|
||||
|
||||
PCRE has a compile configuration option to disable the use of stack for
|
||||
recursion so that heap is used instead. However, pattern matching is
|
||||
significantly slower when this is done. There is more about stack usage in the
|
||||
"pcrestack" documentation.
|
||||
|
||||
|
||||
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
If you want to statically link a program against a PCRE library in the form of
|
||||
a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
|
||||
pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
|
||||
be declared __declspec(dllimport), with unwanted results.
|
||||
|
||||
|
||||
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
|
||||
|
||||
It is possible to compile programs to use different calling conventions using
|
||||
MSVC. Search the web for "calling conventions" for more information. To make it
|
||||
easier to change the calling convention for the exported functions in the
|
||||
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
|
||||
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
|
||||
not set, it defaults to empty; the default calling convention is then used
|
||||
(which is what is wanted most of the time).
|
||||
|
||||
|
||||
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE")
|
||||
|
||||
There are two ways of building PCRE using the "configure, make, make install"
|
||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||
the same thing; they are completely different from each other. There is also
|
||||
support for building using CMake, which some users find a more straightforward
|
||||
way of building PCRE under Windows.
|
||||
|
||||
The MinGW home page (http://www.mingw.org/) says this:
|
||||
|
||||
MinGW: A collection of freely available and freely distributable Windows
|
||||
specific header files and import libraries combined with GNU toolsets that
|
||||
allow one to produce native Windows programs that do not rely on any
|
||||
3rd-party C runtime DLLs.
|
||||
|
||||
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||
|
||||
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||
|
||||
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||
substantial Linux API functionality
|
||||
|
||||
. A collection of tools which provide Linux look and feel.
|
||||
|
||||
The Cygwin DLL currently works with all recent, commercially released x86 32
|
||||
bit and 64 bit versions of Windows, with the exception of Windows CE.
|
||||
|
||||
On both MinGW and Cygwin, PCRE should build correctly using:
|
||||
|
||||
./configure && make && make install
|
||||
|
||||
This should create two libraries called libpcre and libpcreposix, and, if you
|
||||
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
|
||||
independent libraries: when you link with libpcreposix or libpcrecpp you must
|
||||
also link with libpcre, which contains the basic functions. (Some earlier
|
||||
releases of PCRE included the basic libpcre functions in libpcreposix. This no
|
||||
longer happens.)
|
||||
|
||||
A user submitted a special-purpose patch that makes it easy to create
|
||||
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
|
||||
as a special target. If you use this target, no other files are built, and in
|
||||
particular, the pcretest and pcregrep programs are not built. An example of how
|
||||
this might be used is:
|
||||
|
||||
./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
|
||||
|
||||
Using Cygwin's compiler generates libraries and executables that depend on
|
||||
cygwin1.dll. If a library that is generated this way is distributed,
|
||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||
licence, this forces not only PCRE to be under the GPL, but also the entire
|
||||
application. A distributor who wants to keep their own code proprietary must
|
||||
purchase an appropriate Cygwin licence.
|
||||
|
||||
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||
executable that can run standalone on Windows without any third party dll or
|
||||
licensing issues.
|
||||
|
||||
But there is more complication:
|
||||
|
||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||
gcc and MinGW's gcc). So, a user can:
|
||||
|
||||
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||
-mno-cygwin.
|
||||
|
||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||
compiler flags.
|
||||
|
||||
The test files that are supplied with PCRE are in UNIX format, with LF
|
||||
characters as line terminators. Unless your PCRE library uses a default newline
|
||||
option that includes LF as a valid newline, it may be necessary to change the
|
||||
line terminators in the test files to get some of the tests to work.
|
||||
|
||||
|
||||
BUILDING PCRE ON WINDOWS WITH CMAKE
|
||||
|
||||
CMake is an alternative configuration facility that can be used instead of
|
||||
"configure". CMake creates project files (make files, solution files, etc.)
|
||||
tailored to numerous development environments, including Visual Studio,
|
||||
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||
spaces in the names for your CMake installation and your PCRE source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE user. If they are not
|
||||
followed exactly, errors may occur. In the event that errors do occur, it is
|
||||
recommended that you delete the CMake cache before attempting to repeat the
|
||||
CMake build process. In the CMake GUI, the cache can be deleted by selecting
|
||||
"File > Delete Cache".
|
||||
|
||||
1. Install the latest CMake version available from http://www.cmake.org/, and
|
||||
ensure that cmake\bin is on your path.
|
||||
|
||||
2. Unzip (retaining folder structure) the PCRE source tree into a source
|
||||
directory such as C:\pcre. You should ensure your local date and time
|
||||
is not earlier than the file dates in your source dir if the release is
|
||||
very new.
|
||||
|
||||
3. Create a new, empty build directory, preferably a subdirectory of the
|
||||
source dir. For example, C:\pcre\pcre-xx\build.
|
||||
|
||||
4. Run cmake-gui from the Shell environment of your build tool, for example,
|
||||
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
|
||||
to start CMake from the Windows Start menu, as this can lead to errors.
|
||||
|
||||
5. Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build
|
||||
directories, respectively.
|
||||
|
||||
6. Hit the "Configure" button.
|
||||
|
||||
7. Select the particular IDE / build tool that you are using (Visual
|
||||
Studio, MSYS makefiles, MinGW makefiles, etc.)
|
||||
|
||||
8. The GUI will then list several configuration options. This is where
|
||||
you can enable UTF-8 support or other PCRE optional features.
|
||||
|
||||
9. Hit "Configure" again. The adjacent "Generate" button should now be
|
||||
active.
|
||||
|
||||
10. Hit "Generate".
|
||||
|
||||
11. The build directory should now contain a usable build system, be it a
|
||||
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
|
||||
cmake-gui and use the generated build system with your compiler or IDE.
|
||||
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE
|
||||
solution, select the desired configuration (Debug, or Release, etc.) and
|
||||
build the ALL_BUILD project.
|
||||
|
||||
12. If during configuration with cmake-gui you've elected to build the test
|
||||
programs, you can execute them by building the test project. E.g., for
|
||||
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
|
||||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
|
||||
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
|
||||
|
||||
A PCRE user comments as follows: I thought that others may want to know the
|
||||
current state of CMAKE_USE_RELATIVE_PATHS support on Windows. Here it is:
|
||||
|
||||
-- AdditionalIncludeDirectories is only partially modified (only the
|
||||
first path - see below)
|
||||
-- Only some of the contained file paths are modified - shown below for
|
||||
pcre.vcproj
|
||||
-- It properly modifies
|
||||
|
||||
I am sure CMake people can fix that if they want to. Until then one will
|
||||
need to replace existing absolute paths in project files with relative
|
||||
paths manually (e.g. from VS) - relative to project file location. I did
|
||||
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
|
||||
deal.
|
||||
|
||||
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
|
||||
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
|
||||
|
||||
RelativePath="pcre.h"
|
||||
RelativePath="pcre_chartables.c"
|
||||
RelativePath="pcre_chartables.c.rule"
|
||||
|
||||
|
||||
TESTING WITH RUNTEST.BAT
|
||||
|
||||
If configured with CMake, building the test project ("make test" or building
|
||||
RUN_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending
|
||||
on your configuration options, possibly other test programs) in the build
|
||||
directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths.
|
||||
|
||||
For manual testing with RunTest.bat, provided the build dir is a subdirectory
|
||||
of the source directory: Open command shell window. Chdir to the location
|
||||
of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with
|
||||
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
|
||||
|
||||
To run only a particular test with RunTest.Bat provide a test number argument.
|
||||
|
||||
Otherwise:
|
||||
|
||||
1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe
|
||||
have been created.
|
||||
|
||||
2. Edit RunTest.bat to identify the full or relative location of
|
||||
the pcre source (wherein the testdata folder resides), e.g.:
|
||||
|
||||
set srcdir=C:\pcre\pcre-8.20
|
||||
|
||||
3. In a Windows command environment, chdir to the location of your bat and
|
||||
exe programs.
|
||||
|
||||
4. Run RunTest.bat. Test outputs will automatically be compared to expected
|
||||
results, and discrepancies will be identified in the console output.
|
||||
|
||||
To independently test the just-in-time compiler, run pcre_jit_test.exe.
|
||||
To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
|
||||
pcre_scanner_unittest.exe.
|
||||
|
||||
|
||||
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
|
||||
|
||||
Vincent Richomme sent a zip archive of files to help with this process. They
|
||||
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
|
||||
site.
|
||||
|
||||
|
||||
BUILDING UNDER WINDOWS WITH BCC5.5
|
||||
|
||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||
|
||||
Some of the core BCC libraries have a version of PCRE from 1998 built in, which
|
||||
can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a version
|
||||
mismatch. I'm including an easy workaround below, if you'd like to include it
|
||||
in the non-unix instructions:
|
||||
|
||||
When linking a project with BCC5.5, pcre.lib must be included before any of the
|
||||
libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command line.
|
||||
|
||||
|
||||
BUILDING USING BORLAND C++ BUILDER 2007 (CB2007) AND HIGHER
|
||||
|
||||
A PCRE user sent these comments about this environment (see also the comment
|
||||
from another user that follows them):
|
||||
|
||||
The XE versions of C++ Builder come with a RegularExpressionsCore class which
|
||||
contains a version of TPerlRegEx. However, direct use of the C PCRE library may
|
||||
be desirable.
|
||||
|
||||
The default makevp.bat, however, supplied with PCRE builds a version of PCRE
|
||||
that is not usable with any version of C++ Builder because the compiler ships
|
||||
with an embedded version of PCRE, version 2.01 from 1998! [See also the note
|
||||
about BCC5.5 above.] If you want to use PCRE you'll need to rename the
|
||||
functions (pcre_compile to pcre_compile_bcc, etc) or do as I have done and just
|
||||
use the 16 bit versions. I'm using std::wstring everywhere anyway. Since the
|
||||
embedded version of PCRE does not have the 16 bit function names, there is no
|
||||
conflict.
|
||||
|
||||
Building PCRE using a C++ Builder static library project file (recommended):
|
||||
|
||||
1. Rename or remove pcre.h, pcreposi.h, and pcreposix.h from your C++ Builder
|
||||
original include path.
|
||||
|
||||
2. Download PCRE from pcre.org and extract to a directory.
|
||||
|
||||
3. Rename pcre_chartables.c.dist to pcre_chartables.c, pcre.h.generic to
|
||||
pcre.h, and config.h.generic to config.h.
|
||||
|
||||
4. Edit pcre.h and pcre_config.c so that they include config.h.
|
||||
|
||||
5. Edit config.h like so:
|
||||
|
||||
Comment out the following lines:
|
||||
#define PACKAGE "pcre"
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
#define PACKAGE_STRING "PCRE 8.32"
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
#define PACKAGE_URL ""
|
||||
#define PACKAGE_VERSION "8.32"
|
||||
|
||||
Add the following lines:
|
||||
#ifndef SUPPORT_UTF
|
||||
#define SUPPORT_UTF 100 // any value is fine
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_UCP
|
||||
#define SUPPORT_UCP 101 // any value is fine
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_PCRE16
|
||||
#define SUPPORT_PCRE16 102 // any value is fine
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define SUPPORT_UTF8 103 // any value is fine
|
||||
#endif
|
||||
|
||||
6. Build a C++ Builder project using the IDE. Go to File / New / Other and
|
||||
choose Static Library. You can name it pcre.cbproj or whatever. Now set your
|
||||
paths by going to Project / Options. Set the Include path. Do this from the
|
||||
"Base" option to apply to both Release and Debug builds. Now add the following
|
||||
files to the project:
|
||||
|
||||
pcre.h
|
||||
pcre16_byte_order.c
|
||||
pcre16_chartables.c
|
||||
pcre16_compile.c
|
||||
pcre16_config.c
|
||||
pcre16_dfa_exec.c
|
||||
pcre16_exec.c
|
||||
pcre16_fullinfo.c
|
||||
pcre16_get.c
|
||||
pcre16_globals.c
|
||||
pcre16_maketables.c
|
||||
pcre16_newline.c
|
||||
pcre16_ord2utf16.c
|
||||
pcre16_printint.c
|
||||
pcre16_refcount.c
|
||||
pcre16_string_utils.c
|
||||
pcre16_study.c
|
||||
pcre16_tables.c
|
||||
pcre16_ucd.c
|
||||
pcre16_utf16_utils.c
|
||||
pcre16_valid_utf16.c
|
||||
pcre16_version.c
|
||||
pcre16_xclass.c
|
||||
|
||||
//Optional
|
||||
pcre_version.c
|
||||
|
||||
7. After compiling the .lib file, copy the .lib and header files to a project
|
||||
you want to use PCRE with. Enjoy.
|
||||
|
||||
Optional ... Building PCRE using the makevp.bat file:
|
||||
|
||||
1. Edit makevp_c.txt and makevp_l.txt and change all the names to the 16 bit
|
||||
versions.
|
||||
|
||||
2. Edit makevp.bat and set the path to C++ Builder. Run makevp.bat.
|
||||
|
||||
Another PCRE user added this comment:
|
||||
|
||||
Another approach I successfully used for some years with BCB 5 and 6 was to
|
||||
make sure that include and library paths of PCRE are configured before the
|
||||
default paths of the IDE in the dialogs where one can manage those paths.
|
||||
Afterwards one can open the project files using a text editor and manually add
|
||||
the self created library for pcre itself, pcrecpp doesn't ship with the IDE, in
|
||||
the library nodes where the IDE manages its own libraries to link against in
|
||||
front of the IDE-own libraries. This way one can use the default PCRE function
|
||||
names without getting access violations on runtime.
|
||||
|
||||
<ALLLIB value="libpcre.lib $(LIBFILES) $(LIBRARIES) import32.lib cp32mt.lib"/>
|
||||
|
||||
|
||||
BUILDING PCRE ON OPENVMS
|
||||
|
||||
Stephen Hoffman sent the following, in December 2012:
|
||||
|
||||
"Here <http://labs.hoffmanlabs.com/node/1847> is a very short write-up on the
|
||||
OpenVMS port and here
|
||||
|
||||
<http://labs.hoffmanlabs.com/labsnotes/pcre-vms-8_32.zip>
|
||||
|
||||
is a zip with the OpenVMS files, and with one modified testing-related PCRE
|
||||
file." This is a port of PCRE 8.32.
|
||||
|
||||
Earlier, Dan Mooney sent the following comments about building PCRE on OpenVMS.
|
||||
They relate to an older version of PCRE that used fewer source files, so the
|
||||
exact commands will need changing. See the current list of source files above.
|
||||
|
||||
"It was quite easy to compile and link the library. I don't have a formal
|
||||
make file but the attached file [reproduced below] contains the OpenVMS DCL
|
||||
commands I used to build the library. I had to add #define
|
||||
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
|
||||
|
||||
The library was built on:
|
||||
O/S: HP OpenVMS v7.3-1
|
||||
Compiler: Compaq C v6.5-001-48BCD
|
||||
Linker: vA13-01
|
||||
|
||||
The test results did not match 100% due to the issues you mention in your
|
||||
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
|
||||
modified some of the character tables temporarily and was able to get the
|
||||
results to match. Tests using the fr locale did not match since I don't have
|
||||
that locale loaded. The study size was always reported to be 3 less than the
|
||||
value in the standard test output files."
|
||||
|
||||
=========================
|
||||
$! This DCL procedure builds PCRE on OpenVMS
|
||||
$!
|
||||
$! I followed the instructions in the non-unix-use file in the distribution.
|
||||
$!
|
||||
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES"
|
||||
$ COMPILE DFTABLES.C
|
||||
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
|
||||
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
|
||||
$ COMPILE MAKETABLES.C
|
||||
$ COMPILE GET.C
|
||||
$ COMPILE STUDY.C
|
||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
||||
$! did not seem to be defined anywhere.
|
||||
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
|
||||
$ COMPILE PCRE.C
|
||||
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
|
||||
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
|
||||
$! did not seem to be defined anywhere.
|
||||
$ COMPILE PCREPOSIX.C
|
||||
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
|
||||
$ COMPILE PCRETEST.C
|
||||
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
|
||||
$! C programs that want access to command line arguments must be
|
||||
$! defined as a symbol
|
||||
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
|
||||
$! Arguments must be enclosed in quotes.
|
||||
$ PCRETEST "-C"
|
||||
$! Test results:
|
||||
$!
|
||||
$! The test results did not match 100%. The functions isprint(), iscntrl(),
|
||||
$! isgraph() and ispunct() on OpenVMS must not produce the same results
|
||||
$! as the system that built the test output files provided with the
|
||||
$! distribution.
|
||||
$!
|
||||
$! The study size did not match and was always 3 less on OpenVMS.
|
||||
$!
|
||||
$! Locale could not be set to fr
|
||||
$!
|
||||
=========================
|
||||
|
||||
|
||||
BUILDING PCRE ON STRATUS OPENVOS
|
||||
|
||||
These notes on the port of PCRE to VOS (lightly edited) were supplied by
|
||||
Ashutosh Warikoo, whose email address has the local part awarikoo and the
|
||||
domain nse.co.in. The port was for version 7.9 in August 2009.
|
||||
|
||||
1. Building PCRE
|
||||
|
||||
I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any
|
||||
problems. I used the following packages to build PCRE:
|
||||
|
||||
ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz
|
||||
|
||||
Please read and follow the instructions that come with these packages. To start
|
||||
the build of pcre, from the root of the package type:
|
||||
|
||||
./build.sh
|
||||
|
||||
2. Installing PCRE
|
||||
|
||||
Once you have successfully built PCRE, login to the SysAdmin group, switch to
|
||||
the root user, and type
|
||||
|
||||
[ !create_dir (master_disk)>usr --if needed ]
|
||||
[ !create_dir (master_disk)>usr>local --if needed ]
|
||||
!gmake install
|
||||
|
||||
This installs PCRE and its man pages into /usr/local. You can add
|
||||
(master_disk)>usr>local>bin to your command search paths, or if you are in
|
||||
BASH, add /usr/local/bin to the PATH environment variable.
|
||||
|
||||
4. Restrictions
|
||||
|
||||
This port requires readline library optionally. However during the build I
|
||||
faced some yet unexplored errors while linking with readline. As it was an
|
||||
optional component I chose to disable it.
|
||||
|
||||
5. Known Problems
|
||||
|
||||
I ran the test suite, but you will have to be your own judge of whether this
|
||||
command, and this port, suits your purposes. If you find any problems that
|
||||
appear to be related to the port itself, please let me know. Please see the
|
||||
build.log file in the root of the package also.
|
||||
|
||||
|
||||
BUILDING PCRE ON NATIVE Z/OS AND Z/VM
|
||||
|
||||
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
|
||||
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
|
||||
applications can be supported through UNIX System Services, and in such an
|
||||
environment PCRE can be built in the same way as in other systems. However, in
|
||||
native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||
required. For details, please see this web site:
|
||||
|
||||
http://www.zaconsultants.net
|
||||
|
||||
There is also a mirror here:
|
||||
|
||||
http://www.vsoft-software.com/downloads.html
|
||||
|
||||
==========================
|
||||
Last Updated: 14 May 2013
|
991
tools/pcre/doc/html/README.txt
Normal file
991
tools/pcre/doc/html/README.txt
Normal file
|
@ -0,0 +1,991 @@
|
|||
README file for PCRE (Perl-compatible regular expression library)
|
||||
-----------------------------------------------------------------
|
||||
|
||||
The latest release of PCRE is always available in three alternative formats
|
||||
from:
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE at
|
||||
pcre-dev@exim.org. You can access the archives and subscribe or manage your
|
||||
subscription here:
|
||||
|
||||
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
|
||||
The PCRE APIs
|
||||
Documentation for PCRE
|
||||
Contributions by users of PCRE
|
||||
Building PCRE on non-Unix-like systems
|
||||
Building PCRE without using autotools
|
||||
Building PCRE using autotools
|
||||
Retrieving configuration information
|
||||
Shared libraries
|
||||
Cross-compiling using autotools
|
||||
Using HP's ANSI C++ compiler (aCC)
|
||||
Compiling in Tru64 using native compilers
|
||||
Using Sun's compilers for Solaris
|
||||
Using PCRE from MySQL
|
||||
Making new tarballs
|
||||
Testing PCRE
|
||||
Character tables
|
||||
File manifest
|
||||
|
||||
|
||||
The PCRE APIs
|
||||
-------------
|
||||
|
||||
PCRE is written in C, and it has its own API. There are three sets of
|
||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||
32-bit library, which processes strings of 32-bit values. The distribution also
|
||||
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
|
||||
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
|
||||
C++.
|
||||
|
||||
In addition, there is a set of C wrapper functions (again, just for the 8-bit
|
||||
library) that are based on the POSIX regular expression API (see the pcreposix
|
||||
man page). These end up in the library called libpcreposix. Note that this just
|
||||
provides a POSIX calling interface to PCRE; the regular expressions themselves
|
||||
still follow Perl syntax and semantics. The POSIX API is restricted, and does
|
||||
not give full access to all of PCRE's facilities.
|
||||
|
||||
The header file for the POSIX-style functions is called pcreposix.h. The
|
||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE with
|
||||
an existing program that uses the POSIX API, pcreposix.h will have to be
|
||||
renamed or pointed at by a link.
|
||||
|
||||
If you are using the POSIX interface to PCRE and there is already a POSIX regex
|
||||
library installed on your system, as well as worrying about the regex.h header
|
||||
file (as mentioned above), you must also take care when linking programs to
|
||||
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
|
||||
up the POSIX functions of the same name from the other library.
|
||||
|
||||
One way of avoiding this confusion is to compile PCRE with the addition of
|
||||
-Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the
|
||||
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
|
||||
effect of renaming the functions so that the names no longer clash. Of course,
|
||||
you have to do the same thing for your applications, or write them using the
|
||||
new names.
|
||||
|
||||
|
||||
Documentation for PCRE
|
||||
----------------------
|
||||
|
||||
If you install PCRE in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre". The one that is just
|
||||
called "pcre" lists all the others. In addition to these man pages, the PCRE
|
||||
documentation is supplied in two other forms:
|
||||
|
||||
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
|
||||
doc/pcretest.txt in the source distribution. The first of these is a
|
||||
concatenation of the text forms of all the section 3 man pages except
|
||||
the listing of pcredemo.c and those that summarize individual functions.
|
||||
The other two are the text forms of the section 1 man pages for the
|
||||
pcregrep and pcretest commands. These text forms are provided for ease of
|
||||
scanning with text editors or similar tools. They are installed in
|
||||
<prefix>/share/doc/pcre, where <prefix> is the installation prefix
|
||||
(defaulting to /usr/local).
|
||||
|
||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||
in various ways, and rooted in a file called index.html, is distributed in
|
||||
doc/html and installed in <prefix>/share/doc/pcre/html.
|
||||
|
||||
Users of PCRE have contributed files containing the documentation for various
|
||||
releases in CHM format. These can be found in the Contrib directory of the FTP
|
||||
site (see next section).
|
||||
|
||||
|
||||
Contributions by users of PCRE
|
||||
------------------------------
|
||||
|
||||
You can find contributions from PCRE users in the directory
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||
|
||||
There is a README file giving brief descriptions of what they are. Some are
|
||||
complete in themselves; others are pointers to URLs containing relevant files.
|
||||
Some of this material is likely to be well out-of-date. Several of the earlier
|
||||
contributions provided support for compiling PCRE on various flavours of
|
||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||
in the standard distribution, so these contributions have been archived.
|
||||
|
||||
A PCRE user maintains downloadable Windows binaries of the pcregrep and
|
||||
pcretest programs here:
|
||||
|
||||
http://www.rexegg.com/pcregrep-pcretest.html
|
||||
|
||||
|
||||
Building PCRE on non-Unix-like systems
|
||||
--------------------------------------
|
||||
|
||||
For a non-Unix-like system, please read the comments in the file
|
||||
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||
"make" you may be able to build PCRE using autotools in the same way as for
|
||||
many Unix-like systems.
|
||||
|
||||
PCRE can also be configured using the GUI facility provided by CMake's
|
||||
cmake-gui command. This creates Makefiles, solution files, etc. The file
|
||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||
|
||||
PCRE has been compiled on many different operating systems. It should be
|
||||
straightforward to build PCRE on any system that has a Standard C compiler and
|
||||
library, because it uses only Standard C functions.
|
||||
|
||||
|
||||
Building PCRE without using autotools
|
||||
-------------------------------------
|
||||
|
||||
The use of autotools (in particular, libtool) is problematic in some
|
||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||
file for ways of building PCRE without using autotools.
|
||||
|
||||
|
||||
Building PCRE using autotools
|
||||
-----------------------------
|
||||
|
||||
If you are using HP's ANSI C++ compiler (aCC), please see the special note
|
||||
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
|
||||
|
||||
The following instructions assume the use of the widely used "configure; make;
|
||||
make install" (autotools) process.
|
||||
|
||||
To build PCRE on a system that supports autotools, first run the "configure"
|
||||
command from the PCRE distribution directory, with your current directory set
|
||||
to the directory where you want the files to be created. This command is a
|
||||
standard GNU "autoconf" configuration script, for which generic instructions
|
||||
are supplied in the file INSTALL.
|
||||
|
||||
Most commonly, people build PCRE within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient. However,
|
||||
the usual methods of changing standard defaults are available. For example:
|
||||
|
||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||
|
||||
This command specifies that the C compiler should be run with the flags '-O2
|
||||
-Wall' instead of the default, and that "make install" should install PCRE
|
||||
under /opt/local instead of the default /usr/local.
|
||||
|
||||
If you want to build in a different directory, just run "configure" with that
|
||||
directory as current. For example, suppose you have unpacked the PCRE source
|
||||
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
|
||||
|
||||
cd /build/pcre/pcre-xxx
|
||||
/source/pcre/pcre-xxx/configure
|
||||
|
||||
PCRE is written in C and is normally compiled as a C library. However, it is
|
||||
possible to build it as a C++ library, though the provided building apparatus
|
||||
does not have any features to support this.
|
||||
|
||||
There are some optional features that can be included or omitted from the PCRE
|
||||
library. They are also documented in the pcrebuild man page.
|
||||
|
||||
. By default, both shared and static libraries are built. You can change this
|
||||
by adding one of these options to the "configure" command:
|
||||
|
||||
--disable-shared
|
||||
--disable-static
|
||||
|
||||
(See also "Shared libraries on Unix-like systems" below.)
|
||||
|
||||
. By default, only the 8-bit library is built. If you add --enable-pcre16 to
|
||||
the "configure" command, the 16-bit library is also built. If you add
|
||||
--enable-pcre32 to the "configure" command, the 32-bit library is also built.
|
||||
If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
|
||||
building the 8-bit library.
|
||||
|
||||
. If you are building the 8-bit library and want to suppress the building of
|
||||
the C++ wrapper library, you can add --disable-cpp to the "configure"
|
||||
command. Otherwise, when "configure" is run without --disable-pcre8, it will
|
||||
try to find a C++ compiler and C++ header files, and if it succeeds, it will
|
||||
try to build the C++ wrapper.
|
||||
|
||||
. If you want to include support for just-in-time compiling, which can give
|
||||
large performance improvements on certain platforms, add --enable-jit to the
|
||||
"configure" command. This support is available only for certain hardware
|
||||
architectures. If you try to enable it on an unsupported architecture, there
|
||||
will be a compile time error.
|
||||
|
||||
. When JIT support is enabled, pcregrep automatically makes use of it, unless
|
||||
you add --disable-pcregrep-jit to the "configure" command.
|
||||
|
||||
. If you want to make use of the support for UTF-8 Unicode character strings in
|
||||
the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
|
||||
or UTF-32 Unicode character strings in the 32-bit library, you must add
|
||||
--enable-utf to the "configure" command. Without it, the code for handling
|
||||
UTF-8, UTF-16 and UTF-32 is not included in the relevant library. Even
|
||||
when --enable-utf is included, the use of a UTF encoding still has to be
|
||||
enabled by an option at run time. When PCRE is compiled with this option, its
|
||||
input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC
|
||||
platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
|
||||
the same time.
|
||||
|
||||
. There are no separate options for enabling UTF-8, UTF-16 and UTF-32
|
||||
independently because that would allow ridiculous settings such as requesting
|
||||
UTF-16 support while building only the 8-bit library. However, the option
|
||||
--enable-utf8 is retained for backwards compatibility with earlier releases
|
||||
that did not support 16-bit or 32-bit character strings. It is synonymous with
|
||||
--enable-utf. It is not possible to configure one library with UTF support
|
||||
and the other without in the same configuration.
|
||||
|
||||
. If, in addition to support for UTF-8/16/32 character strings, you want to
|
||||
include support for the \P, \p, and \X sequences that recognize Unicode
|
||||
character properties, you must add --enable-unicode-properties to the
|
||||
"configure" command. This adds about 30K to the size of the library (in the
|
||||
form of a property table); only the basic two-letter properties such as Lu
|
||||
are supported.
|
||||
|
||||
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
|
||||
of the preceding, or any of the Unicode newline sequences as indicating the
|
||||
end of a line. Whatever you specify at build time is the default; the caller
|
||||
of PCRE can change the selection at run time. The default newline indicator
|
||||
is a single LF character (the Unix standard). You can specify the default
|
||||
newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
|
||||
or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
the standard tests will fail, because the lines in the test files end with
|
||||
LF. Even if the files are edited to change the line endings, there are likely
|
||||
to be some failures. With --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any, many tests should succeed, but there may be some
|
||||
failures.
|
||||
|
||||
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||
sequence. This is independent of the option specifying what PCRE considers to
|
||||
be the end of a line (see above). However, the caller of PCRE can restrict \R
|
||||
to match only CR, LF, or CRLF. You can make this the default by adding
|
||||
--enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").
|
||||
|
||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
|
||||
storage for processing capturing parentheses if there are more than 10 of
|
||||
them in a pattern. You can increase this threshold by setting, for example,
|
||||
|
||||
--with-posix-malloc-threshold=20
|
||||
|
||||
on the "configure" command.
|
||||
|
||||
. PCRE has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
example,
|
||||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
on the "configure" command. This is just the default; individual calls to
|
||||
pcre_exec() can supply their own value. There is more discussion on the
|
||||
pcreapi man page.
|
||||
|
||||
. There is a separate counter that limits the depth of recursive function calls
|
||||
during a matching process. This also has a default of ten million, which is
|
||||
essentially "unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-recursion=500000
|
||||
|
||||
Recursive function calls use up the runtime stack; running out of stack can
|
||||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcrestack man page.
|
||||
|
||||
. The default maximum compiled pattern size is around 64K. You can increase
|
||||
this by adding --with-link-size=3 to the "configure" command. In the 8-bit
|
||||
library, PCRE then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||
offsets. Increasing the internal link size reduces performance. In the 32-bit
|
||||
library, the only supported link size is 4.
|
||||
|
||||
. You can build PCRE so that its internal match() function that is called from
|
||||
pcre_exec() does not call itself recursively. Instead, it uses memory blocks
|
||||
obtained from the heap via the special functions pcre_stack_malloc() and
|
||||
pcre_stack_free() to save data that would otherwise be saved on the stack. To
|
||||
build PCRE like this, use
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
on the "configure" command. PCRE runs more slowly in this mode, but it may be
|
||||
necessary in environments with limited stack sizes. This applies only to the
|
||||
normal execution of the pcre_exec() function; if JIT support is being
|
||||
successfully used, it is not relevant. Equally, it does not apply to
|
||||
pcre_dfa_exec(), which does not use deeply nested recursion. There is a
|
||||
discussion about stack sizes in the pcrestack man page.
|
||||
|
||||
. For speed, PCRE uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, it uses a set of
|
||||
tables for ASCII encoding that is part of the distribution. If you specify
|
||||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called dftables is compiled and run in the default C locale when
|
||||
you obey "make". It builds a source file called pcre_chartables.c. If you do
|
||||
not specify this option, pcre_chartables.c is created as a copy of
|
||||
pcre_chartables.c.dist. See "Character tables" below for further information.
|
||||
|
||||
. It is possible to compile PCRE for use on systems that use EBCDIC as their
|
||||
character code (as opposed to ASCII/Unicode) by specifying
|
||||
|
||||
--enable-ebcdic
|
||||
|
||||
This automatically implies --enable-rebuild-chartables (see above). However,
|
||||
when PCRE is built this way, it always operates in EBCDIC. It cannot support
|
||||
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
|
||||
which specifies that the code value for the EBCDIC NL character is 0x25
|
||||
instead of the default 0x15.
|
||||
|
||||
. In environments where valgrind is installed, if you specify
|
||||
|
||||
--enable-valgrind
|
||||
|
||||
PCRE will use valgrind annotations to mark certain memory regions as
|
||||
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||
mostly useful for debugging PCRE itself.
|
||||
|
||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
||||
is installed, if you specify
|
||||
|
||||
--enable-coverage
|
||||
|
||||
the build process implements a code coverage report for the test suite. The
|
||||
report is generated by running "make coverage". If ccache is installed on
|
||||
your system, it must be disabled when building PCRE for coverage reporting.
|
||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||
running "make" to build PCRE. There is more information about coverage
|
||||
reporting in the "pcrebuild" documentation.
|
||||
|
||||
. The pcregrep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
|
||||
specifying one or both of
|
||||
|
||||
--enable-pcregrep-libz
|
||||
--enable-pcregrep-libbz2
|
||||
|
||||
Of course, the relevant libraries must be installed on your system.
|
||||
|
||||
. The default size (in bytes) of the internal buffer used by pcregrep can be
|
||||
set by, for example:
|
||||
|
||||
--with-pcregrep-bufsize=51200
|
||||
|
||||
The value must be a plain integer. The default is 20480.
|
||||
|
||||
. It is possible to compile pcretest so that it links with the libreadline
|
||||
or libedit libraries, by specifying, respectively,
|
||||
|
||||
--enable-pcretest-libreadline or --enable-pcretest-libedit
|
||||
|
||||
If this is done, when pcretest's input is from a terminal, it reads it using
|
||||
the readline() function. This provides line-editing and history facilities.
|
||||
Note that libreadline is GPL-licenced, so if you distribute a binary of
|
||||
pcretest linked in this way, there may be licensing issues. These can be
|
||||
avoided by linking with libedit (which has a BSD licence) instead.
|
||||
|
||||
Enabling libreadline causes the -lreadline option to be added to the pcretest
|
||||
build. In many operating environments with a system-installed readline
|
||||
library this is sufficient. However, in some environments (e.g. if an
|
||||
unmodified distribution version of readline is in use), it may be necessary
|
||||
to specify something like LIBS="-lncurses" as well. This is because, to quote
|
||||
the readline INSTALL, "Readline uses the termcap functions, but does not link
|
||||
with the termcap or curses library itself, allowing applications which link
|
||||
with readline the to choose an appropriate library." If you get error
|
||||
messages about missing functions tgetstr, tgetent, tputs, tgetflag, or tgoto,
|
||||
this is the problem, and linking with the ncurses library should fix it.
|
||||
|
||||
The "configure" script builds the following files for the basic C library:
|
||||
|
||||
. Makefile the makefile that builds the library
|
||||
. config.h build-time configuration options for the library
|
||||
. pcre.h the public PCRE header file
|
||||
. pcre-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre.pc ) data for the pkg-config command
|
||||
. libpcre16.pc )
|
||||
. libpcre32.pc )
|
||||
. libpcreposix.pc )
|
||||
. libtool script that builds shared and/or static libraries
|
||||
|
||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
|
||||
names config.h.generic and pcre.h.generic. These are provided for those who
|
||||
have to build PCRE without using "configure" or CMake. If you use "configure"
|
||||
or CMake, the .generic versions are not used.
|
||||
|
||||
When building the 8-bit library, if a C++ compiler is found, the following
|
||||
files are also built:
|
||||
|
||||
. libpcrecpp.pc data for the pkg-config command
|
||||
. pcrecpparg.h header file for calling PCRE via the C++ wrapper
|
||||
. pcre_stringpiece.h header for the C++ "stringpiece" functions
|
||||
|
||||
The "configure" script also creates config.status, which is an executable
|
||||
script that can be run to recreate the configuration, and config.log, which
|
||||
contains compiler output from tests that "configure" runs.
|
||||
|
||||
Once "configure" has run, you can run "make". This builds the libraries
|
||||
libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you
|
||||
enabled JIT support with --enable-jit, a test program called pcre_jit_test is
|
||||
built as well.
|
||||
|
||||
If the 8-bit library is built, libpcreposix and the pcregrep command are also
|
||||
built, and if a C++ compiler was found on your system, and you did not disable
|
||||
it with --disable-cpp, "make" builds the C++ wrapper library, which is called
|
||||
libpcrecpp, as well as some test programs called pcrecpp_unittest,
|
||||
pcre_scanner_unittest, and pcre_stringpiece_unittest.
|
||||
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE
|
||||
tests are given below in a separate section of this document.
|
||||
|
||||
You can use "make install" to install PCRE into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
<prefix> that is set when "configure" is run):
|
||||
|
||||
Commands (bin):
|
||||
pcretest
|
||||
pcregrep (if 8-bit support is enabled)
|
||||
pcre-config
|
||||
|
||||
Libraries (lib):
|
||||
libpcre16 (if 16-bit support is enabled)
|
||||
libpcre32 (if 32-bit support is enabled)
|
||||
libpcre (if 8-bit support is enabled)
|
||||
libpcreposix (if 8-bit support is enabled)
|
||||
libpcrecpp (if 8-bit and C++ support is enabled)
|
||||
|
||||
Configuration information (lib/pkgconfig):
|
||||
libpcre16.pc
|
||||
libpcre32.pc
|
||||
libpcre.pc
|
||||
libpcreposix.pc
|
||||
libpcrecpp.pc (if C++ support is enabled)
|
||||
|
||||
Header files (include):
|
||||
pcre.h
|
||||
pcreposix.h
|
||||
pcre_scanner.h )
|
||||
pcre_stringpiece.h ) if C++ support is enabled
|
||||
pcrecpp.h )
|
||||
pcrecpparg.h )
|
||||
|
||||
Man pages (share/man/man{1,3}):
|
||||
pcregrep.1
|
||||
pcretest.1
|
||||
pcre-config.1
|
||||
pcre.3
|
||||
pcre*.3 (lots more pages, all starting "pcre")
|
||||
|
||||
HTML documentation (share/doc/pcre/html):
|
||||
index.html
|
||||
*.html (lots more pages, hyperlinked from index.html)
|
||||
|
||||
Text file documentation (share/doc/pcre):
|
||||
AUTHORS
|
||||
COPYING
|
||||
ChangeLog
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
pcre.txt (a concatenation of the man(3) pages)
|
||||
pcretest.txt the pcretest man page
|
||||
pcregrep.txt the pcregrep man page
|
||||
pcre-config.txt the pcre-config man page
|
||||
|
||||
If you want to remove PCRE from your system, you can run "make uninstall".
|
||||
This removes all the files that "make install" installed. However, it does not
|
||||
remove any directories, because these are often shared with other programs.
|
||||
|
||||
|
||||
Retrieving configuration information
|
||||
------------------------------------
|
||||
|
||||
Running "make install" installs the command pcre-config, which can be used to
|
||||
recall information about the PCRE configuration and installation. For example:
|
||||
|
||||
pcre-config --version
|
||||
|
||||
prints the version number, and
|
||||
|
||||
pcre-config --libs
|
||||
|
||||
outputs information about where the library is installed. This command can be
|
||||
included in makefiles for programs that use PCRE, saving the programmer from
|
||||
having to remember too many details.
|
||||
|
||||
The pkg-config command is another system for saving and retrieving information
|
||||
about installed libraries. Instead of separate commands for each library, a
|
||||
single command is used. For example:
|
||||
|
||||
pkg-config --cflags pcre
|
||||
|
||||
The data is held in *.pc files that are installed in a directory called
|
||||
<prefix>/lib/pkgconfig.
|
||||
|
||||
|
||||
Shared libraries
|
||||
----------------
|
||||
|
||||
The default distribution builds PCRE as shared libraries and static libraries,
|
||||
as long as the operating system supports shared libraries. Shared library
|
||||
support relies on the "libtool" script which is built as part of the
|
||||
"configure" process.
|
||||
|
||||
The libtool script is used to compile and link both shared and static
|
||||
libraries. They are placed in a subdirectory called .libs when they are newly
|
||||
built. The programs pcretest and pcregrep are built to use these uninstalled
|
||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||
you use "make install" to install shared libraries, pcregrep and pcretest are
|
||||
automatically re-built to use the newly installed shared libraries before being
|
||||
installed themselves. However, the versions left in the build directory still
|
||||
use the uninstalled libraries.
|
||||
|
||||
To build PCRE using static libraries only you must use --disable-shared when
|
||||
configuring it. For example:
|
||||
|
||||
./configure --prefix=/usr/gnu --disable-shared
|
||||
|
||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||
build only shared libraries.
|
||||
|
||||
|
||||
Cross-compiling using autotools
|
||||
-------------------------------
|
||||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||
file is compiled and run on the local host, in order to generate the inbuilt
|
||||
character tables (the pcre_chartables.c file). This will probably not work,
|
||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||
compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre_chartables.c is created
|
||||
by making a copy of pcre_chartables.c.dist, which is a default set of tables
|
||||
that assumes ASCII code. Cross-compiling with the default tables should not be
|
||||
a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and
|
||||
run it on the local host to make a new version of pcre_chartables.c.dist.
|
||||
Then when you cross-compile PCRE this new version of the tables will be used.
|
||||
|
||||
|
||||
Using HP's ANSI C++ compiler (aCC)
|
||||
----------------------------------
|
||||
|
||||
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
|
||||
"configure" script, you must include the "-AA" option in the CXXFLAGS
|
||||
environment variable in order for the C++ components to compile correctly.
|
||||
|
||||
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
|
||||
needed libraries fail to get included when specifying the "-AA" compiler
|
||||
option. If you experience unresolved symbols when linking the C++ programs,
|
||||
use the workaround of specifying the following environment variable prior to
|
||||
running the "configure" script:
|
||||
|
||||
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
||||
|
||||
|
||||
Compiling in Tru64 using native compilers
|
||||
-----------------------------------------
|
||||
|
||||
The following error may occur when compiling with native compilers in the Tru64
|
||||
operating system:
|
||||
|
||||
CXX libpcrecpp_la-pcrecpp.lo
|
||||
cxx: Error: /usr/lib/cmplrs/cxx/V7.1-006/include/cxx/iosfwd, line 58: #error
|
||||
directive: "cannot include iosfwd -- define __USE_STD_IOSTREAM to
|
||||
override default - see section 7.1.2 of the C++ Using Guide"
|
||||
#error "cannot include iosfwd -- define __USE_STD_IOSTREAM to override default
|
||||
- see section 7.1.2 of the C++ Using Guide"
|
||||
|
||||
This may be followed by other errors, complaining that 'namespace "std" has no
|
||||
member'. The solution to this is to add the line
|
||||
|
||||
#define __USE_STD_IOSTREAM 1
|
||||
|
||||
to the config.h file.
|
||||
|
||||
|
||||
Using Sun's compilers for Solaris
|
||||
---------------------------------
|
||||
|
||||
A user reports that the following configurations work on Solaris 9 sparcv9 and
|
||||
Solaris 9 x86 (32-bit):
|
||||
|
||||
Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g"
|
||||
Solaris 9 x86: ./configure --disable-cpp CC=/bin/cc CFLAGS="-g"
|
||||
|
||||
|
||||
Using PCRE from MySQL
|
||||
---------------------
|
||||
|
||||
On systems where both PCRE and MySQL are installed, it is possible to make use
|
||||
of PCRE from within MySQL, as an alternative to the built-in pattern matching.
|
||||
There is a web page that tells you how to do this:
|
||||
|
||||
http://www.mysqludf.org/lib_mysqludf_preg/index.php
|
||||
|
||||
|
||||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
|
||||
|
||||
Testing PCRE
|
||||
------------
|
||||
|
||||
To test the basic PCRE library on a Unix-like system, run the RunTest script.
|
||||
There is another script called RunGrepTest that tests the options of the
|
||||
pcregrep command. If the C++ wrapper library is built, three test programs
|
||||
called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest
|
||||
are also built. When JIT support is enabled, another test program called
|
||||
pcre_jit_test is built.
|
||||
|
||||
Both the scripts and all the program tests are run if you obey "make check" or
|
||||
"make test". For other environments, see the instructions in
|
||||
NON-AUTOTOOLS-BUILD.
|
||||
|
||||
The RunTest script runs the pcretest test program (which is documented in its
|
||||
own man page) on each of the relevant testinput files in the testdata
|
||||
directory, and compares the output with the contents of the corresponding
|
||||
testoutput files. RunTest uses a file called testtry to hold the main output
|
||||
from pcretest. Other files whose names begin with "test" are used as working
|
||||
files in some tests.
|
||||
|
||||
Some tests are relevant only when certain build-time options were selected. For
|
||||
example, the tests for UTF-8/16/32 support are run only if --enable-utf was
|
||||
used. RunTest outputs a comment when it skips a test.
|
||||
|
||||
Many of the tests that are not skipped are run up to three times. The second
|
||||
run forces pcre_study() to be called for all patterns except for a few in some
|
||||
tests that are marked "never study" (see the pcretest program for how this is
|
||||
done). If JIT support is available, the non-DFA tests are run a third time,
|
||||
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
|
||||
This testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||
|
||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||
libraries that are enabled. If you want to run just one set of tests, call
|
||||
RunTest with either the -8, -16 or -32 option.
|
||||
|
||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||
on the RunTest command line. To run pcretest on just one or more specific test
|
||||
files, give their numbers as arguments to RunTest, for example:
|
||||
|
||||
RunTest 2 7 11
|
||||
|
||||
You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the
|
||||
end), or a number preceded by ~ to exclude a test. For example:
|
||||
|
||||
RunTest 3-15 ~10
|
||||
|
||||
This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests
|
||||
except test 13. Whatever order the arguments are in, the tests are always run
|
||||
in numerical order.
|
||||
|
||||
You can also call RunTest with the single argument "list" to cause it to output
|
||||
a list of tests.
|
||||
|
||||
The first test file can be fed directly into the perltest.pl script to check
|
||||
that Perl gives the same results. The only difference you should see is in the
|
||||
first few lines, where the Perl version is given instead of the PCRE version.
|
||||
|
||||
The second set of tests checks pcre_fullinfo(), pcre_study(),
|
||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
||||
detection, and run-time flags that are specific to PCRE, as well as the POSIX
|
||||
wrapper API. It also uses the debugging flags to check some of the internals of
|
||||
pcre_compile().
|
||||
|
||||
If you build PCRE with a locale setting that is not the standard C locale, the
|
||||
character tables may be different (see next paragraph). In some cases, this may
|
||||
cause failures in the second set of tests. For example, in a locale where the
|
||||
isprint() function yields TRUE for characters in the range 128-255, the use of
|
||||
[:isascii:] inside a character class defines a different set of characters, and
|
||||
this shows up in this test as a difference in the compiled code, which is being
|
||||
listed for checking. Where the comparison test output contains [\x00-\x7f] the
|
||||
test will contain [\x00-\xff], and similarly in some other cases. This is not a
|
||||
bug in PCRE.
|
||||
|
||||
The third set of tests checks pcre_maketables(), the facility for building a
|
||||
set of character tables for a specific locale and using them instead of the
|
||||
default tables. The tests make use of the "fr_FR" (French) locale. Before
|
||||
running the test, the script checks for the presence of this locale by running
|
||||
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
|
||||
in the list of available locales, the third test cannot be run, and a comment
|
||||
is output to say why. If running this test produces instances of the error
|
||||
|
||||
** Failed to set locale "fr_FR"
|
||||
|
||||
in the comparison output, it means that locale is not available on your system,
|
||||
despite being listed by "locale". This does not mean that PCRE is broken.
|
||||
|
||||
[If you are trying to run this test on Windows, you may be able to get it to
|
||||
work by changing "fr_FR" to "french" everywhere it occurs. Alternatively, use
|
||||
RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
|
||||
Windows versions of test 2. More info on using RunTest.bat is included in the
|
||||
document entitled NON-UNIX-USE.]
|
||||
|
||||
The fourth and fifth tests check the UTF-8/16/32 support and error handling and
|
||||
internal UTF features of PCRE that are not relevant to Perl, respectively. The
|
||||
sixth and seventh tests do the same for Unicode character properties support.
|
||||
|
||||
The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
|
||||
matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32
|
||||
mode with Unicode property support, respectively.
|
||||
|
||||
The eleventh test checks some internal offsets and code size features; it is
|
||||
run only when the default "link size" of 2 is set (in other cases the sizes
|
||||
change) and when Unicode property support is enabled.
|
||||
|
||||
The twelfth test is run only when JIT support is available, and the thirteenth
|
||||
test is run only when JIT support is not available. They test some JIT-specific
|
||||
features such as information output from pcretest about JIT compilation.
|
||||
|
||||
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
|
||||
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit
|
||||
mode. These are tests that generate different output in the two modes. They are
|
||||
for general cases, UTF-8/16/32 support, and Unicode property support,
|
||||
respectively.
|
||||
|
||||
The twentieth test is run only in 16/32-bit mode. It tests some specific
|
||||
16/32-bit features of the DFA matching engine.
|
||||
|
||||
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when
|
||||
the link size is set to 2 for the 16-bit library. They test reloading
|
||||
pre-compiled patterns.
|
||||
|
||||
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are
|
||||
for general cases, and UTF-16 support, respectively.
|
||||
|
||||
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are
|
||||
for general cases, and UTF-32 support, respectively.
|
||||
|
||||
|
||||
Character tables
|
||||
----------------
|
||||
|
||||
For speed, PCRE uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. The final argument of the
|
||||
pcre_compile() function is a pointer to a block of memory containing the
|
||||
concatenated tables. A call to pcre_maketables() can be used to generate a set
|
||||
of tables in the current locale. If the final argument for pcre_compile() is
|
||||
passed as NULL, a set of default tables that is built into the binary is used.
|
||||
|
||||
The source file called pcre_chartables.c contains the default set of tables. By
|
||||
default, this is created as a copy of pcre_chartables.c.dist, which contains
|
||||
tables for ASCII coding. However, if --enable-rebuild-chartables is specified
|
||||
for ./configure, a different version of pcre_chartables.c is built by the
|
||||
program dftables (compiled from dftables.c), which uses the ANSI C character
|
||||
handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to
|
||||
build the table sources. This means that the default C locale which is set for
|
||||
your system will control the contents of these default tables. You can change
|
||||
the default tables by editing pcre_chartables.c and then re-building PCRE. If
|
||||
you do this, you should take care to ensure that the file does not get
|
||||
automatically re-generated. The best way to do this is to move
|
||||
pcre_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||
it uses the default C locale that is set on your system. It does not pay
|
||||
attention to the LC_xxx environment variables. In other words, it uses the
|
||||
system's default locale rather than whatever the compiling user happens to have
|
||||
set. If you really do want to build a source set of character tables in a
|
||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||
program by hand with the -L option. For example:
|
||||
|
||||
./dftables -L pcre_chartables.c.special
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes for code points less
|
||||
than 256.
|
||||
|
||||
The final 256-byte table has bits indicating various character types, as
|
||||
follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 decimal digit
|
||||
8 hexadecimal digit
|
||||
16 alphanumeric or '_'
|
||||
128 regular expression metacharacter or binary zero
|
||||
|
||||
You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE to malfunction.
|
||||
|
||||
|
||||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the files listed below. Where a file name is
|
||||
given as pcre[16|32]_xxx it means that there are three files, one with the name
|
||||
pcre_xxx, one with the name pcre16_xxx, and a third with the name pcre32_xxx.
|
||||
|
||||
(A) Source files of the PCRE library functions and their headers:
|
||||
|
||||
dftables.c auxiliary program for building pcre_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
pcre_chartables.c.dist a default set of character tables that assume ASCII
|
||||
coding; used, unless --enable-rebuild-chartables is
|
||||
specified, by copying to pcre[16]_chartables.c
|
||||
|
||||
pcreposix.c )
|
||||
pcre[16|32]_byte_order.c )
|
||||
pcre[16|32]_compile.c )
|
||||
pcre[16|32]_config.c )
|
||||
pcre[16|32]_dfa_exec.c )
|
||||
pcre[16|32]_exec.c )
|
||||
pcre[16|32]_fullinfo.c )
|
||||
pcre[16|32]_get.c ) sources for the functions in the library,
|
||||
pcre[16|32]_globals.c ) and some internal functions that they use
|
||||
pcre[16|32]_jit_compile.c )
|
||||
pcre[16|32]_maketables.c )
|
||||
pcre[16|32]_newline.c )
|
||||
pcre[16|32]_refcount.c )
|
||||
pcre[16|32]_string_utils.c )
|
||||
pcre[16|32]_study.c )
|
||||
pcre[16|32]_tables.c )
|
||||
pcre[16|32]_ucd.c )
|
||||
pcre[16|32]_version.c )
|
||||
pcre[16|32]_xclass.c )
|
||||
pcre_ord2utf8.c )
|
||||
pcre_valid_utf8.c )
|
||||
pcre16_ord2utf16.c )
|
||||
pcre16_utf16_utils.c )
|
||||
pcre16_valid_utf16.c )
|
||||
pcre32_utf32_utils.c )
|
||||
pcre32_valid_utf32.c )
|
||||
|
||||
pcre[16|32]_printint.c ) debugging function that is used by pcretest,
|
||||
) and can also be #included in pcre_compile()
|
||||
|
||||
pcre.h.in template for pcre.h when built by "configure"
|
||||
pcreposix.h header for the external POSIX wrapper API
|
||||
pcre_internal.h header for internal use
|
||||
sljit/* 16 files that make up the JIT compiler
|
||||
ucp.h header for Unicode property handling
|
||||
|
||||
config.h.in template for config.h, which is built by "configure"
|
||||
|
||||
pcrecpp.h public header file for the C++ wrapper
|
||||
pcrecpparg.h.in template for another C++ header file
|
||||
pcre_scanner.h public header file for C++ scanner functions
|
||||
pcrecpp.cc )
|
||||
pcre_scanner.cc ) source for the C++ wrapper library
|
||||
|
||||
pcre_stringpiece.h.in template for pcre_stringpiece.h, the header for the
|
||||
C++ stringpiece functions
|
||||
pcre_stringpiece.cc source for the C++ stringpiece functions
|
||||
|
||||
(B) Source files for programs that use PCRE:
|
||||
|
||||
pcredemo.c simple demonstration of coding calls to PCRE
|
||||
pcregrep.c source of a grep utility that uses PCRE
|
||||
pcretest.c comprehensive test program
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-UNIX-USE the previous name for NON-AUTOTOOLS-BUILD
|
||||
NON-AUTOTOOLS-BUILD notes on building PCRE without using autotools
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcregrep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for PCRE
|
||||
doc/*.1 man page sources for pcregrep and pcretest
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre.txt plain text version of the man pages
|
||||
doc/pcretest.txt plain text documentation of test program
|
||||
doc/perltest.txt plain text documentation of Perl test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre16.pc.in template for libpcre16.pc for pkg-config
|
||||
libpcre32.pc.in template for libpcre32.pc for pkg-config
|
||||
libpcre.pc.in template for libpcre.pc for pkg-config
|
||||
libpcreposix.pc.in template for libpcreposix.pc for pkg-config
|
||||
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
perltest.pl Perl test program
|
||||
pcre-config.in source of script which retains PCRE information
|
||||
pcre_jit_test.c test program for the JIT compiler
|
||||
pcrecpp_unittest.cc )
|
||||
pcre_scanner_unittest.cc ) test programs for the C++ wrapper
|
||||
pcre_stringpiece_unittest.cc )
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcregrep tests
|
||||
testdata/* other supporting test files
|
||||
|
||||
(D) Auxiliary files for cmake support
|
||||
|
||||
cmake/COPYING-CMAKE-SCRIPTS
|
||||
cmake/FindPackageHandleStandardArgs.cmake
|
||||
cmake/FindEditline.cmake
|
||||
cmake/FindReadline.cmake
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for VPASCAL
|
||||
|
||||
makevp.bat
|
||||
makevp_c.txt
|
||||
makevp_l.txt
|
||||
pcregexp.pas
|
||||
|
||||
(F) Auxiliary files for building PCRE "by hand"
|
||||
|
||||
pcre.h.generic ) a version of the public PCRE header file
|
||||
) for use in non-"configure" environments
|
||||
config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
(G) Miscellaneous
|
||||
|
||||
RunTest.bat a script for running tests under Windows
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 17 January 2014
|
|
@ -11,27 +11,29 @@
|
|||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
|
||||
<p>
|
||||
The HTML documentation for PCRE comprises the following pages:
|
||||
The HTML documentation for PCRE consists of a number of pages that are listed
|
||||
below in alphabetical order. If you are new to PCRE, please read the first one
|
||||
first.
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tr><td><a href="pcre.html">pcre</a></td>
|
||||
<td> Introductory page</td></tr>
|
||||
|
||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||
<td> Information about the installation configuration</td></tr>
|
||||
|
||||
<tr><td><a href="pcre16.html">pcre16</a></td>
|
||||
<td> Discussion of the 16-bit PCRE library</td></tr>
|
||||
|
||||
<tr><td><a href="pcre32.html">pcre32</a></td>
|
||||
<td> Discussion of the 32-bit PCRE library</td></tr>
|
||||
|
||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||
<td> Information about the installation configuration</td></tr>
|
||||
|
||||
<tr><td><a href="pcreapi.html">pcreapi</a></td>
|
||||
<td> PCRE's native API</td></tr>
|
||||
|
||||
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
|
||||
<td> Options for building PCRE</td></tr>
|
||||
<td> Building PCRE</td></tr>
|
||||
|
||||
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
|
||||
<td> The <i>callout</i> facility</td></tr>
|
||||
|
@ -67,7 +69,7 @@ The HTML documentation for PCRE comprises the following pages:
|
|||
<td> Some comments on performance</td></tr>
|
||||
|
||||
<tr><td><a href="pcreposix.html">pcreposix</a></td>
|
||||
<td> The POSIX API to the PCRE library</td></tr>
|
||||
<td> The POSIX API to the PCRE 8-bit library</td></tr>
|
||||
|
||||
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
|
||||
<td> How to save and re-use compiled patterns</td></tr>
|
||||
|
@ -118,13 +120,13 @@ functions.
|
|||
<td> Match a compiled pattern to a subject string
|
||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
||||
<td> Free study data</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
||||
<td> Free study data</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
|
||||
<td> Free extracted substring</td></tr>
|
||||
|
||||
|
@ -140,14 +142,17 @@ functions.
|
|||
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
|
||||
<td> Convert captured string name to number</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_stringtable_entries.html">pcre_get_stringtable_entries</a></td>
|
||||
<td> Find table entries for given string name</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
|
||||
<td> Extract numbered substring into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
|
||||
<td> Extract all substrings into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
||||
<td> Obsolete information extraction function</td></tr>
|
||||
<tr><td><a href="pcre_jit_exec.html">pcre_jit_exec</a></td>
|
||||
<td> Fast path interface to JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
|
||||
<td> Create a stack for JIT matching</td></tr>
|
||||
|
|
|
@ -23,8 +23,8 @@ man page, in case the conversion went wrong.
|
|||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
|
||||
<b>[--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
|
||||
<b>[--cflags] [--cflags-posix]</b>
|
||||
<b> [--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
|
||||
<b> [--cflags] [--cflags-posix]</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
|
|
|
@ -38,9 +38,9 @@ Herczeg.
|
|||
</P>
|
||||
<P>
|
||||
Starting with release 8.32 it is possible to compile a third separate PCRE
|
||||
library, which supports 32-bit character strings (including
|
||||
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
|
||||
libraries. The work to make this possible was done by Christian Persch.
|
||||
library that supports 32-bit character strings (including UTF-32 strings). The
|
||||
build process allows any combination of the 8-, 16- and 32-bit libraries. The
|
||||
work to make this possible was done by Christian Persch.
|
||||
</P>
|
||||
<P>
|
||||
The three libraries contain identical sets of functions, except that the names
|
||||
|
@ -62,7 +62,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
|
|||
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||
release 6.2.0.
|
||||
release 6.3.0.
|
||||
</P>
|
||||
<P>
|
||||
In addition to the Perl-compatible matching function, PCRE contains an
|
||||
|
@ -100,8 +100,11 @@ function makes it possible for a client to discover which features are
|
|||
available. The features themselves are described in the
|
||||
<a href="pcrebuild.html"><b>pcrebuild</b></a>
|
||||
page. Documentation about building PCRE for various operating systems can be
|
||||
found in the <b>README</b> and <b>NON-AUTOTOOLS_BUILD</b> files in the source
|
||||
distribution.
|
||||
found in the
|
||||
<a href="README.txt"><b>README</b></a>
|
||||
and
|
||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b></a>
|
||||
files in the source distribution.
|
||||
</P>
|
||||
<P>
|
||||
The libraries contain a number of undocumented internal functions and data
|
||||
|
@ -126,8 +129,11 @@ use sufficiently many resources as to cause your application to lose
|
|||
performance.
|
||||
</P>
|
||||
<P>
|
||||
The best way of guarding against this possibility is to use the
|
||||
One way of guarding against this possibility is to use the
|
||||
<b>pcre_fullinfo()</b> function to check the compiled pattern's options for UTF.
|
||||
Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
|
||||
compile time. This causes a compile-time error if a pattern contains a
|
||||
UTF-setting sequence.
|
||||
</P>
|
||||
<P>
|
||||
If your application is one that supports UTF, be aware that validity checking
|
||||
|
@ -148,15 +154,18 @@ page.
|
|||
The user documentation for PCRE comprises a number of different sections. In
|
||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||
each is a separate page, linked from the index page. In the plain text format,
|
||||
all the sections, except the <b>pcredemo</b> section, are concatenated, for ease
|
||||
of searching. The sections are as follows:
|
||||
the descriptions of the <b>pcregrep</b> and <b>pcretest</b> programs are in files
|
||||
called <b>pcregrep.txt</b> and <b>pcretest.txt</b>, respectively. The remaining
|
||||
sections, except for the <b>pcredemo</b> section (which is a program listing),
|
||||
are concatenated in <b>pcre.txt</b>, for ease of searching. The sections are as
|
||||
follows:
|
||||
<pre>
|
||||
pcre this document
|
||||
pcre-config show PCRE installation configuration information
|
||||
pcre16 details of the 16-bit library
|
||||
pcre32 details of the 32-bit library
|
||||
pcre-config show PCRE installation configuration information
|
||||
pcreapi details of PCRE's native C API
|
||||
pcrebuild options for building PCRE
|
||||
pcrebuild building PCRE
|
||||
pcrecallout details of the callout feature
|
||||
pcrecompat discussion of Perl compatibility
|
||||
pcrecpp details of the C++ wrapper for the 8-bit library
|
||||
|
@ -176,8 +185,8 @@ of searching. The sections are as follows:
|
|||
pcretest description of the <b>pcretest</b> testing command
|
||||
pcreunicode discussion of Unicode and UTF-8/16/32 support
|
||||
</pre>
|
||||
In addition, in the "man" and HTML formats, there is a short page for each
|
||||
C library function, listing its arguments and results.
|
||||
In the "man" and HTML formats, there is also a short page for each C library
|
||||
function, listing its arguments and results.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
|
@ -195,9 +204,9 @@ two digits 10, at the domain cam.ac.uk.
|
|||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 11 November 2012
|
||||
Last updated: 08 January 2014
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -42,126 +42,126 @@ man page, in case the conversion went wrong.
|
|||
<br><a name="SEC1" href="#TOC1">PCRE 16-BIT API BASIC FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>const unsigned char *pcre16_maketables(void);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>const char *pcre16_version(void);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>void *(*pcre16_malloc)(size_t);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre16_free)(void *);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void *(*pcre16_stack_malloc)(size_t);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre16_stack_free)(void *);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||
<b>int <i>keep_boms</i>);</b>
|
||||
<b> PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||
<b> int <i>keep_boms</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">THE PCRE 16-BIT LIBRARY</a><br>
|
||||
<P>
|
||||
|
@ -259,8 +259,9 @@ buffer, including the zero terminator if the string was zero-terminated.
|
|||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
|
||||
<P>
|
||||
The offsets within subject strings that are returned by the matching functions
|
||||
are in 16-bit units rather than bytes.
|
||||
The lengths and starting offsets of subject strings must be specified in 16-bit
|
||||
data units, and the offsets within subject strings that are returned by the
|
||||
matching functions are also in 16-bit units rather than bytes.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
||||
<P>
|
||||
|
@ -374,9 +375,9 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 08 November 2012
|
||||
Last updated: 12 May 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
382
tools/pcre/doc/html/pcre32.html
Normal file
382
tools/pcre/doc/html/pcre32.html
Normal file
|
@ -0,0 +1,382 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre32 specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre32 man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">PCRE 32-BIT API BASIC FUNCTIONS</a>
|
||||
<li><a name="TOC2" href="#SEC2">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a>
|
||||
<li><a name="TOC3" href="#SEC3">PCRE 32-BIT API AUXILIARY FUNCTIONS</a>
|
||||
<li><a name="TOC4" href="#SEC4">PCRE 32-BIT API INDIRECTED FUNCTIONS</a>
|
||||
<li><a name="TOC5" href="#SEC5">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a>
|
||||
<li><a name="TOC6" href="#SEC6">THE PCRE 32-BIT LIBRARY</a>
|
||||
<li><a name="TOC7" href="#SEC7">THE HEADER FILE</a>
|
||||
<li><a name="TOC8" href="#SEC8">THE LIBRARY NAME</a>
|
||||
<li><a name="TOC9" href="#SEC9">STRING TYPES</a>
|
||||
<li><a name="TOC10" href="#SEC10">STRUCTURE TYPES</a>
|
||||
<li><a name="TOC11" href="#SEC11">32-BIT FUNCTIONS</a>
|
||||
<li><a name="TOC12" href="#SEC12">SUBJECT STRING OFFSETS</a>
|
||||
<li><a name="TOC13" href="#SEC13">NAMED SUBPATTERNS</a>
|
||||
<li><a name="TOC14" href="#SEC14">OPTION NAMES</a>
|
||||
<li><a name="TOC15" href="#SEC15">CHARACTER CODES</a>
|
||||
<li><a name="TOC16" href="#SEC16">ERROR NAMES</a>
|
||||
<li><a name="TOC17" href="#SEC17">ERROR TEXTS</a>
|
||||
<li><a name="TOC18" href="#SEC18">CALLOUTS</a>
|
||||
<li><a name="TOC19" href="#SEC19">TESTING</a>
|
||||
<li><a name="TOC20" href="#SEC20">NOT SUPPORTED IN 32-BIT MODE</a>
|
||||
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
|
||||
<li><a name="TOC22" href="#SEC22">REVISION</a>
|
||||
</ul>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE 32-BIT API BASIC FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||
<b> PCRE_SPTR32 <i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">PCRE 32-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>const unsigned char *pcre32_maketables(void);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>const char *pcre32_version(void);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">PCRE 32-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>void *(*pcre32_malloc)(size_t);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre32_free)(void *);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void *(*pcre32_stack_malloc)(size_t);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre32_stack_free)(void *);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
||||
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||
<b> int <i>keep_boms</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">THE PCRE 32-BIT LIBRARY</a><br>
|
||||
<P>
|
||||
Starting with release 8.32, it is possible to compile a PCRE library that
|
||||
supports 32-bit character strings, including UTF-32 strings, as well as or
|
||||
instead of the original 8-bit library. This work was done by Christian Persch,
|
||||
based on the work done by Zoltan Herczeg for the 16-bit library. All three
|
||||
libraries contain identical sets of functions, used in exactly the same way.
|
||||
Only the names of the functions and the data types of their arguments and
|
||||
results are different. To avoid over-complication and reduce the documentation
|
||||
maintenance load, most of the PCRE documentation describes the 8-bit library,
|
||||
with only occasional references to the 16-bit and 32-bit libraries. This page
|
||||
describes what is different when you use the 32-bit library.
|
||||
</P>
|
||||
<P>
|
||||
WARNING: A single application can be linked with all or any of the three
|
||||
libraries, but you must take care when processing any particular pattern
|
||||
to use functions from just one library. For example, if you want to study
|
||||
a pattern that was compiled with <b>pcre32_compile()</b>, you must do so
|
||||
with <b>pcre32_study()</b>, not <b>pcre_study()</b>, and you must free the
|
||||
study data with <b>pcre32_free_study()</b>.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">THE HEADER FILE</a><br>
|
||||
<P>
|
||||
There is only one header file, <b>pcre.h</b>. It contains prototypes for all the
|
||||
functions in all libraries, as well as definitions of flags, structures, error
|
||||
codes, etc.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">THE LIBRARY NAME</a><br>
|
||||
<P>
|
||||
In Unix-like systems, the 32-bit library is called <b>libpcre32</b>, and can
|
||||
normally be accessed by adding <b>-lpcre32</b> to the command for linking an
|
||||
application that uses PCRE.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">STRING TYPES</a><br>
|
||||
<P>
|
||||
In the 8-bit library, strings are passed to PCRE library functions as vectors
|
||||
of bytes with the C type "char *". In the 32-bit library, strings are passed as
|
||||
vectors of unsigned 32-bit quantities. The macro PCRE_UCHAR32 specifies an
|
||||
appropriate data type, and PCRE_SPTR32 is defined as "const PCRE_UCHAR32 *". In
|
||||
very many environments, "unsigned int" is a 32-bit data type. When PCRE is
|
||||
built, it defines PCRE_UCHAR32 as "unsigned int", but checks that it really is
|
||||
a 32-bit data type. If it is not, the build fails with an error message telling
|
||||
the maintainer to modify the definition appropriately.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">STRUCTURE TYPES</a><br>
|
||||
<P>
|
||||
The types of the opaque structures that are used for compiled 32-bit patterns
|
||||
and JIT stacks are <b>pcre32</b> and <b>pcre32_jit_stack</b> respectively. The
|
||||
type of the user-accessible structure that is returned by <b>pcre32_study()</b>
|
||||
is <b>pcre32_extra</b>, and the type of the structure that is used for passing
|
||||
data to a callout function is <b>pcre32_callout_block</b>. These structures
|
||||
contain the same fields, with the same names, as their 8-bit counterparts. The
|
||||
only difference is that pointers to character strings are 32-bit instead of
|
||||
8-bit types.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">32-BIT FUNCTIONS</a><br>
|
||||
<P>
|
||||
For every function in the 8-bit library there is a corresponding function in
|
||||
the 32-bit library with a name that starts with <b>pcre32_</b> instead of
|
||||
<b>pcre_</b>. The prototypes are listed above. In addition, there is one extra
|
||||
function, <b>pcre32_utf32_to_host_byte_order()</b>. This is a utility function
|
||||
that converts a UTF-32 character string to host byte order if necessary. The
|
||||
other 32-bit functions expect the strings they are passed to be in host byte
|
||||
order.
|
||||
</P>
|
||||
<P>
|
||||
The <i>input</i> and <i>output</i> arguments of
|
||||
<b>pcre32_utf32_to_host_byte_order()</b> may point to the same address, that is,
|
||||
conversion in place is supported. The output buffer must be at least as long as
|
||||
the input.
|
||||
</P>
|
||||
<P>
|
||||
The <i>length</i> argument specifies the number of 32-bit data units in the
|
||||
input string; a negative value specifies a zero-terminated string.
|
||||
</P>
|
||||
<P>
|
||||
If <i>byte_order</i> is NULL, it is assumed that the string starts off in host
|
||||
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
|
||||
string (commonly as the first character).
|
||||
</P>
|
||||
<P>
|
||||
If <i>byte_order</i> is not NULL, a non-zero value of the integer to which it
|
||||
points means that the input starts off in host byte order, otherwise the
|
||||
opposite order is assumed. Again, BOMs in the string can change this. The final
|
||||
byte order is passed back at the end of processing.
|
||||
</P>
|
||||
<P>
|
||||
If <i>keep_boms</i> is not zero, byte-order mark characters (0xfeff) are copied
|
||||
into the output string. Otherwise they are discarded.
|
||||
</P>
|
||||
<P>
|
||||
The result of the function is the number of 32-bit units placed into the output
|
||||
buffer, including the zero terminator if the string was zero-terminated.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
|
||||
<P>
|
||||
The lengths and starting offsets of subject strings must be specified in 32-bit
|
||||
data units, and the offsets within subject strings that are returned by the
|
||||
matching functions are also in 32-bit units rather than bytes.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
||||
<P>
|
||||
The name-to-number translation table that is maintained for named subpatterns
|
||||
uses 32-bit characters. The <b>pcre32_get_stringtable_entries()</b> function
|
||||
returns the length of each entry in the table as the number of 32-bit data
|
||||
units.
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">OPTION NAMES</a><br>
|
||||
<P>
|
||||
There are two new general option names, PCRE_UTF32 and PCRE_NO_UTF32_CHECK,
|
||||
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
|
||||
fact, these new options define the same bits in the options word. There is a
|
||||
discussion about the
|
||||
<a href="pcreunicode.html#utf32strings">validity of UTF-32 strings</a>
|
||||
in the
|
||||
<a href="pcreunicode.html"><b>pcreunicode</b></a>
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
For the <b>pcre32_config()</b> function there is an option PCRE_CONFIG_UTF32
|
||||
that returns 1 if UTF-32 support is configured, otherwise 0. If this option is
|
||||
given to <b>pcre_config()</b> or <b>pcre16_config()</b>, or if the
|
||||
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 option is given to <b>pcre32_config()</b>,
|
||||
the result is the PCRE_ERROR_BADOPTION error.
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">CHARACTER CODES</a><br>
|
||||
<P>
|
||||
In 32-bit mode, when PCRE_UTF32 is not set, character values are treated in the
|
||||
same way as in 8-bit, non UTF-8 mode, except, of course, that they can range
|
||||
from 0 to 0x7fffffff instead of 0 to 0xff. Character types for characters less
|
||||
than 0xff can therefore be influenced by the locale in the same way as before.
|
||||
Characters greater than 0xff have only one case, and no "type" (such as letter
|
||||
or digit).
|
||||
</P>
|
||||
<P>
|
||||
In UTF-32 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
|
||||
the exception of values in the range 0xd800 to 0xdfff because those are
|
||||
"surrogate" values that are ill-formed in UTF-32.
|
||||
</P>
|
||||
<P>
|
||||
A UTF-32 string can indicate its endianness by a special code known as a
|
||||
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
|
||||
to be in host byte order. A utility function called
|
||||
<b>pcre32_utf32_to_host_byte_order()</b> is provided to help with this (see
|
||||
above).
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">ERROR NAMES</a><br>
|
||||
<P>
|
||||
The error PCRE_ERROR_BADUTF32 corresponds to its 8-bit counterpart.
|
||||
The error PCRE_ERROR_BADMODE is given when a compiled
|
||||
pattern is passed to a function that processes patterns in the other
|
||||
mode, for example, if a pattern compiled with <b>pcre_compile()</b> is passed to
|
||||
<b>pcre32_exec()</b>.
|
||||
</P>
|
||||
<P>
|
||||
There are new error codes whose names begin with PCRE_UTF32_ERR for invalid
|
||||
UTF-32 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
|
||||
are described in the section entitled
|
||||
<a href="pcreapi.html#badutf8reasons">"Reason codes for invalid UTF-8 strings"</a>
|
||||
in the main
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page. The UTF-32 errors are:
|
||||
<pre>
|
||||
PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
|
||||
PCRE_UTF32_ERR2 Non-character
|
||||
PCRE_UTF32_ERR3 Character > 0x10ffff
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">ERROR TEXTS</a><br>
|
||||
<P>
|
||||
If there is an error while compiling a pattern, the error text that is passed
|
||||
back by <b>pcre32_compile()</b> or <b>pcre32_compile2()</b> is still an 8-bit
|
||||
character string, zero-terminated.
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
The <i>subject</i> and <i>mark</i> fields in the callout block that is passed to
|
||||
a callout function point to 32-bit vectors.
|
||||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">TESTING</a><br>
|
||||
<P>
|
||||
The <b>pcretest</b> program continues to operate with 8-bit input and output
|
||||
files, but it can be used for testing the 32-bit library. If it is run with the
|
||||
command line option <b>-32</b>, patterns and subject strings are converted from
|
||||
8-bit to 32-bit before being passed to PCRE, and the 32-bit library functions
|
||||
are used instead of the 8-bit ones. Returned 32-bit strings are converted to
|
||||
8-bit for output. If neither the 8-bit nor the 16-bit library was compiled,
|
||||
<b>pcretest</b> defaults to 32-bit and the <b>-32</b> option is ignored.
|
||||
</P>
|
||||
<P>
|
||||
When PCRE is being built, the <b>RunTest</b> script that is called by "make
|
||||
check" uses the <b>pcretest</b> <b>-C</b> option to discover which of the 8-bit,
|
||||
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
|
||||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">NOT SUPPORTED IN 32-BIT MODE</a><br>
|
||||
<P>
|
||||
Not all the features of the 8-bit library are available with the 32-bit
|
||||
library. The C++ and POSIX wrapper functions support only the 8-bit library,
|
||||
and the <b>pcregrep</b> program is at present 8-bit only.
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
University Computing Service
|
||||
<br>
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 12 May 2013
|
||||
<br>
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
||||
<b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||
<b>pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,18 +20,18 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -65,6 +65,7 @@ The option bits are:
|
|||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
|
@ -73,6 +74,8 @@ The option bits are:
|
|||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||
validity (only relevant if
|
||||
PCRE_UTF16 is set)
|
||||
|
|
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -69,6 +69,7 @@ The option bits are:
|
|||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
|
@ -77,6 +78,8 @@ The option bits are:
|
|||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||
validity (only relevant if
|
||||
PCRE_UTF16 is set)
|
||||
|
|
|
@ -48,6 +48,7 @@ point to an unsigned long integer. The available codes are:
|
|||
target architecture for the JIT compiler,
|
||||
or NULL if there is no JIT support
|
||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
|
||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||
Internal recursion depth limit
|
||||
|
|
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b>PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,18 +20,18 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -50,16 +50,17 @@ are:
|
|||
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
||||
or is NULL
|
||||
<i>subject</i> Points to the subject string
|
||||
<i>length</i> Length of the subject string, in bytes
|
||||
<i>startoffset</i> Offset in bytes in the subject at which to
|
||||
start matching
|
||||
<i>length</i> Length of the subject string
|
||||
<i>startoffset</i> Offset in the subject at which to start matching
|
||||
<i>options</i> Option bits
|
||||
<i>ovector</i> Points to a vector of ints for result offsets
|
||||
<i>ovecsize</i> Number of elements in the vector
|
||||
<i>workspace</i> Points to a vector of ints used as working space
|
||||
<i>wscount</i> Number of elements in the vector
|
||||
</pre>
|
||||
The options are:
|
||||
The units for <i>length</i> and <i>startoffset</i> are bytes for
|
||||
<b>pcre_dfa_exec()</b>, 16-bit data items for <b>pcre16_dfa_exec()</b>, and 32-bit items
|
||||
for <b>pcre32_dfa_exec()</b>. The options are:
|
||||
<pre>
|
||||
PCRE_ANCHORED Match only at the first position
|
||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||
|
|
|
@ -20,18 +20,18 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -45,14 +45,15 @@ offsets to captured substrings. Its arguments are:
|
|||
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
|
||||
or is NULL
|
||||
<i>subject</i> Points to the subject string
|
||||
<i>length</i> Length of the subject string, in bytes
|
||||
<i>startoffset</i> Offset in bytes in the subject at which to
|
||||
start matching
|
||||
<i>length</i> Length of the subject string
|
||||
<i>startoffset</i> Offset in the subject at which to start matching
|
||||
<i>options</i> Option bits
|
||||
<i>ovector</i> Points to a vector of ints for result offsets
|
||||
<i>ovecsize</i> Number of elements in the vector (a multiple of 3)
|
||||
</pre>
|
||||
The options are:
|
||||
The units for <i>length</i> and <i>startoffset</i> are bytes for
|
||||
<b>pcre_exec()</b>, 16-bit data items for <b>pcre16_exec()</b>, and 32-bit items
|
||||
for <b>pcre32_exec()</b>. The options are:
|
||||
<pre>
|
||||
PCRE_ANCHORED Match only at the first position
|
||||
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
|
||||
|
|
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b> const char **<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>name</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>name</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,18 +20,18 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> const char **<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,21 +20,21 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> pcre16_jit_stack *<i>jstack</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> pcre32_jit_stack *<i>jstack</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||
<b>int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>maxsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||
<b>int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>maxsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||
<b>int <i>maxsize</i>);</b>
|
||||
<b> int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
||||
<b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||
<b>pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,15 +20,15 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
|
@ -20,8 +20,8 @@ SYNOPSIS
|
|||
</P>
|
||||
<P>
|
||||
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||
<b>int <i>keep_boms</i>);</b>
|
||||
<b> PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||
<b> int <i>keep_boms</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
|
57
tools/pcre/doc/html/pcre_utf32_to_host_byte_order.html
Normal file
57
tools/pcre/doc/html/pcre_utf32_to_host_byte_order.html
Normal file
|
@ -0,0 +1,57 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre_utf32_to_host_byte_order specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre_utf32_to_host_byte_order man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE HTML documentation. It was generated automatically
|
||||
from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
||||
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||
<b> int <i>keep_boms</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function, which exists only in the 32-bit library, converts a UTF-32
|
||||
string to the correct order for the current host, taking account of any byte
|
||||
order marks (BOMs) within the string. Its arguments are:
|
||||
<pre>
|
||||
<i>output</i> pointer to output buffer, may be the same as <i>input</i>
|
||||
<i>input</i> pointer to input buffer
|
||||
<i>length</i> number of 32-bit units in the input, or negative for
|
||||
a zero-terminated string
|
||||
<i>host_byte_order</i> a NULL value or a non-zero value pointed to means
|
||||
start in host byte order
|
||||
<i>keep_boms</i> if non-zero, BOMs are copied to the output string
|
||||
</pre>
|
||||
The result of the function is the number of 32-bit units placed into the output
|
||||
buffer, including the zero terminator if the string was zero-terminated.
|
||||
</P>
|
||||
<P>
|
||||
If <i>host_byte_order</i> is not NULL, it is set to indicate the byte order that
|
||||
is current at the end of the string.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE native API in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
</p>
|
File diff suppressed because it is too large
Load Diff
|
@ -13,46 +13,63 @@ from the original man page. If there is any nonsense in it, please consult the
|
|||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a>
|
||||
<li><a name="TOC2" href="#SEC2">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||
<li><a name="TOC3" href="#SEC3">BUILDING SHARED AND STATIC LIBRARIES</a>
|
||||
<li><a name="TOC4" href="#SEC4">C++ SUPPORT</a>
|
||||
<li><a name="TOC5" href="#SEC5">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
|
||||
<li><a name="TOC6" href="#SEC6">UNICODE CHARACTER PROPERTY SUPPORT</a>
|
||||
<li><a name="TOC7" href="#SEC7">JUST-IN-TIME COMPILER SUPPORT</a>
|
||||
<li><a name="TOC8" href="#SEC8">CODE VALUE OF NEWLINE</a>
|
||||
<li><a name="TOC9" href="#SEC9">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC10" href="#SEC10">POSIX MALLOC USAGE</a>
|
||||
<li><a name="TOC11" href="#SEC11">HANDLING VERY LARGE PATTERNS</a>
|
||||
<li><a name="TOC12" href="#SEC12">AVOIDING EXCESSIVE STACK USAGE</a>
|
||||
<li><a name="TOC13" href="#SEC13">LIMITING PCRE RESOURCE USAGE</a>
|
||||
<li><a name="TOC14" href="#SEC14">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
||||
<li><a name="TOC15" href="#SEC15">USING EBCDIC CODE</a>
|
||||
<li><a name="TOC16" href="#SEC16">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
||||
<li><a name="TOC17" href="#SEC17">PCREGREP BUFFER SIZE</a>
|
||||
<li><a name="TOC18" href="#SEC18">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
|
||||
<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
|
||||
<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
|
||||
<li><a name="TOC21" href="#SEC21">SEE ALSO</a>
|
||||
<li><a name="TOC22" href="#SEC22">AUTHOR</a>
|
||||
<li><a name="TOC23" href="#SEC23">REVISION</a>
|
||||
<li><a name="TOC1" href="#SEC1">BUILDING PCRE</a>
|
||||
<li><a name="TOC2" href="#SEC2">PCRE BUILD-TIME OPTIONS</a>
|
||||
<li><a name="TOC3" href="#SEC3">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||
<li><a name="TOC4" href="#SEC4">BUILDING SHARED AND STATIC LIBRARIES</a>
|
||||
<li><a name="TOC5" href="#SEC5">C++ SUPPORT</a>
|
||||
<li><a name="TOC6" href="#SEC6">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
|
||||
<li><a name="TOC7" href="#SEC7">UNICODE CHARACTER PROPERTY SUPPORT</a>
|
||||
<li><a name="TOC8" href="#SEC8">JUST-IN-TIME COMPILER SUPPORT</a>
|
||||
<li><a name="TOC9" href="#SEC9">CODE VALUE OF NEWLINE</a>
|
||||
<li><a name="TOC10" href="#SEC10">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC11" href="#SEC11">POSIX MALLOC USAGE</a>
|
||||
<li><a name="TOC12" href="#SEC12">HANDLING VERY LARGE PATTERNS</a>
|
||||
<li><a name="TOC13" href="#SEC13">AVOIDING EXCESSIVE STACK USAGE</a>
|
||||
<li><a name="TOC14" href="#SEC14">LIMITING PCRE RESOURCE USAGE</a>
|
||||
<li><a name="TOC15" href="#SEC15">CREATING CHARACTER TABLES AT BUILD TIME</a>
|
||||
<li><a name="TOC16" href="#SEC16">USING EBCDIC CODE</a>
|
||||
<li><a name="TOC17" href="#SEC17">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
|
||||
<li><a name="TOC18" href="#SEC18">PCREGREP BUFFER SIZE</a>
|
||||
<li><a name="TOC19" href="#SEC19">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
|
||||
<li><a name="TOC20" href="#SEC20">DEBUGGING WITH VALGRIND SUPPORT</a>
|
||||
<li><a name="TOC21" href="#SEC21">CODE COVERAGE REPORTING</a>
|
||||
<li><a name="TOC22" href="#SEC22">SEE ALSO</a>
|
||||
<li><a name="TOC23" href="#SEC23">AUTHOR</a>
|
||||
<li><a name="TOC24" href="#SEC24">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
|
||||
<br><a name="SEC1" href="#TOC1">BUILDING PCRE</a><br>
|
||||
<P>
|
||||
This document describes the optional features of PCRE that can be selected when
|
||||
the library is compiled. It assumes use of the <b>configure</b> script, where
|
||||
the optional features are selected or deselected by providing options to
|
||||
<b>configure</b> before running the <b>make</b> command. However, the same
|
||||
options can be selected in both Unix-like and non-Unix-like environments using
|
||||
the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead of
|
||||
<b>configure</b> to build PCRE.
|
||||
PCRE is distributed with a <b>configure</b> script that can be used to build the
|
||||
library in Unix-like environments using the applications known as Autotools.
|
||||
Also in the distribution are files to support building using <b>CMake</b>
|
||||
instead of <b>configure</b>. The text file
|
||||
<a href="README.txt"><b>README</b></a>
|
||||
contains general information about building with Autotools (some of which is
|
||||
repeated below), and also has some comments about building on various operating
|
||||
systems. There is a lot more information about building PCRE without using
|
||||
Autotools (including information about using <b>CMake</b> and building "by
|
||||
hand") in the text file called
|
||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
|
||||
You should consult this file as well as the
|
||||
<a href="README.txt"><b>README</b></a>
|
||||
file if you are building in a non-Unix-like environment.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
|
||||
<P>
|
||||
The rest of this document describes the optional features of PCRE that can be
|
||||
selected when the library is compiled. It assumes use of the <b>configure</b>
|
||||
script, where the optional features are selected or deselected by providing
|
||||
options to <b>configure</b> before running the <b>make</b> command. However, the
|
||||
same options can be selected in both Unix-like and non-Unix-like environments
|
||||
using the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead
|
||||
of <b>configure</b> to build PCRE.
|
||||
</P>
|
||||
<P>
|
||||
There is a lot more information about building PCRE without using
|
||||
<b>configure</b> (including information about using <b>CMake</b> or building "by
|
||||
hand") in the file called <i>NON-AUTOTOOLS-BUILD</i>, which is part of the PCRE
|
||||
distribution. You should consult this file as well as the <i>README</i> file if
|
||||
you are building in a non-Unix-like environment.
|
||||
If you are not using Autotools or <b>CMake</b>, option selection can be done by
|
||||
editing the <b>config.h</b> file, or by passing parameter settings to the
|
||||
compiler, as described in
|
||||
<a href="NON-AUTOTOOLS-BUILD.txt"><b>NON-AUTOTOOLS-BUILD</b>.</a>
|
||||
</P>
|
||||
<P>
|
||||
The complete list of options for <b>configure</b> (which includes the standard
|
||||
|
@ -67,7 +84,7 @@ The following sections include descriptions of options whose names begin with
|
|||
--enable and --disable always come in pairs, so the complementary option always
|
||||
exists as well, but as it specifies the default, it is not described.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||
<br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||
<P>
|
||||
By default, a library called <b>libpcre</b> is built, containing functions that
|
||||
take string arguments contained in vectors of bytes, either as single-byte
|
||||
|
@ -78,7 +95,7 @@ strings, by adding
|
|||
<pre>
|
||||
--enable-pcre16
|
||||
</pre>
|
||||
to the <b>configure</b> command. You can also build a separate
|
||||
to the <b>configure</b> command. You can also build yet another separate
|
||||
library, called <b>libpcre32</b>, in which strings are contained in vectors of
|
||||
32-bit data units and interpreted either as single-unit characters or UTF-32
|
||||
strings, by adding
|
||||
|
@ -94,17 +111,17 @@ and POSIX wrappers are for the 8-bit library only, and that <b>pcregrep</b> is
|
|||
an 8-bit program. None of these are built if you select only the 16-bit or
|
||||
32-bit libraries.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
|
||||
<br><a name="SEC4" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
|
||||
<P>
|
||||
The PCRE building process uses <b>libtool</b> to build both shared and static
|
||||
Unix libraries by default. You can suppress one of these by adding one of
|
||||
The Autotools PCRE building process uses <b>libtool</b> to build both shared and
|
||||
static libraries by default. You can suppress one of these by adding one of
|
||||
<pre>
|
||||
--disable-shared
|
||||
--disable-static
|
||||
</pre>
|
||||
to the <b>configure</b> command, as required.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">C++ SUPPORT</a><br>
|
||||
<br><a name="SEC5" href="#TOC1">C++ SUPPORT</a><br>
|
||||
<P>
|
||||
By default, if the 8-bit library is being built, the <b>configure</b> script
|
||||
will search for a C++ compiler and C++ header files. If it finds them, it
|
||||
|
@ -115,7 +132,7 @@ strings). You can disable this by adding
|
|||
</pre>
|
||||
to the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
|
||||
<br><a name="SEC6" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
|
||||
<P>
|
||||
To build PCRE with support for UTF Unicode character strings, add
|
||||
<pre>
|
||||
|
@ -143,7 +160,7 @@ not possible to support both EBCDIC and UTF-8 codes in the same version of the
|
|||
library. Consequently, --enable-utf and --enable-ebcdic are mutually
|
||||
exclusive.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
|
||||
<br><a name="SEC7" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
|
||||
<P>
|
||||
UTF support allows the libraries to process character codepoints up to 0x10ffff
|
||||
in the strings that they handle. On its own, however, it does not provide any
|
||||
|
@ -163,7 +180,7 @@ supported. Details are given in the
|
|||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||
<br><a name="SEC8" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||
<P>
|
||||
Just-in-time compiler support is included in the build by specifying
|
||||
<pre>
|
||||
|
@ -180,7 +197,7 @@ pcregrep automatically makes use of it, unless you add
|
|||
</pre>
|
||||
to the "configure" command.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
|
||||
<br><a name="SEC9" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
|
||||
<P>
|
||||
By default, PCRE interprets the linefeed (LF) character as indicating the end
|
||||
of a line. This is the normal newline character on Unix-like systems. You can
|
||||
|
@ -213,7 +230,7 @@ Whatever line ending convention is selected when PCRE is built can be
|
|||
overridden when the library functions are called. At build time it is
|
||||
conventional to use the standard for your operating system.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<br><a name="SEC10" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<P>
|
||||
By default, the sequence \R in a pattern matches any Unicode newline sequence,
|
||||
whatever has been selected as the line ending sequence. If you specify
|
||||
|
@ -224,7 +241,7 @@ the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
|
|||
selected when PCRE is built can be overridden when the library functions are
|
||||
called.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">POSIX MALLOC USAGE</a><br>
|
||||
<br><a name="SEC11" href="#TOC1">POSIX MALLOC USAGE</a><br>
|
||||
<P>
|
||||
When the 8-bit library is called through the POSIX interface (see the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
|
@ -240,7 +257,7 @@ such as
|
|||
</pre>
|
||||
to the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
||||
<br><a name="SEC12" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
|
||||
<P>
|
||||
Within a compiled pattern, offset values are used to point from one part to
|
||||
another (for example, from an opening parenthesis to an alternation
|
||||
|
@ -259,7 +276,7 @@ longer offsets slows down the operation of PCRE because it has to load
|
|||
additional data when handling them. For the 32-bit library the value is always
|
||||
4 and cannot be overridden; the value of --with-link-size is ignored.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
||||
<br><a name="SEC13" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
|
||||
<P>
|
||||
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
|
||||
by making recursive calls to an internal function called <b>match()</b>. In
|
||||
|
@ -290,7 +307,7 @@ perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
|
|||
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
|
||||
function; it is not relevant for <b>pcre_dfa_exec()</b>.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
|
||||
<br><a name="SEC14" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
|
||||
<P>
|
||||
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
|
||||
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
|
||||
|
@ -319,7 +336,7 @@ constraints. However, you can set a lower limit by adding, for example,
|
|||
</pre>
|
||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||
<br><a name="SEC15" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||
<P>
|
||||
PCRE uses fixed tables for processing characters whose code values are less
|
||||
than 256. By default, PCRE is built with a set of tables that are distributed
|
||||
|
@ -336,7 +353,7 @@ compiling, because <b>dftables</b> is run on the local host. If you need to
|
|||
create alternative tables when cross compiling, you will have to do so "by
|
||||
hand".)
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||
<br><a name="SEC16" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||
<P>
|
||||
PCRE assumes by default that it will run in an environment where the character
|
||||
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
|
||||
|
@ -367,7 +384,7 @@ The options that select newline behaviour, such as --enable-newline-is-cr,
|
|||
and equivalent run-time options, refer to these character values in an EBCDIC
|
||||
environment.
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||
<br><a name="SEC17" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||
<P>
|
||||
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
|
||||
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
|
||||
|
@ -380,7 +397,7 @@ to the <b>configure</b> command. These options naturally require that the
|
|||
relevant libraries are installed on your system. Configuration will fail if
|
||||
they are not.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
|
||||
<br><a name="SEC18" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
|
||||
<P>
|
||||
<b>pcregrep</b> uses an internal buffer to hold a "window" on the file it is
|
||||
scanning, in order to be able to output "before" and "after" lines when it
|
||||
|
@ -395,7 +412,7 @@ parameter value by adding, for example,
|
|||
to the <b>configure</b> command. The caller of <b>pcregrep</b> can, however,
|
||||
override this value by specifying a run-time option.
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||
<br><a name="SEC19" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||
<P>
|
||||
If you add
|
||||
<pre>
|
||||
|
@ -426,7 +443,7 @@ automatically included, you may need to add something like
|
|||
</pre>
|
||||
immediately before the <b>configure</b> command.
|
||||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
|
||||
<br><a name="SEC20" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
|
||||
<P>
|
||||
By adding the
|
||||
<pre>
|
||||
|
@ -436,7 +453,7 @@ option to the <b>configure</b> command, PCRE will use valgrind annotations
|
|||
to mark certain memory regions as unaddressable. This allows it to detect
|
||||
invalid memory accesses, and is mostly useful for debugging PCRE itself.
|
||||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">CODE COVERAGE REPORTING</a><br>
|
||||
<br><a name="SEC21" href="#TOC1">CODE COVERAGE REPORTING</a><br>
|
||||
<P>
|
||||
If your C compiler is gcc, you can build a version of PCRE that can generate a
|
||||
code coverage report for its test suite. To enable this, you must install
|
||||
|
@ -493,11 +510,11 @@ This cleans all coverage data including the generated coverage report. For more
|
|||
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC22" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcreapi</b>(3), <b>pcre16</b>, <b>pcre32</b>, <b>pcre_config</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC23" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
|
@ -506,11 +523,11 @@ University Computing Service
|
|||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC24" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 30 October 2012
|
||||
Last updated: 12 May 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -64,23 +64,63 @@ it is processed as if it were
|
|||
<br>
|
||||
<br>
|
||||
Notice that there is a callout before and after each parenthesis and
|
||||
alternation bar. Automatic callouts can be used for tracking the progress of
|
||||
pattern matching. The
|
||||
<a href="pcretest.html"><b>pcretest</b></a>
|
||||
command has an option that sets automatic callouts; when it is used, the output
|
||||
indicates how the pattern is matched. This is useful information when you are
|
||||
trying to optimize the performance of a particular pattern.
|
||||
alternation bar. If the pattern contains a conditional group whose condition is
|
||||
an assertion, an automatic callout is inserted immediately before the
|
||||
condition. Such a callout may also be inserted explicitly, for example:
|
||||
<pre>
|
||||
(?(?C9)(?=a)ab|de)
|
||||
</pre>
|
||||
This applies only to assertion conditions (because they are themselves
|
||||
independent groups).
|
||||
</P>
|
||||
<P>
|
||||
The use of callouts in a pattern makes it ineligible for optimization by the
|
||||
just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE
|
||||
option always fails.
|
||||
Automatic callouts can be used for tracking the progress of pattern matching.
|
||||
The
|
||||
<a href="pcretest.html"><b>pcretest</b></a>
|
||||
program has a pattern qualifier (/C) that sets automatic callouts; when it is
|
||||
used, the output indicates how the pattern is being matched. This is useful
|
||||
information when you are trying to optimize the performance of a particular
|
||||
pattern.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
||||
<P>
|
||||
You should be aware that, because of optimizations in the way PCRE matches
|
||||
patterns by default, callouts sometimes do not happen. For example, if the
|
||||
pattern is
|
||||
You should be aware that, because of optimizations in the way PCRE compiles and
|
||||
matches patterns, callouts sometimes do not happen exactly as you might expect.
|
||||
</P>
|
||||
<P>
|
||||
At compile time, PCRE "auto-possessifies" repeated items when it knows that
|
||||
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||
if it were a++[bc]. The <b>pcretest</b> output when this pattern is anchored and
|
||||
then applied with automatic callouts to the string "aaaa" is:
|
||||
<pre>
|
||||
--->aaaa
|
||||
+0 ^ ^
|
||||
+1 ^ a+
|
||||
+3 ^ ^ [bc]
|
||||
No match
|
||||
</pre>
|
||||
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
|
||||
to <b>pcre_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
|
||||
this is done in <b>pcretest</b> (using the /O qualifier), the output changes to
|
||||
this:
|
||||
<pre>
|
||||
--->aaaa
|
||||
+0 ^ ^
|
||||
+1 ^ a+
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^^ [bc]
|
||||
No match
|
||||
</pre>
|
||||
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||
again, repeatedly, until a+ itself fails.
|
||||
</P>
|
||||
<P>
|
||||
Other optimizations that provide fast "no match" results also affect callouts.
|
||||
For example, if the pattern is
|
||||
<pre>
|
||||
ab(?C4)cd
|
||||
</pre>
|
||||
|
@ -104,11 +144,11 @@ callouts such as the example above are obeyed.
|
|||
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
||||
<P>
|
||||
During matching, when PCRE reaches a callout point, the external function
|
||||
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called
|
||||
(if it is set). This applies to both normal and DFA matching. The only
|
||||
argument to the callout function is a pointer to a <b>pcre_callout</b>
|
||||
or <b>pcre[16|32]_callout</b> block.
|
||||
These structures contain the following fields:
|
||||
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called (if it is
|
||||
set). This applies to both normal and DFA matching. The only argument to the
|
||||
callout function is a pointer to a <b>pcre_callout</b> or
|
||||
<b>pcre[16|32]_callout</b> block. These structures contain the following
|
||||
fields:
|
||||
<pre>
|
||||
int <i>version</i>;
|
||||
int <i>callout_number</i>;
|
||||
|
@ -141,10 +181,10 @@ automatically generated callouts).
|
|||
<P>
|
||||
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
|
||||
passed by the caller to the matching function. When <b>pcre_exec()</b> or
|
||||
<b>pcre[16|32]_exec()</b> is used, the contents can be inspected, in order to extract
|
||||
substrings that have been matched so far, in the same way as for extracting
|
||||
substrings after a match has completed. For the DFA matching functions, this
|
||||
field is not useful.
|
||||
<b>pcre[16|32]_exec()</b> is used, the contents can be inspected, in order to
|
||||
extract substrings that have been matched so far, in the same way as for
|
||||
extracting substrings after a match has completed. For the DFA matching
|
||||
functions, this field is not useful.
|
||||
</P>
|
||||
<P>
|
||||
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
|
||||
|
@ -171,8 +211,10 @@ functions are used, because they do not support captured substrings.
|
|||
</P>
|
||||
<P>
|
||||
The <i>capture_last</i> field contains the number of the most recently captured
|
||||
substring. If no substrings have been captured, its value is -1. This is always
|
||||
the case for the DFA matching functions.
|
||||
substring. However, when a recursion exits, the value reverts to what it was
|
||||
outside the recursion, as do the values of all captured substrings. If no
|
||||
substrings have been captured, the value of <i>capture_last</i> is -1. This is
|
||||
always the case for the DFA matching functions.
|
||||
</P>
|
||||
<P>
|
||||
The <i>callout_data</i> field contains a value that is passed to a matching
|
||||
|
@ -203,11 +245,12 @@ same callout number. However, they are set for all callouts.
|
|||
</P>
|
||||
<P>
|
||||
The <i>mark</i> field is present from version 2 of the callout structure. In
|
||||
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a pointer to
|
||||
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
||||
(*THEN) item in the match, or NULL if no such items have been passed. Instances
|
||||
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
||||
callouts from the DFA matching functions this field always contains NULL.
|
||||
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a
|
||||
pointer to the zero-terminated name of the most recently passed (*MARK),
|
||||
(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
|
||||
passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
|
||||
previous (*MARK). In callouts from the DFA matching functions this field always
|
||||
contains NULL.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
|
||||
<P>
|
||||
|
@ -234,9 +277,9 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 24 June 2012
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -36,10 +36,8 @@ these do not seem to have any use.
|
|||
</P>
|
||||
<P>
|
||||
3. Capturing subpatterns that occur inside negative lookahead assertions are
|
||||
counted, but their entries in the offsets vector are never set. Perl sets its
|
||||
numerical variables from any such patterns that are matched before the
|
||||
assertion fails to match something (thereby succeeding), but only if the
|
||||
negative lookahead assertion contains just one branch.
|
||||
counted, but their entries in the offsets vector are never set. Perl sometimes
|
||||
(but not always) sets its numerical variables from inside negative assertions.
|
||||
</P>
|
||||
<P>
|
||||
4. Though binary zero characters are supported in the subject string, they are
|
||||
|
@ -102,24 +100,32 @@ in the
|
|||
page.
|
||||
</P>
|
||||
<P>
|
||||
10. If any of the backtracking control verbs are used in an assertion or in a
|
||||
subpattern that is called as a subroutine (whether or not recursively), their
|
||||
effect is confined to that subpattern; it does not extend to the surrounding
|
||||
pattern. This is not always the case in Perl. In particular, if (*THEN) is
|
||||
present in a group that is called as a subroutine, its action is limited to
|
||||
that group, even if the group does not contain any | characters. There is one
|
||||
exception to this: the name from a (*MARK), (*PRUNE), or (*THEN) that is
|
||||
encountered in a successful positive assertion <i>is</i> passed back when a
|
||||
match succeeds (compare capturing parentheses in assertions). Note that such
|
||||
subpatterns are processed as anchored at the point where they are tested.
|
||||
10. If any of the backtracking control verbs are used in a subpattern that is
|
||||
called as a subroutine (whether or not recursively), their effect is confined
|
||||
to that subpattern; it does not extend to the surrounding pattern. This is not
|
||||
always the case in Perl. In particular, if (*THEN) is present in a group that
|
||||
is called as a subroutine, its action is limited to that group, even if the
|
||||
group does not contain any | characters. Note that such subpatterns are
|
||||
processed as anchored at the point where they are tested.
|
||||
</P>
|
||||
<P>
|
||||
11. There are some differences that are concerned with the settings of captured
|
||||
11. If a pattern contains more than one backtracking control verb, the first
|
||||
one that is backtracked onto acts. For example, in the pattern
|
||||
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
|
||||
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
|
||||
same as PCRE, but there are examples where it differs.
|
||||
</P>
|
||||
<P>
|
||||
12. Most backtracking verbs in assertions have their normal actions. They are
|
||||
not confined to the assertion.
|
||||
</P>
|
||||
<P>
|
||||
13. There are some differences that are concerned with the settings of captured
|
||||
strings when part of a pattern is repeated. For example, matching "aba" against
|
||||
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
|
||||
</P>
|
||||
<P>
|
||||
12. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
|
||||
14. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
|
||||
names is not as general as Perl's. This is a consequence of the fact that PCRE
|
||||
works internally just with numbers, using an external table to translate
|
||||
between numbers and names. In particular, a pattern such as (?|(?&lt;a&gt;A)|(?&lt;b&gt;B)),
|
||||
|
@ -130,13 +136,26 @@ names map to capturing subpattern number 1. To avoid this confusing situation,
|
|||
an error is given at compile time.
|
||||
</P>
|
||||
<P>
|
||||
13. Perl recognizes comments in some places that PCRE does not, for example,
|
||||
15. Perl recognizes comments in some places that PCRE does not, for example,
|
||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||
Perl allows white space between ( and ? but PCRE never does, even if the
|
||||
PCRE_EXTENDED option is set.
|
||||
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
|
||||
</P>
|
||||
<P>
|
||||
14. PCRE provides some extensions to the Perl regular expression facilities.
|
||||
16. Perl, when in warning mode, gives warnings for character classes such as
|
||||
[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
|
||||
warning features, so it gives an error in these cases because they are almost
|
||||
certainly user mistakes.
|
||||
</P>
|
||||
<P>
|
||||
17. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||
affected when case-independent matching is specified. For example, \p{Lu}
|
||||
always matches an upper case letter. I think Perl has changed in this respect;
|
||||
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
|
||||
letters, regardless of case, when case independence is specified.
|
||||
</P>
|
||||
<P>
|
||||
18. PCRE provides some extensions to the Perl regular expression facilities.
|
||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||
of which (such as named parentheses) have been in PCRE for some time. This list
|
||||
is with respect to Perl 5.10:
|
||||
|
@ -207,9 +226,9 @@ Cambridge CB2 3QH, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 25 August 2012
|
||||
Last updated: 10 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -37,8 +37,10 @@ man page, in case the conversion went wrong.
|
|||
<b>pcregrep</b> searches files for character patterns, in the same way as other
|
||||
grep commands do, but it uses the PCRE regular expression library to support
|
||||
patterns that are compatible with the regular expressions of Perl 5. See
|
||||
<a href="pcresyntax.html"><b>pcresyntax</b>(3)</a>
|
||||
for a quick-reference summary of pattern syntax, or
|
||||
<a href="pcrepattern.html"><b>pcrepattern</b>(3)</a>
|
||||
for a full description of syntax and semantics of the regular expressions
|
||||
for a full description of the syntax and semantics of the regular expressions
|
||||
that PCRE supports.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -748,9 +750,9 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 13 September 2012
|
||||
Last updated: 03 April 2014
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -172,15 +172,9 @@ PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and
|
|||
PCRE_PARTIAL_SOFT.
|
||||
</P>
|
||||
<P>
|
||||
The unsupported pattern items are:
|
||||
<pre>
|
||||
\C match a single byte; not supported in UTF-8 mode
|
||||
(?Cn) callouts
|
||||
(*PRUNE) )
|
||||
(*SKIP) ) backtracking control verbs
|
||||
(*THEN) )
|
||||
</pre>
|
||||
Support for some of these may be added in future.
|
||||
The only unsupported pattern items are \C (match a single data unit) when
|
||||
running in a UTF mode, and a callout immediately before an assertion condition
|
||||
in a conditional group.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT EXECUTION</a><br>
|
||||
<P>
|
||||
|
@ -449,9 +443,9 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 31 October 2012
|
||||
Last updated: 17 March 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -21,9 +21,10 @@ practice be relevant.
|
|||
</P>
|
||||
<P>
|
||||
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
||||
for the 8-bit library, 32-bit units for the 32-bit library, and 32-bit units for
|
||||
the 32-bit library) if PCRE is compiled with the default internal linkage size
|
||||
of 2 bytes. If you want to process regular expressions that are truly enormous,
|
||||
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
|
||||
the 32-bit library) if PCRE is compiled with the default internal linkage size,
|
||||
which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit
|
||||
library. If you want to process regular expressions that are truly enormous,
|
||||
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
||||
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
|
||||
the source distribution and the
|
||||
|
@ -36,7 +37,10 @@ All values in repeating quantifiers must be less than 65536.
|
|||
</P>
|
||||
<P>
|
||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||
no more than 65535 capturing subpatterns.
|
||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||
order to limit the amount of system stack used at compile time. The limit can
|
||||
be specified when PCRE is built; the default is 250.
|
||||
</P>
|
||||
<P>
|
||||
There is a limit to the number of forward references to subsequent subpatterns
|
||||
|
@ -50,7 +54,7 @@ maximum number of named subpatterns is 10000.
|
|||
</P>
|
||||
<P>
|
||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit library.
|
||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a subject string is the largest positive number that an
|
||||
|
@ -77,9 +81,9 @@ Cambridge CB2 3QH, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 04 May 2012
|
||||
Last updated: 05 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -126,6 +126,15 @@ character of the subject. The algorithm does not automatically move on to find
|
|||
matches that start at later positions.
|
||||
</P>
|
||||
<P>
|
||||
PCRE's "auto-possessification" optimization usually applies to character
|
||||
repeats at the end of a pattern (as well as internally). For example, the
|
||||
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
|
||||
even considering the possibility of backtracking into the repeated digits. For
|
||||
DFA matching, this means that only one possible match is found. If you really
|
||||
do want multiple matches in such cases, either use an ungreedy repeat
|
||||
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||
</P>
|
||||
<P>
|
||||
There are a number of features of PCRE regular expressions that are not
|
||||
supported by the alternative matching algorithm. They are as follows:
|
||||
</P>
|
||||
|
@ -224,7 +233,7 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 08 January 2012
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -81,33 +81,36 @@ strings. This optimization is also disabled for partial matching.
|
|||
<br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a><br>
|
||||
<P>
|
||||
A partial match occurs during a call to <b>pcre_exec()</b> or
|
||||
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached successfully,
|
||||
but matching cannot continue because more characters are needed. However, at
|
||||
least one character in the subject must have been inspected. This character
|
||||
need not form part of the final matched string; lookbehind assertions and the
|
||||
\K escape sequence provide ways of inspecting characters before the start of a
|
||||
matched substring. The requirement for inspecting at least one character exists
|
||||
because an empty string can always be matched; without such a restriction there
|
||||
would always be a partial match of an empty string at the end of the subject.
|
||||
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached
|
||||
successfully, but matching cannot continue because more characters are needed.
|
||||
However, at least one character in the subject must have been inspected. This
|
||||
character need not form part of the final matched string; lookbehind assertions
|
||||
and the \K escape sequence provide ways of inspecting characters before the
|
||||
start of a matched substring. The requirement for inspecting at least one
|
||||
character exists because an empty string can always be matched; without such a
|
||||
restriction there would always be a partial match of an empty string at the end
|
||||
of the subject.
|
||||
</P>
|
||||
<P>
|
||||
If there are at least two slots in the offsets vector when a partial match is
|
||||
returned, the first slot is set to the offset of the earliest character that
|
||||
was inspected. For convenience, the second offset points to the end of the
|
||||
subject so that a substring can easily be identified.
|
||||
subject so that a substring can easily be identified. If there are at least
|
||||
three slots in the offsets vector, the third slot is set to the offset of the
|
||||
character where matching started.
|
||||
</P>
|
||||
<P>
|
||||
For the majority of patterns, the first offset identifies the start of the
|
||||
partially matched string. However, for patterns that contain lookbehind
|
||||
assertions, or \K, or begin with \b or \B, earlier characters have been
|
||||
inspected while carrying out the match. For example:
|
||||
For the majority of patterns, the contents of the first and third slots will be
|
||||
the same. However, for patterns that contain lookbehind assertions, or begin
|
||||
with \b or \B, characters before the one where matching started may have been
|
||||
inspected while carrying out the match. For example, consider this pattern:
|
||||
<pre>
|
||||
/(?<=abc)123/
|
||||
</pre>
|
||||
This pattern matches "123", but only if it is preceded by "abc". If the subject
|
||||
string is "xyzabc12", the offsets after a partial match are for the substring
|
||||
"abc12", because all these characters are needed if another match is tried
|
||||
with extra characters added to the subject.
|
||||
string is "xyzabc12", the first two offsets after a partial match are for the
|
||||
substring "abc12", because all these characters were inspected. However, the
|
||||
third offset is set to 6, because that is the offset where matching began.
|
||||
</P>
|
||||
<P>
|
||||
What happens when a partial match is identified depends on which of the two
|
||||
|
@ -303,6 +306,16 @@ not retain the previously partially-matched string. It is up to the calling
|
|||
program to do that if it needs to.
|
||||
</P>
|
||||
<P>
|
||||
That means that, for an unanchored pattern, if a continued match fails, it is
|
||||
not possible to try again at a new starting point. All this facility is capable
|
||||
of doing is continuing with the previous match attempt. In the previous
|
||||
example, if the second set of data is "ug23" the result is no match, even
|
||||
though there would be a match for "aug23" if the entire string were given at
|
||||
once. Depending on the application, this may or may not be what you want.
|
||||
The only way to allow for starting again at the next character is to retain the
|
||||
matched part of the subject and try a new complete match.
|
||||
</P>
|
||||
<P>
|
||||
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
||||
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
||||
facility can be used to pass very long subject strings to the DFA matching
|
||||
|
@ -334,10 +347,9 @@ processing time is needed.
|
|||
<P>
|
||||
<b>Note:</b> If the pattern contains lookbehind assertions, or \K, or starts
|
||||
with \b or \B, the string that is returned for a partial match includes
|
||||
characters that precede the partially matched string itself, because these must
|
||||
be retained when adding on more characters for a subsequent matching attempt.
|
||||
However, in some cases you may need to retain even earlier characters, as
|
||||
discussed in the next section.
|
||||
characters that precede the start of what would be returned for a complete
|
||||
match, because it contains all the characters that were inspected during the
|
||||
partial match.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
|
||||
<P>
|
||||
|
@ -356,12 +368,35 @@ includes the effect of PCRE_NOTEOL.
|
|||
offsets that are returned for a partial match. However a lookbehind assertion
|
||||
later in the pattern could require even earlier characters to be inspected. You
|
||||
can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the
|
||||
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the length
|
||||
of the largest lookbehind in the pattern. This length is given in characters,
|
||||
not bytes. If you always retain at least that many characters before the
|
||||
partially matched string, all should be well. (Of course, near the start of the
|
||||
subject, fewer characters may be present; in that case all characters should be
|
||||
retained.)
|
||||
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the
|
||||
length of the longest lookbehind in the pattern. This length is given in
|
||||
characters, not bytes. If you always retain at least that many characters
|
||||
before the partially matched string, all should be well. (Of course, near the
|
||||
start of the subject, fewer characters may be present; in that case all
|
||||
characters should be retained.)
|
||||
</P>
|
||||
<P>
|
||||
From release 8.33, there is a more accurate way of deciding which characters to
|
||||
retain. Instead of subtracting the length of the longest lookbehind from the
|
||||
earliest inspected character (<i>offsets[0]</i>), the match start position
|
||||
(<i>offsets[2]</i>) should be used, and the next match attempt started at the
|
||||
<i>offsets[2]</i> character by setting the <i>startoffset</i> argument of
|
||||
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>.
|
||||
</P>
|
||||
<P>
|
||||
For example, if the pattern "(?<=123)abc" is partially
|
||||
matched against the string "xx123a", the three offset values returned are 2, 6,
|
||||
and 5. This indicates that the matching process that gave a partial match
|
||||
started at offset 5, but the characters "123a" were all inspected. The maximum
|
||||
lookbehind for that pattern is 3, so taking that away from 5 shows that we need
|
||||
only keep "123a", and the next match attempt can be started at offset 3 (that
|
||||
is, at "a") when further characters have been added. When the match start is
|
||||
not the earliest inspected character, <b>pcretest</b> shows it explicitly:
|
||||
<pre>
|
||||
re> "(?<=123)abc"
|
||||
data> xx123a\P\P
|
||||
Partial match at offset 5: 123a
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
3. Because a partial match must always contain at least one character, what
|
||||
|
@ -465,9 +500,9 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 24 June 2012
|
||||
Last updated: 02 July 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -13,7 +13,7 @@ from the original man page. If there is any nonsense in it, please consult the
|
|||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF POSIX API</a>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
|
||||
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
|
||||
|
@ -23,23 +23,21 @@ man page, in case the conversion went wrong.
|
|||
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
|
||||
<li><a name="TOC9" href="#SEC9">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF POSIX API</a><br>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>#include <pcreposix.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
|
||||
<b>int <i>cflags</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>cflags</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
|
||||
<b>size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
||||
<b>char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
||||
<b> size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
||||
<b> char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void regfree(regex_t *<i>preg</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
|
|
|
@ -102,8 +102,8 @@ study data.
|
|||
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
||||
<P>
|
||||
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
||||
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary,
|
||||
you pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
||||
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary, you
|
||||
pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
||||
the usual way.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -119,6 +119,11 @@ in the
|
|||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
<b>Warning:</b> The tables that <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> use
|
||||
must be the same as those that were used when the pattern was compiled. If this
|
||||
is not the case, the behaviour is undefined.
|
||||
</P>
|
||||
<P>
|
||||
If you did not provide custom character tables when the pattern was compiled,
|
||||
the pointer in the compiled pattern is NULL, which causes the matching
|
||||
functions to use PCRE's internal tables. Thus, you do not need to take any
|
||||
|
@ -126,9 +131,9 @@ special action at run time in this case.
|
|||
</P>
|
||||
<P>
|
||||
If you saved study data with the compiled pattern, you need to create your own
|
||||
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point to the
|
||||
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
||||
<i>flags</i> field to indicate that study data is present. Then pass the
|
||||
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point
|
||||
to the reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in
|
||||
the <i>flags</i> field to indicate that study data is present. Then pass the
|
||||
<b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
|
||||
pattern was studied for just-in-time optimization, that data cannot be saved,
|
||||
and so is lost by a save/restore cycle.
|
||||
|
@ -149,9 +154,9 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 24 June 2012
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -29,13 +29,13 @@ man page, in case the conversion went wrong.
|
|||
<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
|
||||
<li><a name="TOC15" href="#SEC15">COMMENT</a>
|
||||
<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
|
||||
<li><a name="TOC17" href="#SEC17">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
||||
<li><a name="TOC18" href="#SEC18">BACKREFERENCES</a>
|
||||
<li><a name="TOC19" href="#SEC19">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
||||
<li><a name="TOC20" href="#SEC20">CONDITIONAL PATTERNS</a>
|
||||
<li><a name="TOC21" href="#SEC21">BACKTRACKING CONTROL</a>
|
||||
<li><a name="TOC22" href="#SEC22">NEWLINE CONVENTIONS</a>
|
||||
<li><a name="TOC23" href="#SEC23">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a>
|
||||
<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a>
|
||||
<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
|
||||
<li><a name="TOC20" href="#SEC20">BACKREFERENCES</a>
|
||||
<li><a name="TOC21" href="#SEC21">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
|
||||
<li><a name="TOC22" href="#SEC22">CONDITIONAL PATTERNS</a>
|
||||
<li><a name="TOC23" href="#SEC23">BACKTRACKING CONTROL</a>
|
||||
<li><a name="TOC24" href="#SEC24">CALLOUTS</a>
|
||||
<li><a name="TOC25" href="#SEC25">SEE ALSO</a>
|
||||
<li><a name="TOC26" href="#SEC26">AUTHOR</a>
|
||||
|
@ -65,10 +65,14 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||
\n newline (hex 0A)
|
||||
\r carriage return (hex 0D)
|
||||
\t tab (hex 09)
|
||||
\0dd character with octal code 0dd
|
||||
\ddd character with octal code ddd, or backreference
|
||||
\o{ddd..} character with octal code ddd..
|
||||
\xhh character with hex code hh
|
||||
\x{hhh..} character with hex code hhh..
|
||||
</PRE>
|
||||
</pre>
|
||||
Note that \0dd is always an octal code, and that \8 and \9 are the literal
|
||||
characters "8" and "9".
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
|
||||
<P>
|
||||
|
@ -92,9 +96,11 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||
\W a "non-word" character
|
||||
\X a Unicode extended grapheme cluster
|
||||
</pre>
|
||||
In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
|
||||
characters, even in a UTF mode. However, this can be changed by setting the
|
||||
PCRE_UCP option.
|
||||
By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode
|
||||
or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
|
||||
happening, \s and \w may also match characters with code points in the range
|
||||
128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences
|
||||
is changed to use Unicode properties and they match many more characters.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
||||
<P>
|
||||
|
@ -150,9 +156,13 @@ PCRE_UCP option.
|
|||
<pre>
|
||||
Xan Alphanumeric: union of properties L and N
|
||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||
Xuc Universally-named character: one that can be
|
||||
represented by a Universal Character Name
|
||||
Xwd Perl word: property Xan or underscore
|
||||
</PRE>
|
||||
</pre>
|
||||
Perl and POSIX space are now the same. Perl added VT to its space character set
|
||||
at release 5.18 and PCRE changed at release 8.34.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
||||
<P>
|
||||
|
@ -329,7 +339,8 @@ but some of them use Unicode properties if PCRE_UCP is set. You can use
|
|||
<P>
|
||||
<pre>
|
||||
\K reset start of match
|
||||
</PRE>
|
||||
</pre>
|
||||
\K is honoured in positive assertions, but ignored in negative ones.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
|
||||
<P>
|
||||
|
@ -372,18 +383,45 @@ but some of them use Unicode properties if PCRE_UCP is set. You can use
|
|||
(?x) extended (ignore white space)
|
||||
(?-...) unset option(s)
|
||||
</pre>
|
||||
The following are recognized only at the start of a pattern or after one of the
|
||||
newline-setting options with similar syntax:
|
||||
The following are recognized only at the very start of a pattern or after one
|
||||
of the newline or \R options with similar syntax. More than one of them may
|
||||
appear.
|
||||
<pre>
|
||||
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
|
||||
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
|
||||
(*NO_AUTO_POSSESS) no auto-possessification (PCRE_NO_AUTO_POSSESS)
|
||||
(*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
|
||||
(*UTF8) set UTF-8 mode: 8-bit library (PCRE_UTF8)
|
||||
(*UTF16) set UTF-16 mode: 16-bit library (PCRE_UTF16)
|
||||
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
||||
(*UTF) set appropriate UTF mode for the library in use
|
||||
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
|
||||
</pre>
|
||||
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||
limits set by the caller of pcre_exec(), not increase them.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
|
||||
<P>
|
||||
These are recognized only at the very start of the pattern or after option
|
||||
settings with a similar syntax.
|
||||
<pre>
|
||||
(*CR) carriage return only
|
||||
(*LF) linefeed only
|
||||
(*CRLF) carriage return followed by linefeed
|
||||
(*ANYCRLF) all three of the above
|
||||
(*ANY) any Unicode newline sequence
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||
<br><a name="SEC18" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<P>
|
||||
These are recognized only at the very start of the pattern or after option
|
||||
settings with a similar syntax.
|
||||
<pre>
|
||||
(*BSR_ANYCRLF) CR, LF, or CRLF
|
||||
(*BSR_UNICODE) any Unicode newline sequence
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?=...) positive look ahead
|
||||
|
@ -393,7 +431,7 @@ newline-setting options with similar syntax:
|
|||
</pre>
|
||||
Each top-level branch of a look behind must be of a fixed length.
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">BACKREFERENCES</a><br>
|
||||
<br><a name="SEC20" href="#TOC1">BACKREFERENCES</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
\n reference by number (can be ambiguous)
|
||||
|
@ -407,7 +445,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||
(?P=name) reference by name (Python)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
||||
<br><a name="SEC21" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?R) recurse whole pattern
|
||||
|
@ -426,7 +464,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||
\g'-n' call subpattern by relative number (PCRE extension)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
||||
<br><a name="SEC22" href="#TOC1">CONDITIONAL PATTERNS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
(?(condition)yes-pattern)
|
||||
|
@ -445,7 +483,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||
(?(assert)... assertion condition
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||
<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||
<P>
|
||||
The following act immediately they are reached:
|
||||
<pre>
|
||||
|
@ -468,27 +506,6 @@ pattern is not anchored.
|
|||
(*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">NEWLINE CONVENTIONS</a><br>
|
||||
<P>
|
||||
These are recognized only at the very start of the pattern or after a
|
||||
(*BSR_...), (*UTF8), (*UTF16), (*UTF32) or (*UCP) option.
|
||||
<pre>
|
||||
(*CR) carriage return only
|
||||
(*LF) linefeed only
|
||||
(*CRLF) carriage return followed by linefeed
|
||||
(*ANYCRLF) all three of the above
|
||||
(*ANY) any Unicode newline sequence
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">WHAT \R MATCHES</a><br>
|
||||
<P>
|
||||
These are recognized only at the very start of the pattern or after a
|
||||
(*...) option that sets the newline convention or a UTF or UCP mode.
|
||||
<pre>
|
||||
(*BSR_ANYCRLF) CR, LF, or CRLF
|
||||
(*BSR_UNICODE) any Unicode newline sequence
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
<pre>
|
||||
|
@ -512,9 +529,9 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 11 November 2012
|
||||
Last updated: 08 January 2014
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -14,21 +14,22 @@ man page, in case the conversion went wrong.
|
|||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||
<li><a name="TOC3" href="#SEC3">COMMAND LINE OPTIONS</a>
|
||||
<li><a name="TOC4" href="#SEC4">DESCRIPTION</a>
|
||||
<li><a name="TOC5" href="#SEC5">PATTERN MODIFIERS</a>
|
||||
<li><a name="TOC6" href="#SEC6">DATA LINES</a>
|
||||
<li><a name="TOC7" href="#SEC7">THE ALTERNATIVE MATCHING FUNCTION</a>
|
||||
<li><a name="TOC8" href="#SEC8">DEFAULT OUTPUT FROM PCRETEST</a>
|
||||
<li><a name="TOC9" href="#SEC9">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
|
||||
<li><a name="TOC10" href="#SEC10">RESTARTING AFTER A PARTIAL MATCH</a>
|
||||
<li><a name="TOC11" href="#SEC11">CALLOUTS</a>
|
||||
<li><a name="TOC12" href="#SEC12">NON-PRINTING CHARACTERS</a>
|
||||
<li><a name="TOC13" href="#SEC13">SAVING AND RELOADING COMPILED PATTERNS</a>
|
||||
<li><a name="TOC14" href="#SEC14">SEE ALSO</a>
|
||||
<li><a name="TOC15" href="#SEC15">AUTHOR</a>
|
||||
<li><a name="TOC16" href="#SEC16">REVISION</a>
|
||||
<li><a name="TOC2" href="#SEC2">INPUT DATA FORMAT</a>
|
||||
<li><a name="TOC3" href="#SEC3">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
|
||||
<li><a name="TOC4" href="#SEC4">COMMAND LINE OPTIONS</a>
|
||||
<li><a name="TOC5" href="#SEC5">DESCRIPTION</a>
|
||||
<li><a name="TOC6" href="#SEC6">PATTERN MODIFIERS</a>
|
||||
<li><a name="TOC7" href="#SEC7">DATA LINES</a>
|
||||
<li><a name="TOC8" href="#SEC8">THE ALTERNATIVE MATCHING FUNCTION</a>
|
||||
<li><a name="TOC9" href="#SEC9">DEFAULT OUTPUT FROM PCRETEST</a>
|
||||
<li><a name="TOC10" href="#SEC10">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
|
||||
<li><a name="TOC11" href="#SEC11">RESTARTING AFTER A PARTIAL MATCH</a>
|
||||
<li><a name="TOC12" href="#SEC12">CALLOUTS</a>
|
||||
<li><a name="TOC13" href="#SEC13">NON-PRINTING CHARACTERS</a>
|
||||
<li><a name="TOC14" href="#SEC14">SAVING AND RELOADING COMPILED PATTERNS</a>
|
||||
<li><a name="TOC15" href="#SEC15">SEE ALSO</a>
|
||||
<li><a name="TOC16" href="#SEC16">AUTHOR</a>
|
||||
<li><a name="TOC17" href="#SEC17">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
|
@ -63,25 +64,34 @@ conjunction with the test script and data files that are distributed as part of
|
|||
PCRE, and are unlikely to be of use otherwise. They are all documented here,
|
||||
but without much justification.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||
<br><a name="SEC2" href="#TOC1">INPUT DATA FORMAT</a><br>
|
||||
<P>
|
||||
Input to <b>pcretest</b> is processed line by line, either by calling the C
|
||||
library's <b>fgets()</b> function, or via the <b>libreadline</b> library (see
|
||||
below). In Unix-like environments, <b>fgets()</b> treats any bytes other than
|
||||
newline as data characters. However, in some Windows environments character 26
|
||||
(hex 1A) causes an immediate end of file, and no further data is read. For
|
||||
maximum portability, therefore, it is safest to use only ASCII characters in
|
||||
<b>pcretest</b> input files.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">PCRE's 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||
<P>
|
||||
From release 8.30, two separate PCRE libraries can be built. The original one
|
||||
supports 8-bit character strings, whereas the newer 16-bit library supports
|
||||
character strings encoded in 16-bit units. From release 8.32, a third
|
||||
library can be built, supporting character strings encoded in 32-bit units.
|
||||
The <b>pcretest</b> program can be
|
||||
used to test all three libraries. However, it is itself still an 8-bit program,
|
||||
reading 8-bit input and writing 8-bit output. When testing the 16-bit or 32-bit
|
||||
library, the patterns and data strings are converted to 16- or 32-bit format
|
||||
before being passed to the PCRE library functions. Results are converted to
|
||||
8-bit for output.
|
||||
character strings encoded in 16-bit units. From release 8.32, a third library
|
||||
can be built, supporting character strings encoded in 32-bit units. The
|
||||
<b>pcretest</b> program can be used to test all three libraries. However, it is
|
||||
itself still an 8-bit program, reading 8-bit input and writing 8-bit output.
|
||||
When testing the 16-bit or 32-bit library, the patterns and data strings are
|
||||
converted to 16- or 32-bit format before being passed to the PCRE library
|
||||
functions. Results are converted to 8-bit for output.
|
||||
</P>
|
||||
<P>
|
||||
References to functions and structures of the form <b>pcre[16|32]_xx</b> below
|
||||
mean "<b>pcre_xx</b> when using the 8-bit library or <b>pcre16_xx</b> when using
|
||||
the 16-bit library".
|
||||
mean "<b>pcre_xx</b> when using the 8-bit library, <b>pcre16_xx</b> when using
|
||||
the 16-bit library, or <b>pcre32_xx</b> when using the 32-bit library".
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">COMMAND LINE OPTIONS</a><br>
|
||||
<br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
|
||||
<P>
|
||||
<b>-8</b>
|
||||
If the 8-bit library has been built, this option causes the 8-bit library
|
||||
|
@ -110,23 +120,30 @@ internal form is output after compilation.
|
|||
<P>
|
||||
<b>-C</b>
|
||||
Output the version number of the PCRE library, and all available information
|
||||
about the optional features that are included, and then exit. All other options
|
||||
are ignored.
|
||||
about the optional features that are included, and then exit with zero exit
|
||||
code. All other options are ignored.
|
||||
</P>
|
||||
<P>
|
||||
<b>-C</b> <i>option</i>
|
||||
Output information about a specific build-time option, then exit. This
|
||||
functionality is intended for use in scripts such as <b>RunTest</b>. The
|
||||
following options output the value indicated:
|
||||
following options output the value and set the exit code as indicated:
|
||||
<pre>
|
||||
ebcdic-nl the code for LF (= NL) in an EBCDIC environment:
|
||||
0x15 or 0x25
|
||||
0 if used in an ASCII environment
|
||||
linksize the internal link size (2, 3, or 4)
|
||||
exit code is always 0
|
||||
linksize the configured internal link size (2, 3, or 4)
|
||||
exit code is set to the link size
|
||||
newline the default newline setting:
|
||||
CR, LF, CRLF, ANYCRLF, or ANY
|
||||
exit code is always 0
|
||||
bsr the default setting for what \R matches:
|
||||
ANYCRLF or ANY
|
||||
exit code is always 0
|
||||
</pre>
|
||||
The following options output 1 for true or zero for false:
|
||||
The following options output 1 for true or 0 for false, and set the exit code
|
||||
to the same value:
|
||||
<pre>
|
||||
ebcdic compiled for an EBCDIC environment
|
||||
jit just-in-time support is available
|
||||
|
@ -134,8 +151,10 @@ The following options output 1 for true or zero for false:
|
|||
pcre32 the 32-bit library was built
|
||||
pcre8 the 8-bit library was built
|
||||
ucp Unicode property support is available
|
||||
utf UTF-8 and/or UTF-16 and/or UTF-32 support is available
|
||||
</PRE>
|
||||
utf UTF-8 and/or UTF-16 and/or UTF-32 support
|
||||
is available
|
||||
</pre>
|
||||
If an unknown option is given, an error message is output; the exit code is 0.
|
||||
</P>
|
||||
<P>
|
||||
<b>-d</b>
|
||||
|
@ -171,6 +190,11 @@ equivalent to adding <b>/M</b> to each regular expression. The size is given in
|
|||
bytes for both libraries.
|
||||
</P>
|
||||
<P>
|
||||
<b>-O</b>
|
||||
Behave as if each pattern has the <b>/O</b> modifier, that is disable
|
||||
auto-possessification for all patterns.
|
||||
</P>
|
||||
<P>
|
||||
<b>-o</b> <i>osize</i>
|
||||
Set the number of elements in the output vector that is used when calling
|
||||
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> to be <i>osize</i>. The
|
||||
|
@ -240,20 +264,25 @@ should never be studied (see the <b>/S</b> pattern modifier below).
|
|||
</P>
|
||||
<P>
|
||||
<b>-t</b>
|
||||
Run each compile, study, and match many times with a timer, and output
|
||||
resulting time per compile or match (in milliseconds). Do not set <b>-m</b> with
|
||||
<b>-t</b>, because you will then get the size output a zillion times, and the
|
||||
timing will be distorted. You can control the number of iterations that are
|
||||
used for timing by following <b>-t</b> with a number (as a separate item on the
|
||||
command line). For example, "-t 1000" would iterate 1000 times. The default is
|
||||
to iterate 500000 times.
|
||||
Run each compile, study, and match many times with a timer, and output the
|
||||
resulting times per compile, study, or match (in milliseconds). Do not set
|
||||
<b>-m</b> with <b>-t</b>, because you will then get the size output a zillion
|
||||
times, and the timing will be distorted. You can control the number of
|
||||
iterations that are used for timing by following <b>-t</b> with a number (as a
|
||||
separate item on the command line). For example, "-t 1000" iterates 1000 times.
|
||||
The default is to iterate 500000 times.
|
||||
</P>
|
||||
<P>
|
||||
<b>-tm</b>
|
||||
This is like <b>-t</b> except that it times only the matching phase, not the
|
||||
compile or study phases.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
<b>-T</b> <b>-TM</b>
|
||||
These behave like <b>-t</b> and <b>-tm</b>, but in addition, at the end of a run,
|
||||
the total times for all compiles, studies, and matches are output.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
If <b>pcretest</b> is given two filename arguments, it reads from the first and
|
||||
writes to the second. If it is given only one filename argument, it reads from
|
||||
|
@ -271,7 +300,7 @@ option states whether or not <b>readline()</b> will be used.
|
|||
<P>
|
||||
The program handles any number of sets of input on a single input file. Each
|
||||
set starts with a regular expression, and continues with any number of data
|
||||
lines to be matched against the pattern.
|
||||
lines to be matched against that pattern.
|
||||
</P>
|
||||
<P>
|
||||
Each data line is matched separately and independently. If you want to do
|
||||
|
@ -310,7 +339,7 @@ backslash, because
|
|||
is interpreted as the first line of a pattern that starts with "abc/", causing
|
||||
pcretest to read the next line as a continuation of the regular expression.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">PATTERN MODIFIERS</a><br>
|
||||
<br><a name="SEC6" href="#TOC1">PATTERN MODIFIERS</a><br>
|
||||
<P>
|
||||
A pattern may be followed by any number of modifiers, which are mostly single
|
||||
characters, though some of these can be qualified by further characters.
|
||||
|
@ -323,6 +352,7 @@ fall into several groups that are described in detail in the following
|
|||
sections.
|
||||
<pre>
|
||||
<b>/8</b> set UTF mode
|
||||
<b>/9</b> set PCRE_NEVER_UTF (locks out UTF mode)
|
||||
<b>/?</b> disable UTF validity check
|
||||
<b>/+</b> show remainder of subject after match
|
||||
<b>/=</b> show all captures (not just those that are set)
|
||||
|
@ -344,7 +374,9 @@ sections.
|
|||
<b>/M</b> show compiled memory size
|
||||
<b>/m</b> set PCRE_MULTILINE
|
||||
<b>/N</b> set PCRE_NO_AUTO_CAPTURE
|
||||
<b>/O</b> set PCRE_NO_AUTO_POSSESS
|
||||
<b>/P</b> use the POSIX wrapper
|
||||
<b>/Q</b> test external stack check function
|
||||
<b>/S</b> study the pattern after compilation
|
||||
<b>/s</b> set PCRE_DOTALL
|
||||
<b>/T</b> select character tables
|
||||
|
@ -395,12 +427,14 @@ options that do not correspond to anything in Perl:
|
|||
<b>/8</b> PCRE_UTF32 ) when using the 32-bit
|
||||
<b>/?</b> PCRE_NO_UTF32_CHECK ) library
|
||||
|
||||
<b>/9</b> PCRE_NEVER_UTF
|
||||
<b>/A</b> PCRE_ANCHORED
|
||||
<b>/C</b> PCRE_AUTO_CALLOUT
|
||||
<b>/E</b> PCRE_DOLLAR_ENDONLY
|
||||
<b>/f</b> PCRE_FIRSTLINE
|
||||
<b>/J</b> PCRE_DUPNAMES
|
||||
<b>/N</b> PCRE_NO_AUTO_CAPTURE
|
||||
<b>/O</b> PCRE_NO_AUTO_POSSESS
|
||||
<b>/U</b> PCRE_UNGREEDY
|
||||
<b>/W</b> PCRE_UCP
|
||||
<b>/X</b> PCRE_EXTRA
|
||||
|
@ -504,7 +538,10 @@ below.
|
|||
The <b>/I</b> modifier requests that <b>pcretest</b> output information about the
|
||||
compiled pattern (whether it is anchored, has a fixed first character, and
|
||||
so on). It does this by calling <b>pcre[16|32]_fullinfo()</b> after compiling a
|
||||
pattern. If the pattern is studied, the results of that are also output.
|
||||
pattern. If the pattern is studied, the results of that are also output. In
|
||||
this output, the word "char" means a non-UTF character, that is, the value of a
|
||||
single data item (8-bit, 16-bit, or 32-bit, depending on the library that is
|
||||
being tested).
|
||||
</P>
|
||||
<P>
|
||||
The <b>/K</b> modifier requests <b>pcretest</b> to show names from backtracking
|
||||
|
@ -538,14 +575,22 @@ successfully studied with the PCRE_STUDY_JIT_COMPILE option, the size of the
|
|||
JIT compiled code is also output.
|
||||
</P>
|
||||
<P>
|
||||
The <b>/Q</b> modifier is used to test the use of <b>pcre_stack_guard</b>. It
|
||||
must be followed by '0' or '1', specifying the return code to be given from an
|
||||
external function that is passed to PCRE and used for stack checking during
|
||||
compilation (see the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation for details).
|
||||
</P>
|
||||
<P>
|
||||
The <b>/S</b> modifier causes <b>pcre[16|32]_study()</b> to be called after the
|
||||
expression has been compiled, and the results used when the expression is
|
||||
matched. There are a number of qualifying characters that may follow <b>/S</b>.
|
||||
They may appear in any order.
|
||||
</P>
|
||||
<P>
|
||||
If <b>S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is called
|
||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||
If <b>/S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is
|
||||
called with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||
<b>pcre_extra</b> block, even when studying discovers no useful information.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -624,7 +669,38 @@ function:
|
|||
The <b>/+</b> modifier works as described above. All other modifiers are
|
||||
ignored.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">DATA LINES</a><br>
|
||||
<br><b>
|
||||
Locking out certain modifiers
|
||||
</b><br>
|
||||
<P>
|
||||
PCRE can be compiled with or without support for certain features such as
|
||||
UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
|
||||
into a number of different files that are selected for running depending on
|
||||
which features are available. When updating the tests, it is all too easy to
|
||||
put a new test into the wrong file by mistake; for example, to put a test that
|
||||
requires UTF support into a file that is used when it is not available. To help
|
||||
detect such mistakes as early as possible, there is a facility for locking out
|
||||
specific modifiers. If an input line for <b>pcretest</b> starts with the string
|
||||
"< forbid " the following sequence of characters is taken as a list of
|
||||
forbidden modifiers. For example, in the test files that must not use UTF or
|
||||
Unicode property support, this line appears:
|
||||
<pre>
|
||||
< forbid 8W
|
||||
</pre>
|
||||
This locks out the /8 and /W modifiers. An immediate error is given if they are
|
||||
subsequently encountered. If the character string contains < but not >, all the
|
||||
multi-character modifiers that begin with < are locked out. Otherwise, such
|
||||
modifiers must be explicitly listed, for example:
|
||||
<pre>
|
||||
< forbid <JS><cr>
|
||||
</pre>
|
||||
There must be a single space between < and "forbid" for this feature to be
|
||||
recognised. If there is not, the line is interpreted either as a request to
|
||||
re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
|
||||
below) or, if there is another < character, as a pattern that uses < as its
|
||||
delimiter.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">DATA LINES</a><br>
|
||||
<P>
|
||||
Before each data line is passed to <b>pcre[16|32]_exec()</b>, leading and trailing
|
||||
white space is removed, and it is then scanned for \ escapes. Some of these
|
||||
|
@ -644,6 +720,7 @@ recognized:
|
|||
\v vertical tab (\x0b)
|
||||
\nnn octal character (up to 3 octal digits); always
|
||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||
\o{dd...} octal character (any number of octal digits)
|
||||
\xhh hexadecimal byte (up to 2 hex digits)
|
||||
\x{hh...} hexadecimal character (any number of hex digits)
|
||||
\A pass the PCRE_ANCHORED option to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b>
|
||||
|
@ -748,7 +825,7 @@ API to be used, the only option-setting sequences that have any effect are \B,
|
|||
\N, and \Z, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively,
|
||||
to be passed to <b>regexec()</b>.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
<br><a name="SEC8" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
<P>
|
||||
By default, <b>pcretest</b> uses the standard PCRE matching function,
|
||||
<b>pcre[16|32]_exec()</b> to match each data line. PCRE also supports an
|
||||
|
@ -765,7 +842,7 @@ This function finds all possible matches at a given point. If, however, the \F
|
|||
escape sequence is present in the data line, it stops after the first match is
|
||||
found. This is always the shortest possible match.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">DEFAULT OUTPUT FROM PCRETEST</a><br>
|
||||
<br><a name="SEC9" href="#TOC1">DEFAULT OUTPUT FROM PCRETEST</a><br>
|
||||
<P>
|
||||
This section describes the output when the normal matching function,
|
||||
<b>pcre[16|32]_exec()</b>, is being used.
|
||||
|
@ -856,7 +933,7 @@ prompt is used for continuations), data lines may not. However newlines can be
|
|||
included in data by means of the \n escape (or \r, \r\n, etc., depending on
|
||||
the newline sequence setting).
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
<br><a name="SEC10" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
<P>
|
||||
When the alternative matching function, <b>pcre[16|32]_dfa_exec()</b>, is used (by
|
||||
means of the \D escape sequence or the <b>-dfa</b> command line option), the
|
||||
|
@ -892,7 +969,7 @@ at the end of the longest match. For example:
|
|||
Since the matching function does not support substring capture, the escape
|
||||
sequences that are concerned with captured substrings are not relevant.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
|
||||
<br><a name="SEC11" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
|
||||
<P>
|
||||
When the alternative matching function has given the PCRE_ERROR_PARTIAL return,
|
||||
indicating that the subject partially matched the pattern, you can restart the
|
||||
|
@ -909,7 +986,7 @@ For further information about partial matching, see the
|
|||
<a href="pcrepartial.html"><b>pcrepartial</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">CALLOUTS</a><br>
|
||||
<br><a name="SEC12" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
If the pattern contains any callout requests, <b>pcretest</b>'s callout function
|
||||
is called during matching. This works with both matching functions. By default,
|
||||
|
@ -970,7 +1047,7 @@ the
|
|||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
|
||||
<br><a name="SEC13" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
|
||||
<P>
|
||||
When <b>pcretest</b> is outputting text in the compiled version of a pattern,
|
||||
bytes other than 32-126 are always treated as non-printing characters and are
|
||||
|
@ -982,7 +1059,7 @@ string, it behaves in the same way, unless a different locale has been set for
|
|||
the pattern (using the <b>/L</b> modifier). In this case, the <b>isprint()</b>
|
||||
function is used to distinguish printing and non-printing characters.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">SAVING AND RELOADING COMPILED PATTERNS</a><br>
|
||||
<br><a name="SEC14" href="#TOC1">SAVING AND RELOADING COMPILED PATTERNS</a><br>
|
||||
<P>
|
||||
The facilities described in this section are not available when the POSIX
|
||||
interface to PCRE is being used, that is, when the <b>/P</b> pattern modifier is
|
||||
|
@ -1013,10 +1090,9 @@ writing the file, <b>pcretest</b> expects to read a new pattern.
|
|||
</P>
|
||||
<P>
|
||||
A saved pattern can be reloaded into <b>pcretest</b> by specifying < and a file
|
||||
name instead of a pattern. The name of the file must not contain a < character,
|
||||
as otherwise <b>pcretest</b> will interpret the line as a pattern delimited by <
|
||||
characters.
|
||||
For example:
|
||||
name instead of a pattern. There must be no space between < and the file name,
|
||||
which must not contain a < character, as otherwise <b>pcretest</b> will
|
||||
interpret the line as a pattern delimited by < characters. For example:
|
||||
<pre>
|
||||
re> </some/file
|
||||
Compiled pattern loaded from /some/file
|
||||
|
@ -1055,14 +1131,14 @@ string using a reloaded pattern is likely to cause <b>pcretest</b> to crash.
|
|||
Finally, if you attempt to load a file that is not in the correct format, the
|
||||
result is undefined.
|
||||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC15" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcreapi</b>(3),
|
||||
<b>pcrecallout</b>(3),
|
||||
<b>pcrejit</b>(3), <b>pcrematching</b>(3), <b>pcrepartial</b>(3),
|
||||
<b>pcrepattern</b>(3), <b>pcreprecompile</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC16" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
|
@ -1071,11 +1147,11 @@ University Computing Service
|
|||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC17" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 10 September 2012
|
||||
Last updated: 09 February 2014
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -85,7 +85,9 @@ place. From release 7.3 of PCRE, the check is according the rules of RFC 3629,
|
|||
which are themselves derived from the Unicode specification. Earlier releases
|
||||
of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit
|
||||
values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0
|
||||
to U+10FFFF, excluding the surrogate area and the non-characters.
|
||||
to U+10FFFF, excluding the surrogate area. (From release 8.33 the so-called
|
||||
"non-character" code points are no longer excluded because Unicode corrigendum
|
||||
#9 makes it clear that they should not be.)
|
||||
</P>
|
||||
<P>
|
||||
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
|
||||
|
@ -96,10 +98,6 @@ surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
|
|||
UTF-32.)
|
||||
</P>
|
||||
<P>
|
||||
Also excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
|
||||
and the last two code points in each plane, U+??FFFE and U+??FFFF.
|
||||
</P>
|
||||
<P>
|
||||
If an invalid UTF-8 string is passed to PCRE, an error return is given. At
|
||||
compile time, the only additional information is the offset to the first byte
|
||||
of the failing character. The run-time functions <b>pcre_exec()</b> and
|
||||
|
@ -135,10 +133,6 @@ U+D800 to U+DFFF are independent code points. Values in the surrogate range
|
|||
must be used in pairs in the correct manner.
|
||||
</P>
|
||||
<P>
|
||||
Excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
|
||||
and the last two code points in each plane, U+??FFFE and U+??FFFF.
|
||||
</P>
|
||||
<P>
|
||||
If an invalid UTF-16 string is passed to PCRE, an error return is given. At
|
||||
compile time, the only additional information is the offset to the first data
|
||||
unit of the failing character. The run-time functions <b>pcre16_exec()</b> and
|
||||
|
@ -160,9 +154,7 @@ Validity of UTF-32 strings
|
|||
When you set the PCRE_UTF32 flag, the strings of 32-bit data units that are
|
||||
passed as patterns and subjects are (by default) checked for validity on entry
|
||||
to the relevant functions. This check allows only values in the range U+0
|
||||
to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF, and the
|
||||
"Non-Character" code points, which are U+FDD0 to U+FDEF and the last two
|
||||
characters in each plane, U+??FFFE and U+??FFFF.
|
||||
to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF.
|
||||
</P>
|
||||
<P>
|
||||
If an invalid UTF-32 string is passed to PCRE, an error return is given. At
|
||||
|
@ -261,9 +253,9 @@ Cambridge CB2 3QH, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 11 November 2012
|
||||
Last updated: 27 February 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
|
|
@ -11,27 +11,29 @@
|
|||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
|
||||
<p>
|
||||
The HTML documentation for PCRE comprises the following pages:
|
||||
The HTML documentation for PCRE consists of a number of pages that are listed
|
||||
below in alphabetical order. If you are new to PCRE, please read the first one
|
||||
first.
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tr><td><a href="pcre.html">pcre</a></td>
|
||||
<td> Introductory page</td></tr>
|
||||
|
||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||
<td> Information about the installation configuration</td></tr>
|
||||
|
||||
<tr><td><a href="pcre16.html">pcre16</a></td>
|
||||
<td> Discussion of the 16-bit PCRE library</td></tr>
|
||||
|
||||
<tr><td><a href="pcre32.html">pcre32</a></td>
|
||||
<td> Discussion of the 32-bit PCRE library</td></tr>
|
||||
|
||||
<tr><td><a href="pcre-config.html">pcre-config</a></td>
|
||||
<td> Information about the installation configuration</td></tr>
|
||||
|
||||
<tr><td><a href="pcreapi.html">pcreapi</a></td>
|
||||
<td> PCRE's native API</td></tr>
|
||||
|
||||
<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
|
||||
<td> Options for building PCRE</td></tr>
|
||||
<td> Building PCRE</td></tr>
|
||||
|
||||
<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
|
||||
<td> The <i>callout</i> facility</td></tr>
|
||||
|
@ -67,7 +69,7 @@ The HTML documentation for PCRE comprises the following pages:
|
|||
<td> Some comments on performance</td></tr>
|
||||
|
||||
<tr><td><a href="pcreposix.html">pcreposix</a></td>
|
||||
<td> The POSIX API to the PCRE library</td></tr>
|
||||
<td> The POSIX API to the PCRE 8-bit library</td></tr>
|
||||
|
||||
<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
|
||||
<td> How to save and re-use compiled patterns</td></tr>
|
||||
|
@ -118,13 +120,13 @@ functions.
|
|||
<td> Match a compiled pattern to a subject string
|
||||
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
||||
<td> Free study data</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
|
||||
<td> Match a compiled pattern to a subject string
|
||||
(Perl compatible)</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
|
||||
<td> Free study data</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
|
||||
<td> Free extracted substring</td></tr>
|
||||
|
||||
|
@ -140,14 +142,17 @@ functions.
|
|||
<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
|
||||
<td> Convert captured string name to number</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_stringtable_entries.html">pcre_get_stringtable_entries</a></td>
|
||||
<td> Find table entries for given string name</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
|
||||
<td> Extract numbered substring into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
|
||||
<td> Extract all substrings into new memory</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_info.html">pcre_info</a></td>
|
||||
<td> Obsolete information extraction function</td></tr>
|
||||
<tr><td><a href="pcre_jit_exec.html">pcre_jit_exec</a></td>
|
||||
<td> Fast path interface to JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
|
||||
<td> Create a stack for JIT matching</td></tr>
|
||||
|
|
|
@ -4,11 +4,11 @@ pcre-config - program to return PCRE configuration
|
|||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||
.ti +5n
|
||||
.B [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
||||
.ti +5n
|
||||
.B [--cflags] [--cflags-posix]
|
||||
.B " [--libs16] [--libs32] [--libs-cpp] [--libs-posix]"
|
||||
.B " [--cflags] [--cflags-posix]"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
PCRE-CONFIG(1) PCRE-CONFIG(1)
|
||||
PCRE-CONFIG(1) General Commands Manual PCRE-CONFIG(1)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE 3 "11 November 2012" "PCRE 8.32"
|
||||
.TH PCRE 3 "08 January 2014" "PCRE 8.35"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH INTRODUCTION
|
||||
|
@ -19,9 +19,9 @@ built. The majority of the work to make this possible was done by Zoltan
|
|||
Herczeg.
|
||||
.P
|
||||
Starting with release 8.32 it is possible to compile a third separate PCRE
|
||||
library, which supports 32-bit character strings (including
|
||||
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
|
||||
libraries. The work to make this possible was done by Christian Persch.
|
||||
library that supports 32-bit character strings (including UTF-32 strings). The
|
||||
build process allows any combination of the 8-, 16- and 32-bit libraries. The
|
||||
work to make this possible was done by Christian Persch.
|
||||
.P
|
||||
The three libraries contain identical sets of functions, except that the names
|
||||
in the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP, and the
|
||||
|
@ -44,7 +44,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
|
|||
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||
release 6.2.0.
|
||||
release 6.3.0.
|
||||
.P
|
||||
In addition to the Perl-compatible matching function, PCRE contains an
|
||||
alternative function that matches the same compiled patterns in a different
|
||||
|
@ -68,6 +68,7 @@ in the \fIContrib\fP directory at the primary FTP site, which is:
|
|||
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
|
||||
.\" </a>
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
|
||||
.\"
|
||||
.P
|
||||
Details of exactly which Perl regular expression features are and are not
|
||||
supported by PCRE are given in separate documents. See the
|
||||
|
@ -95,8 +96,17 @@ available. The features themselves are described in the
|
|||
\fBpcrebuild\fP
|
||||
.\"
|
||||
page. Documentation about building PCRE for various operating systems can be
|
||||
found in the \fBREADME\fP and \fBNON-AUTOTOOLS_BUILD\fP files in the source
|
||||
distribution.
|
||||
found in the
|
||||
.\" HTML <a href="README.txt">
|
||||
.\" </a>
|
||||
\fBREADME\fP
|
||||
.\"
|
||||
and
|
||||
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
|
||||
.\" </a>
|
||||
\fBNON-AUTOTOOLS-BUILD\fP
|
||||
.\"
|
||||
files in the source distribution.
|
||||
.P
|
||||
The libraries contain a number of undocumented internal functions and data
|
||||
tables that are used by more than one of the exported external functions, but
|
||||
|
@ -121,8 +131,11 @@ checked for UTF-8 validity. If the data string is very long, such a check might
|
|||
use sufficiently many resources as to cause your application to lose
|
||||
performance.
|
||||
.P
|
||||
The best way of guarding against this possibility is to use the
|
||||
One way of guarding against this possibility is to use the
|
||||
\fBpcre_fullinfo()\fP function to check the compiled pattern's options for UTF.
|
||||
Alternatively, from release 8.33, you can set the PCRE_NEVER_UTF option at
|
||||
compile time. This causes a compile-time error if a pattern contains a
|
||||
UTF-setting sequence.
|
||||
.P
|
||||
If your application is one that supports UTF, be aware that validity checking
|
||||
can take time. If the same data string is to be matched many times, you can use
|
||||
|
@ -145,15 +158,18 @@ page.
|
|||
The user documentation for PCRE comprises a number of different sections. In
|
||||
the "man" format, each of these is a separate "man page". In the HTML format,
|
||||
each is a separate page, linked from the index page. In the plain text format,
|
||||
all the sections, except the \fBpcredemo\fP section, are concatenated, for ease
|
||||
of searching. The sections are as follows:
|
||||
the descriptions of the \fBpcregrep\fP and \fBpcretest\fP programs are in files
|
||||
called \fBpcregrep.txt\fP and \fBpcretest.txt\fP, respectively. The remaining
|
||||
sections, except for the \fBpcredemo\fP section (which is a program listing),
|
||||
are concatenated in \fBpcre.txt\fP, for ease of searching. The sections are as
|
||||
follows:
|
||||
.sp
|
||||
pcre this document
|
||||
pcre-config show PCRE installation configuration information
|
||||
pcre16 details of the 16-bit library
|
||||
pcre32 details of the 32-bit library
|
||||
pcre-config show PCRE installation configuration information
|
||||
pcreapi details of PCRE's native C API
|
||||
pcrebuild options for building PCRE
|
||||
pcrebuild building PCRE
|
||||
pcrecallout details of the callout feature
|
||||
pcrecompat discussion of Perl compatibility
|
||||
pcrecpp details of the C++ wrapper for the 8-bit library
|
||||
|
@ -175,8 +191,8 @@ of searching. The sections are as follows:
|
|||
pcretest description of the \fBpcretest\fP testing command
|
||||
pcreunicode discussion of Unicode and UTF-8/16/32 support
|
||||
.sp
|
||||
In addition, in the "man" and HTML formats, there is a short page for each
|
||||
C library function, listing its arguments and results.
|
||||
In the "man" and HTML formats, there is also a short page for each C library
|
||||
function, listing its arguments and results.
|
||||
.
|
||||
.
|
||||
.SH AUTHOR
|
||||
|
@ -197,6 +213,6 @@ two digits 10, at the domain cam.ac.uk.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 11 November 2012
|
||||
Copyright (c) 1997-2012 University of Cambridge.
|
||||
Last updated: 08 January 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
|
||||
.TH PCRE 3 "12 May 2013" "PCRE 8.33"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
|
@ -8,140 +8,120 @@ PCRE - Perl-compatible regular expressions
|
|||
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.SM
|
||||
.nf
|
||||
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIname\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIname\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
|
||||
.sp
|
||||
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B const unsigned char *pcre16_maketables(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B const char *pcre16_version(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B void *(*pcre16_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre16_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B void *(*pcre16_stack_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre16_stack_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B int (*pcre16_callout)(pcre16_callout_block *);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
||||
.ti +5n
|
||||
.B int \fIkeep_boms\fP);
|
||||
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
|
||||
.B " int \fIkeep_boms\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "THE PCRE 16-BIT LIBRARY"
|
||||
|
@ -246,8 +226,9 @@ buffer, including the zero terminator if the string was zero-terminated.
|
|||
.SH "SUBJECT STRING OFFSETS"
|
||||
.rs
|
||||
.sp
|
||||
The offsets within subject strings that are returned by the matching functions
|
||||
are in 16-bit units rather than bytes.
|
||||
The lengths and starting offsets of subject strings must be specified in 16-bit
|
||||
data units, and the offsets within subject strings that are returned by the
|
||||
matching functions are also in 16-bit units rather than bytes.
|
||||
.
|
||||
.
|
||||
.SH "NAMED SUBPATTERNS"
|
||||
|
@ -385,6 +366,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 08 November 2012
|
||||
Copyright (c) 1997-2012 University of Cambridge.
|
||||
Last updated: 12 May 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
|
||||
.TH PCRE 3 "12 May 2013" "PCRE 8.33"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
|
@ -8,140 +8,119 @@ PCRE - Perl-compatible regular expressions
|
|||
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.SM
|
||||
.nf
|
||||
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIname\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIname\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
|
||||
.sp
|
||||
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B const unsigned char *pcre32_maketables(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B const char *pcre32_version(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B void *(*pcre32_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre32_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B void *(*pcre32_stack_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre32_stack_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B int (*pcre32_callout)(pcre32_callout_block *);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
||||
.ti +5n
|
||||
.B int \fIkeep_boms\fP);
|
||||
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
|
||||
.B " int \fIkeep_boms\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "THE PCRE 32-BIT LIBRARY"
|
||||
|
@ -246,8 +225,9 @@ buffer, including the zero terminator if the string was zero-terminated.
|
|||
.SH "SUBJECT STRING OFFSETS"
|
||||
.rs
|
||||
.sp
|
||||
The offsets within subject strings that are returned by the matching functions
|
||||
are in 32-bit units rather than bytes.
|
||||
The lengths and starting offsets of subject strings must be specified in 32-bit
|
||||
data units, and the offsets within subject strings that are returned by the
|
||||
matching functions are also in 32-bit units rather than bytes.
|
||||
.
|
||||
.
|
||||
.SH "NAMED SUBPATTERNS"
|
||||
|
@ -384,6 +364,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 08 November 2012
|
||||
Copyright (c) 1997-2012 University of Cambridge.
|
||||
Last updated: 12 May 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE_COMPILE 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRE_COMPILE 3 "01 October 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -56,6 +51,7 @@ The option bits are:
|
|||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
|
@ -64,6 +60,8 @@ The option bits are:
|
|||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||
validity (only relevant if
|
||||
PCRE_UTF16 is set)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE_COMPILE2 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRE_COMPILE2 3 "01 October 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -64,6 +56,7 @@ The option bits are:
|
|||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
|
@ -72,6 +65,8 @@ The option bits are:
|
|||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||
validity (only relevant if
|
||||
PCRE_UTF16 is set)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRE_CONFIG 3 "05 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
|
@ -33,6 +33,7 @@ point to an unsigned long integer. The available codes are:
|
|||
target architecture for the JIT compiler,
|
||||
or NULL if there is no JIT support
|
||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
|
||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||
Internal recursion depth limit
|
||||
|
|
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE_DFA_EXEC 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRE_DFA_EXEC 3 "12 May 2013" "PCRE 8.33"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.sp
|
||||
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.sp
|
||||
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -44,16 +36,17 @@ are:
|
|||
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
|
||||
or is NULL
|
||||
\fIsubject\fP Points to the subject string
|
||||
\fIlength\fP Length of the subject string, in bytes
|
||||
\fIstartoffset\fP Offset in bytes in the subject at which to
|
||||
start matching
|
||||
\fIlength\fP Length of the subject string
|
||||
\fIstartoffset\fP Offset in the subject at which to start matching
|
||||
\fIoptions\fP Option bits
|
||||
\fIovector\fP Points to a vector of ints for result offsets
|
||||
\fIovecsize\fP Number of elements in the vector
|
||||
\fIworkspace\fP Points to a vector of ints used as working space
|
||||
\fIwscount\fP Number of elements in the vector
|
||||
.sp
|
||||
The options are:
|
||||
The units for \fIlength\fP and \fIstartoffset\fP are bytes for
|
||||
\fBpcre_dfa_exec()\fP, 16-bit data items for \fBpcre16_dfa_exec()\fP, and 32-bit
|
||||
items for \fBpcre32_dfa_exec()\fP. The options are:
|
||||
.sp
|
||||
PCRE_ANCHORED Match only at the first position
|
||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE_EXEC 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRE_EXEC 3 "12 May 2013" "PCRE 8.33"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -36,14 +31,15 @@ offsets to captured substrings. Its arguments are:
|
|||
\fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
|
||||
or is NULL
|
||||
\fIsubject\fP Points to the subject string
|
||||
\fIlength\fP Length of the subject string, in bytes
|
||||
\fIstartoffset\fP Offset in bytes in the subject at which to
|
||||
start matching
|
||||
\fIlength\fP Length of the subject string
|
||||
\fIstartoffset\fP Offset in the subject at which to start matching
|
||||
\fIoptions\fP Option bits
|
||||
\fIovector\fP Points to a vector of ints for result offsets
|
||||
\fIovecsize\fP Number of elements in the vector (a multiple of 3)
|
||||
.sp
|
||||
The options are:
|
||||
The units for \fIlength\fP and \fIstartoffset\fP are bytes for
|
||||
\fBpcre_exec()\fP, 16-bit data items for \fBpcre16_exec()\fP, and 32-bit items
|
||||
for \fBpcre32_exec()\fP. The options are:
|
||||
.sp
|
||||
PCRE_ANCHORED Match only at the first position
|
||||
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
|
||||
|
|
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||
.B " const char **\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP);
|
||||
.PP
|
||||
.B " const char *\fIname\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIname\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIname\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIname\fP);
|
||||
.B " PCRE_SPTR32 \fIname\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
||||
.PP
|
||||
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
||||
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " const char **\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_stack *\fIjstack\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||
.sp
|
||||
.B int pcre16_jit_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_stack *\fIjstack\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||
.sp
|
||||
.B int pcre32_jit_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_stack *\fIjstack\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP,
|
||||
.ti +5n
|
||||
.B int \fImaxsize\fP);
|
||||
.PP
|
||||
.B " int \fImaxsize\fP);"
|
||||
.sp
|
||||
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP,
|
||||
.ti +5n
|
||||
.B int \fImaxsize\fP);
|
||||
.PP
|
||||
.B " int \fImaxsize\fP);"
|
||||
.sp
|
||||
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP,
|
||||
.ti +5n
|
||||
.B int \fImaxsize\fP);
|
||||
.B " int \fImaxsize\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.PP
|
||||
.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.sp
|
||||
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.PP
|
||||
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.sp
|
||||
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
|
|
@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
|
||||
.ti +5n
|
||||
.B int \fIkeep_boms\fP);
|
||||
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
|
||||
.B " int \fIkeep_boms\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
|
|
|
@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
|
|||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
|
||||
.ti +5n
|
||||
.B int \fIkeep_boms\fP);
|
||||
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
|
||||
.B " int \fIkeep_boms\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,24 +1,54 @@
|
|||
.TH PCREBUILD 3 "30 October 2012" "PCRE 8.32"
|
||||
.TH PCREBUILD 3 "12 May 2013" "PCRE 8.33"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.
|
||||
.
|
||||
.SH "BUILDING PCRE"
|
||||
.rs
|
||||
.sp
|
||||
PCRE is distributed with a \fBconfigure\fP script that can be used to build the
|
||||
library in Unix-like environments using the applications known as Autotools.
|
||||
Also in the distribution are files to support building using \fBCMake\fP
|
||||
instead of \fBconfigure\fP. The text file
|
||||
.\" HTML <a href="README.txt">
|
||||
.\" </a>
|
||||
\fBREADME\fP
|
||||
.\"
|
||||
contains general information about building with Autotools (some of which is
|
||||
repeated below), and also has some comments about building on various operating
|
||||
systems. There is a lot more information about building PCRE without using
|
||||
Autotools (including information about using \fBCMake\fP and building "by
|
||||
hand") in the text file called
|
||||
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
|
||||
.\" </a>
|
||||
\fBNON-AUTOTOOLS-BUILD\fP.
|
||||
.\"
|
||||
You should consult this file as well as the
|
||||
.\" HTML <a href="README.txt">
|
||||
.\" </a>
|
||||
\fBREADME\fP
|
||||
.\"
|
||||
file if you are building in a non-Unix-like environment.
|
||||
.
|
||||
.
|
||||
.SH "PCRE BUILD-TIME OPTIONS"
|
||||
.rs
|
||||
.sp
|
||||
This document describes the optional features of PCRE that can be selected when
|
||||
the library is compiled. It assumes use of the \fBconfigure\fP script, where
|
||||
the optional features are selected or deselected by providing options to
|
||||
\fBconfigure\fP before running the \fBmake\fP command. However, the same
|
||||
options can be selected in both Unix-like and non-Unix-like environments using
|
||||
the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead of
|
||||
\fBconfigure\fP to build PCRE.
|
||||
The rest of this document describes the optional features of PCRE that can be
|
||||
selected when the library is compiled. It assumes use of the \fBconfigure\fP
|
||||
script, where the optional features are selected or deselected by providing
|
||||
options to \fBconfigure\fP before running the \fBmake\fP command. However, the
|
||||
same options can be selected in both Unix-like and non-Unix-like environments
|
||||
using the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead
|
||||
of \fBconfigure\fP to build PCRE.
|
||||
.P
|
||||
There is a lot more information about building PCRE without using
|
||||
\fBconfigure\fP (including information about using \fBCMake\fP or building "by
|
||||
hand") in the file called \fINON-AUTOTOOLS-BUILD\fP, which is part of the PCRE
|
||||
distribution. You should consult this file as well as the \fIREADME\fP file if
|
||||
you are building in a non-Unix-like environment.
|
||||
If you are not using Autotools or \fBCMake\fP, option selection can be done by
|
||||
editing the \fBconfig.h\fP file, or by passing parameter settings to the
|
||||
compiler, as described in
|
||||
.\" HTML <a href="NON-AUTOTOOLS-BUILD.txt">
|
||||
.\" </a>
|
||||
\fBNON-AUTOTOOLS-BUILD\fP.
|
||||
.\"
|
||||
.P
|
||||
The complete list of options for \fBconfigure\fP (which includes the standard
|
||||
ones such as the selection of the installation directory) can be obtained by
|
||||
|
@ -45,7 +75,7 @@ strings, by adding
|
|||
.sp
|
||||
--enable-pcre16
|
||||
.sp
|
||||
to the \fBconfigure\fP command. You can also build a separate
|
||||
to the \fBconfigure\fP command. You can also build yet another separate
|
||||
library, called \fBlibpcre32\fP, in which strings are contained in vectors of
|
||||
32-bit data units and interpreted either as single-unit characters or UTF-32
|
||||
strings, by adding
|
||||
|
@ -65,8 +95,8 @@ an 8-bit program. None of these are built if you select only the 16-bit or
|
|||
.SH "BUILDING SHARED AND STATIC LIBRARIES"
|
||||
.rs
|
||||
.sp
|
||||
The PCRE building process uses \fBlibtool\fP to build both shared and static
|
||||
Unix libraries by default. You can suppress one of these by adding one of
|
||||
The Autotools PCRE building process uses \fBlibtool\fP to build both shared and
|
||||
static libraries by default. You can suppress one of these by adding one of
|
||||
.sp
|
||||
--disable-shared
|
||||
--disable-static
|
||||
|
@ -515,6 +545,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 30 October 2012
|
||||
Copyright (c) 1997-2012 University of Cambridge.
|
||||
Last updated: 12 May 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRECALLOUT 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRECALLOUT 3 "12 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
|
@ -41,26 +41,64 @@ it is processed as if it were
|
|||
(?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
||||
.sp
|
||||
Notice that there is a callout before and after each parenthesis and
|
||||
alternation bar. Automatic callouts can be used for tracking the progress of
|
||||
pattern matching. The
|
||||
alternation bar. If the pattern contains a conditional group whose condition is
|
||||
an assertion, an automatic callout is inserted immediately before the
|
||||
condition. Such a callout may also be inserted explicitly, for example:
|
||||
.sp
|
||||
(?(?C9)(?=a)ab|de)
|
||||
.sp
|
||||
This applies only to assertion conditions (because they are themselves
|
||||
independent groups).
|
||||
.P
|
||||
Automatic callouts can be used for tracking the progress of pattern matching.
|
||||
The
|
||||
.\" HREF
|
||||
\fBpcretest\fP
|
||||
.\"
|
||||
command has an option that sets automatic callouts; when it is used, the output
|
||||
indicates how the pattern is matched. This is useful information when you are
|
||||
trying to optimize the performance of a particular pattern.
|
||||
.P
|
||||
The use of callouts in a pattern makes it ineligible for optimization by the
|
||||
just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE
|
||||
option always fails.
|
||||
program has a pattern qualifier (/C) that sets automatic callouts; when it is
|
||||
used, the output indicates how the pattern is being matched. This is useful
|
||||
information when you are trying to optimize the performance of a particular
|
||||
pattern.
|
||||
.
|
||||
.
|
||||
.SH "MISSING CALLOUTS"
|
||||
.rs
|
||||
.sp
|
||||
You should be aware that, because of optimizations in the way PCRE matches
|
||||
patterns by default, callouts sometimes do not happen. For example, if the
|
||||
pattern is
|
||||
You should be aware that, because of optimizations in the way PCRE compiles and
|
||||
matches patterns, callouts sometimes do not happen exactly as you might expect.
|
||||
.P
|
||||
At compile time, PCRE "auto-possessifies" repeated items when it knows that
|
||||
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||
if it were a++[bc]. The \fBpcretest\fP output when this pattern is anchored and
|
||||
then applied with automatic callouts to the string "aaaa" is:
|
||||
.sp
|
||||
--->aaaa
|
||||
+0 ^ ^
|
||||
+1 ^ a+
|
||||
+3 ^ ^ [bc]
|
||||
No match
|
||||
.sp
|
||||
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
|
||||
to \fBpcre_compile()\fP, or starting the pattern with (*NO_AUTO_POSSESS). If
|
||||
this is done in \fBpcretest\fP (using the /O qualifier), the output changes to
|
||||
this:
|
||||
.sp
|
||||
--->aaaa
|
||||
+0 ^ ^
|
||||
+1 ^ a+
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^^ [bc]
|
||||
No match
|
||||
.sp
|
||||
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||
again, repeatedly, until a+ itself fails.
|
||||
.P
|
||||
Other optimizations that provide fast "no match" results also affect callouts.
|
||||
For example, if the pattern is
|
||||
.sp
|
||||
ab(?C4)cd
|
||||
.sp
|
||||
|
@ -84,11 +122,11 @@ callouts such as the example above are obeyed.
|
|||
.rs
|
||||
.sp
|
||||
During matching, when PCRE reaches a callout point, the external function
|
||||
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called
|
||||
(if it is set). This applies to both normal and DFA matching. The only
|
||||
argument to the callout function is a pointer to a \fBpcre_callout\fP
|
||||
or \fBpcre[16|32]_callout\fP block.
|
||||
These structures contain the following fields:
|
||||
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called (if it is
|
||||
set). This applies to both normal and DFA matching. The only argument to the
|
||||
callout function is a pointer to a \fBpcre_callout\fP or
|
||||
\fBpcre[16|32]_callout\fP block. These structures contain the following
|
||||
fields:
|
||||
.sp
|
||||
int \fIversion\fP;
|
||||
int \fIcallout_number\fP;
|
||||
|
@ -119,10 +157,10 @@ automatically generated callouts).
|
|||
.P
|
||||
The \fIoffset_vector\fP field is a pointer to the vector of offsets that was
|
||||
passed by the caller to the matching function. When \fBpcre_exec()\fP or
|
||||
\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to extract
|
||||
substrings that have been matched so far, in the same way as for extracting
|
||||
substrings after a match has completed. For the DFA matching functions, this
|
||||
field is not useful.
|
||||
\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to
|
||||
extract substrings that have been matched so far, in the same way as for
|
||||
extracting substrings after a match has completed. For the DFA matching
|
||||
functions, this field is not useful.
|
||||
.P
|
||||
The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values
|
||||
that were passed to the matching function.
|
||||
|
@ -144,8 +182,10 @@ value of \fIcapture_top\fP is one. This is always the case when the DFA
|
|||
functions are used, because they do not support captured substrings.
|
||||
.P
|
||||
The \fIcapture_last\fP field contains the number of the most recently captured
|
||||
substring. If no substrings have been captured, its value is -1. This is always
|
||||
the case for the DFA matching functions.
|
||||
substring. However, when a recursion exits, the value reverts to what it was
|
||||
outside the recursion, as do the values of all captured substrings. If no
|
||||
substrings have been captured, the value of \fIcapture_last\fP is -1. This is
|
||||
always the case for the DFA matching functions.
|
||||
.P
|
||||
The \fIcallout_data\fP field contains a value that is passed to a matching
|
||||
function specifically so that it can be passed back in callouts. It is passed
|
||||
|
@ -173,11 +213,12 @@ help in distinguishing between different automatic callouts, which all have the
|
|||
same callout number. However, they are set for all callouts.
|
||||
.P
|
||||
The \fImark\fP field is present from version 2 of the callout structure. In
|
||||
callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a pointer to
|
||||
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
||||
(*THEN) item in the match, or NULL if no such items have been passed. Instances
|
||||
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
||||
callouts from the DFA matching functions this field always contains NULL.
|
||||
callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a
|
||||
pointer to the zero-terminated name of the most recently passed (*MARK),
|
||||
(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
|
||||
passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
|
||||
previous (*MARK). In callouts from the DFA matching functions this field always
|
||||
contains NULL.
|
||||
.
|
||||
.
|
||||
.SH "RETURN VALUES"
|
||||
|
@ -209,6 +250,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 24 June 2012
|
||||
Copyright (c) 1997-2012 University of Cambridge.
|
||||
Last updated: 12 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user