Regex: Update PCRE to v8.35.

I was über lazy at first, so I took the libs from SM. But it's actually quite easy to compile, so let's update to the latest version \o/.
2014-07-05 13:53:30 +02:00
parent d1153b8049
commit d4de0e6f1e
241 changed files with 51074 additions and 15011 deletions
--- a/tools/pcre/testdata/testoutput18-16
+++ b/tools/pcre/testdata/testoutput18-16
@@ -1,5 +1,7 @@
-/-- This set of tests is for UTF-16 and UTF-32 support, and is relevant only to the
-    16- and 32-bit library. --/
+/-- This set of tests is for UTF-16 and UTF-32 support, and is relevant only to
+    the 16- and 32-bit libraries. --/
+    
+< forbid W 

 /<2F><><EFBFBD>xxx/8?DZSS
 **Failed: invalid UTF-8 string cannot be converted to UTF-16
@@ -337,7 +339,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
  5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
@@ -367,7 +369,7 @@ No match
 ------------------------------------------------------------------
        Bra
        \x{100}{3}
-        \x{100}?
+        \x{100}?+
        Ket
        End
 ------------------------------------------------------------------
@@ -376,7 +378,7 @@ Options: utf
 First char = \x{100}
 Need char = \x{100}
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
  \x{100}\x{100}\x{100}\x{100\x{100}
 0: \x{100}\x{100}\x{100}

@@ -384,7 +386,7 @@ No set of starting bytes
 ------------------------------------------------------------------
        Bra
        CBra 1
-        \x{100}+
+        \x{100}++
        Alt
        x
        Ket
@@ -396,7 +398,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x \xff 
+Starting chars: x \xff 

 /(\x{100}*a|x)/8SDZ
 ------------------------------------------------------------------
@@ -415,13 +417,13 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a x \xff 
+Starting chars: a x \xff 

 /(\x{100}{0,2}a|x)/8SDZ
 ------------------------------------------------------------------
        Bra
        CBra 1
-        \x{100}{0,2}
+        \x{100}{0,2}+
        a
        Alt
        x
@@ -434,14 +436,14 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a x \xff 
+Starting chars: a x \xff 

 /(\x{100}{1,2}a|x)/8SDZ
 ------------------------------------------------------------------
        Bra
        CBra 1
        \x{100}
-        \x{100}{0,1}
+        \x{100}{0,1}+
        a
        Alt
        x
@@ -454,7 +456,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x \xff 
+Starting chars: x \xff 

 /\x{100}/8DZ
 ------------------------------------------------------------------
@@ -472,7 +474,7 @@ No need char
 ------------------------------------------------------------------
        Bra
        a\x{100}
-        \x{101}*
+        \x{101}*+
        Ket
        End
 ------------------------------------------------------------------
@@ -485,7 +487,7 @@ Need char = \x{100}
 ------------------------------------------------------------------
        Bra
        a\x{100}
-        \x{101}+
+        \x{101}++
        Ket
        End
 ------------------------------------------------------------------
@@ -608,8 +610,6 @@ Need char = 'X'
 Failed: missing terminating ] for character class at offset 13

 /X/8
-    \x{0}\x{d7ff}\x{e000}\x{10ffff}
-Error -10 (bad UTF-16 string) offset=4 reason=4
    \x{d800}
 Error -10 (bad UTF-16 string) offset=0 reason=1
    \x{d800}\?
@@ -634,8 +634,6 @@ No match
 ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
    \x{d800}\x{1234}
 Error -10 (bad UTF-16 string) offset=1 reason=2
-    \x{fffe}
-Error -10 (bad UTF-16 string) offset=0 reason=4

 /(*UTF16)\x{11234}/
  abcd\x{11234}pqr
@@ -650,7 +648,7 @@ Need char = \x{de34}
 0: \x{11234}

 /(*UTF-32)\x{11234}/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5

 /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
 Capturing subpattern count = 0
@@ -660,7 +658,7 @@ First char = 'a'
 Need char = 'b'

 /(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
-Failed: (*VERB) not recognized at offset 12
+Failed: (*VERB) not recognized or malformed at offset 12

 /\h/SI8
 Capturing subpattern count = 0
@@ -668,7 +666,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 \xa0 \xff 
+Starting chars: \x09 \x20 \xa0 \xff 
    ABC\x{09}
 0: \x{09}
    ABC\x{20}
@@ -694,7 +692,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
    ABC\x{0a}
 0: \x{0a}
    ABC\x{0b}
@@ -714,19 +712,19 @@ Options: utf
 No first char
 Need char = 'A'
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 A \xa0 \xff 
+Starting chars: \x09 \x20 A \xa0 \xff 
    CDBABC
 0: A
    \x{2000}ABC 
 0: \x{2000}A

-/\R*A/SI8
+/\R*A/SI8<bsr_unicode>
 Capturing subpattern count = 0
-Options: utf
+Options: bsr_unicode utf
 No first char
 Need char = 'A'
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d A \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d A \x85 \xff 
    CDBABC
 0: A
    \x{2028}A  
@@ -738,7 +736,7 @@ Options: utf
 No first char
 Need char = 'A'
 Subject length lower bound = 2
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 

 /\s?xxx\s/8SI
 Capturing subpattern count = 0
@@ -746,7 +744,7 @@ Options: utf
 No first char
 Need char = 'x'
 Subject length lower bound = 4
-Starting byte set: \x09 \x0a \x0c \x0d \x20 x 
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 x 

 /\sxxx\s/I8ST1
 Capturing subpattern count = 0
@@ -754,7 +752,7 @@ Options: utf
 No first char
 Need char = 'x'
 Subject length lower bound = 5
-Starting byte set: \x09 \x0a \x0c \x0d \x20 \x85 \xa0 
+Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0 
    AB\x{85}xxx\x{a0}XYZ
 0: \x{85}xxx\x{a0}
    AB\x{a0}xxx\x{85}XYZ
@@ -766,7 +764,7 @@ Options: utf
 No first char
 Need char = ' '
 Subject length lower bound = 3
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
  \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
  \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ 
  A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e 
@@ -805,7 +803,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list

 /\x{1234}+?/iS8I
 Capturing subpattern count = 0
@@ -813,7 +811,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list

 /\x{1234}++/iS8I
 Capturing subpattern count = 0
@@ -821,7 +819,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list

 /\x{1234}{2}/iS8I
 Capturing subpattern count = 0
@@ -829,7 +827,7 @@ Options: caseless utf
 First char = \x{1234}
 Need char = \x{1234}
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list

 /[^\x{c4}]/8DZ
 ------------------------------------------------------------------
@@ -862,7 +860,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 

 /-- Check bad offset --/

@@ -1019,4 +1017,10 @@ Failed: invalid UTF-16 string at offset 0
    \x{a0}\x20!
 0: \x{a0} !

+/(*UTF)abc/9
+Failed: setting UTF is disabled by the application at offset 0
+
+/abc/89
+Failed: setting UTF is disabled by the application at offset 0
+
 /-- End of testinput18 --/