Regex: Add PCRE 8.32 in tools directory.
This commit is contained in:
296
tools/pcre/testdata/testinput17
vendored
Normal file
296
tools/pcre/testdata/testinput17
vendored
Normal file
@ -0,0 +1,296 @@
|
||||
/-- This set of tests is for the 16- and 32-bit library's basic (non-UTF-16
|
||||
or -32) features that are not compatible with the 8-bit library, or which
|
||||
give different output in 16- or 32-bit mode. --/
|
||||
|
||||
/a\Cb/
|
||||
aXb
|
||||
a\nb
|
||||
|
||||
/[^\x{c4}]/DZ
|
||||
|
||||
/\x{100}/I
|
||||
|
||||
/ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional leading comment
|
||||
(?: (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address
|
||||
| # or
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # one word, optionally followed by....
|
||||
(?:
|
||||
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||||
\(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) | # comments, or...
|
||||
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
# quoted strings
|
||||
)*
|
||||
< (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # leading <
|
||||
(?: @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* , (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
)* # further okay, if led by comma
|
||||
: # closing colon
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* )? # optional route
|
||||
(?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) # initial word
|
||||
(?: (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
|
|
||||
" (?: # opening quote...
|
||||
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||||
| # or
|
||||
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||||
)* " # closing quote
|
||||
) )* # further okay, if led by a period
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* @ (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # initial subdomain
|
||||
(?: #
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* \. # if led by a period...
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* (?:
|
||||
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||||
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||||
| \[ # [
|
||||
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||||
\] # ]
|
||||
) # ...further okay
|
||||
)*
|
||||
# address spec
|
||||
(?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* > # trailing >
|
||||
# name and address
|
||||
) (?: [\040\t] | \(
|
||||
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||||
\) )* # optional trailing comment
|
||||
/xSI
|
||||
|
||||
/[\h]/BZ
|
||||
>\x09<
|
||||
|
||||
/[\h]+/BZ
|
||||
>\x09\x20\xa0<
|
||||
|
||||
/[\v]/BZ
|
||||
|
||||
/[^\h]/BZ
|
||||
|
||||
/\h+/SI
|
||||
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||
|
||||
/[\h\x{dc00}]+/BZSI
|
||||
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||||
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||||
|
||||
/\H+/SI
|
||||
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||
|
||||
/[\H\x{d800}]+/
|
||||
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||||
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||||
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||||
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||||
|
||||
/\v+/SI
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/[\v\x{dc00}]+/BZSI
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/\V+/SI
|
||||
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||
|
||||
/[\V\x{d800}]+/
|
||||
\x{2028}\x{2029}\x{2027}\x{2030}
|
||||
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||||
|
||||
/\R+/SI<bsr_unicode>
|
||||
\x{2027}\x{2030}\x{2028}\x{2029}
|
||||
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||||
|
||||
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
||||
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||||
|
||||
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZ
|
||||
|
||||
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZi
|
||||
|
||||
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZ
|
||||
|
||||
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K
|
||||
XX
|
||||
|
||||
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
|
||||
XX
|
||||
|
||||
/\u0100/<JS>BZ
|
||||
|
||||
/[\u0100-\u0200]/<JS>BZ
|
||||
|
||||
/\ud800/<JS>BZ
|
||||
|
||||
/^\x{ffff}+/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}?/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}*/i
|
||||
\x{ffff}
|
||||
|
||||
/^\x{ffff}{3}/i
|
||||
\x{ffff}\x{ffff}\x{ffff}
|
||||
|
||||
/^\x{ffff}{0,3}/i
|
||||
\x{ffff}
|
||||
|
||||
/-- End of testinput17 --/
|
Reference in New Issue
Block a user