673 lines
9.3 KiB
Plaintext
673 lines
9.3 KiB
Plaintext
/-- These tests for Unicode property support test PCRE's API and show some of
|
||
the compiled code. They are not Perl-compatible. --/
|
||
|
||
/[\p{L}]/DZ
|
||
|
||
/[\p{^L}]/DZ
|
||
|
||
/[\P{L}]/DZ
|
||
|
||
/[\P{^L}]/DZ
|
||
|
||
/[abc\p{L}\x{0660}]/8DZ
|
||
|
||
/[\p{Nd}]/8DZ
|
||
1234
|
||
|
||
/[\p{Nd}+-]+/8DZ
|
||
1234
|
||
12-34
|
||
12+\x{661}-34
|
||
** Failers
|
||
abcd
|
||
|
||
/[\x{105}-\x{109}]/8iDZ
|
||
\x{104}
|
||
\x{105}
|
||
\x{109}
|
||
** Failers
|
||
\x{100}
|
||
\x{10a}
|
||
|
||
/[z-\x{100}]/8iDZ
|
||
Z
|
||
z
|
||
\x{39c}
|
||
\x{178}
|
||
|
|
||
\x{80}
|
||
\x{ff}
|
||
\x{100}
|
||
\x{101}
|
||
** Failers
|
||
\x{102}
|
||
Y
|
||
y
|
||
|
||
/[z-\x{100}]/8DZi
|
||
|
||
/(?:[\PPa*]*){8,}/
|
||
|
||
/[\P{Any}]/BZ
|
||
|
||
/[\P{Any}\E]/BZ
|
||
|
||
/(\P{Yi}+\277)/
|
||
|
||
/(\P{Yi}+\277)?/
|
||
|
||
/(?<=\P{Yi}{3}A)X/
|
||
|
||
/\p{Yi}+(\P{Yi}+)(?1)/
|
||
|
||
/(\P{Yi}{2}\277)?/
|
||
|
||
/[\P{Yi}A]/
|
||
|
||
/[\P{Yi}\P{Yi}\P{Yi}A]/
|
||
|
||
/[^\P{Yi}A]/
|
||
|
||
/[^\P{Yi}\P{Yi}\P{Yi}A]/
|
||
|
||
/(\P{Yi}*\277)*/
|
||
|
||
/(\P{Yi}*?\277)*/
|
||
|
||
/(\p{Yi}*+\277)*/
|
||
|
||
/(\P{Yi}?\277)*/
|
||
|
||
/(\P{Yi}??\277)*/
|
||
|
||
/(\p{Yi}?+\277)*/
|
||
|
||
/(\P{Yi}{0,3}\277)*/
|
||
|
||
/(\P{Yi}{0,3}?\277)*/
|
||
|
||
/(\p{Yi}{0,3}+\277)*/
|
||
|
||
/\p{Zl}{2,3}+/8BZ
|
||
|
||
\x{2028}\x{2028}\x{2028}
|
||
|
||
/\p{Zl}/8BZ
|
||
|
||
/\p{Lu}{3}+/8BZ
|
||
|
||
/\pL{2}+/8BZ
|
||
|
||
/\p{Cc}{2}+/8BZ
|
||
|
||
/^\p{Cs}/8
|
||
\?\x{dfff}
|
||
** Failers
|
||
\x{09f}
|
||
|
||
/^\p{Sc}+/8
|
||
$\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
|
||
\x{9f2}
|
||
** Failers
|
||
X
|
||
\x{2c2}
|
||
|
||
/^\p{Zs}/8
|
||
\ \
|
||
\x{a0}
|
||
\x{1680}
|
||
\x{180e}
|
||
\x{2000}
|
||
\x{2001}
|
||
** Failers
|
||
\x{2028}
|
||
\x{200d}
|
||
|
||
/-- These four are here rather than in test 6 because Perl has problems with
|
||
the negative versions of the properties. --/
|
||
|
||
/\p{^Lu}/8i
|
||
1234
|
||
** Failers
|
||
ABC
|
||
|
||
/\P{Lu}/8i
|
||
1234
|
||
** Failers
|
||
ABC
|
||
|
||
/\p{Ll}/8i
|
||
a
|
||
Az
|
||
** Failers
|
||
ABC
|
||
|
||
/\p{Lu}/8i
|
||
A
|
||
a\x{10a0}B
|
||
** Failers
|
||
a
|
||
\x{1d00}
|
||
|
||
/[\x{c0}\x{391}]/8i
|
||
\x{c0}
|
||
\x{e0}
|
||
|
||
/-- The next two are special cases where the lengths of the different cases of
|
||
the same character differ. The first went wrong with heap frame storage; the
|
||
second was broken in all cases. --/
|
||
|
||
/^\x{023a}+?(\x{0130}+)/8i
|
||
\x{023a}\x{2c65}\x{0130}
|
||
|
||
/^\x{023a}+([^X])/8i
|
||
\x{023a}\x{2c65}X
|
||
|
||
/\x{c0}+\x{116}+/8i
|
||
\x{c0}\x{e0}\x{116}\x{117}
|
||
|
||
/[\x{c0}\x{116}]+/8i
|
||
\x{c0}\x{e0}\x{116}\x{117}
|
||
|
||
/(\x{de})\1/8i
|
||
\x{de}\x{de}
|
||
\x{de}\x{fe}
|
||
\x{fe}\x{fe}
|
||
\x{fe}\x{de}
|
||
|
||
/^\x{c0}$/8i
|
||
\x{c0}
|
||
\x{e0}
|
||
|
||
/^\x{e0}$/8i
|
||
\x{c0}
|
||
\x{e0}
|
||
|
||
/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
|
||
will match it only with UCP support, because without that it has no notion
|
||
of case for anything other than the ASCII letters. --/
|
||
|
||
/((?i)[\x{c0}])/8
|
||
\x{c0}
|
||
\x{e0}
|
||
|
||
/(?i:[\x{c0}])/8
|
||
\x{c0}
|
||
\x{e0}
|
||
|
||
/-- These are PCRE's extra properties to help with Unicodizing \d etc. --/
|
||
|
||
/^\p{Xan}/8
|
||
ABCD
|
||
1234
|
||
\x{6ca}
|
||
\x{a6c}
|
||
\x{10a7}
|
||
** Failers
|
||
_ABC
|
||
|
||
/^\p{Xan}+/8
|
||
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
||
** Failers
|
||
_ABC
|
||
|
||
/^\p{Xan}+?/8
|
||
\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/^\p{Xan}*/8
|
||
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/^\p{Xan}{2,9}/8
|
||
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/^\p{Xan}{2,9}?/8
|
||
\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/^[\p{Xan}]/8
|
||
ABCD1234_
|
||
1234abcd_
|
||
\x{6ca}
|
||
\x{a6c}
|
||
\x{10a7}
|
||
** Failers
|
||
_ABC
|
||
|
||
/^[\p{Xan}]+/8
|
||
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
||
** Failers
|
||
_ABC
|
||
|
||
/^>\p{Xsp}/8
|
||
>\x{1680}\x{2028}\x{0b}
|
||
>\x{a0}
|
||
** Failers
|
||
\x{0b}
|
||
|
||
/^>\p{Xsp}+/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xsp}+?/8
|
||
>\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xsp}*/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xsp}{2,9}/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xsp}{2,9}?/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>[\p{Xsp}]/8
|
||
>\x{2028}\x{0b}
|
||
|
||
/^>[\p{Xsp}]+/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xps}/8
|
||
>\x{1680}\x{2028}\x{0b}
|
||
>\x{a0}
|
||
** Failers
|
||
\x{0b}
|
||
|
||
/^>\p{Xps}+/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xps}+?/8
|
||
>\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xps}*/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xps}{2,9}/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>\p{Xps}{2,9}?/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^>[\p{Xps}]/8
|
||
>\x{2028}\x{0b}
|
||
|
||
/^>[\p{Xps}]+/8
|
||
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
|
||
|
||
/^\p{Xwd}/8
|
||
ABCD
|
||
1234
|
||
\x{6ca}
|
||
\x{a6c}
|
||
\x{10a7}
|
||
_ABC
|
||
** Failers
|
||
[]
|
||
|
||
/^\p{Xwd}+/8
|
||
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/^\p{Xwd}+?/8
|
||
\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/^\p{Xwd}*/8
|
||
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/^\p{Xwd}{2,9}/8
|
||
A_B12\x{6ca}\x{a6c}\x{10a7}
|
||
|
||
/^\p{Xwd}{2,9}?/8
|
||
\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/^[\p{Xwd}]/8
|
||
ABCD1234_
|
||
1234abcd_
|
||
\x{6ca}
|
||
\x{a6c}
|
||
\x{10a7}
|
||
_ABC
|
||
** Failers
|
||
[]
|
||
|
||
/^[\p{Xwd}]+/8
|
||
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
|
||
|
||
/-- A check not in UTF-8 mode --/
|
||
|
||
/^[\p{Xwd}]+/
|
||
ABCD1234_
|
||
|
||
/-- Some negative checks --/
|
||
|
||
/^[\P{Xwd}]+/8
|
||
!.+\x{019}\x{35a}AB
|
||
|
||
/^[\p{^Xwd}]+/8
|
||
!.+\x{019}\x{35a}AB
|
||
|
||
/[\D]/WBZ8
|
||
1\x{3c8}2
|
||
|
||
/[\d]/WBZ8
|
||
>\x{6f4}<
|
||
|
||
/[\S]/WBZ8
|
||
\x{1680}\x{6f4}\x{1680}
|
||
|
||
/[\s]/WBZ8
|
||
>\x{1680}<
|
||
|
||
/[\W]/WBZ8
|
||
A\x{1712}B
|
||
|
||
/[\w]/WBZ8
|
||
>\x{1723}<
|
||
|
||
/\D/WBZ8
|
||
1\x{3c8}2
|
||
|
||
/\d/WBZ8
|
||
>\x{6f4}<
|
||
|
||
/\S/WBZ8
|
||
\x{1680}\x{6f4}\x{1680}
|
||
|
||
/\s/WBZ8
|
||
>\x{1680}>
|
||
|
||
/\W/WBZ8
|
||
A\x{1712}B
|
||
|
||
/\w/WBZ8
|
||
>\x{1723}<
|
||
|
||
/[[:alpha:]]/WBZ
|
||
|
||
/[[:lower:]]/WBZ
|
||
|
||
/[[:upper:]]/WBZ
|
||
|
||
/[[:alnum:]]/WBZ
|
||
|
||
/[[:ascii:]]/WBZ
|
||
|
||
/[[:cntrl:]]/WBZ
|
||
|
||
/[[:digit:]]/WBZ
|
||
|
||
/[[:graph:]]/WBZ
|
||
|
||
/[[:print:]]/WBZ
|
||
|
||
/[[:punct:]]/WBZ
|
||
|
||
/[[:space:]]/WBZ
|
||
|
||
/[[:word:]]/WBZ
|
||
|
||
/[[:xdigit:]]/WBZ
|
||
|
||
/-- Unicode properties for \b and \B --/
|
||
|
||
/\b...\B/8W
|
||
abc_
|
||
\x{37e}abc\x{376}
|
||
\x{37e}\x{376}\x{371}\x{393}\x{394}
|
||
!\x{c0}++\x{c1}\x{c2}
|
||
!\x{c0}+++++
|
||
|
||
/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/
|
||
|
||
/\b...\B/8
|
||
abc_
|
||
** Failers
|
||
\x{37e}abc\x{376}
|
||
\x{37e}\x{376}\x{371}\x{393}\x{394}
|
||
!\x{c0}++\x{c1}\x{c2}
|
||
!\x{c0}+++++
|
||
|
||
/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/
|
||
|
||
/\b...\B/W
|
||
abc_
|
||
!\x{c0}++\x{c1}\x{c2}
|
||
!\x{c0}+++++
|
||
|
||
/-- Some of these are silly, but they check various combinations --/
|
||
|
||
/[[:^alpha:][:^cntrl:]]+/8WBZ
|
||
123
|
||
abc
|
||
|
||
/[[:^cntrl:][:^alpha:]]+/8WBZ
|
||
123
|
||
abc
|
||
|
||
/[[:alpha:]]+/8WBZ
|
||
abc
|
||
|
||
/[[:^alpha:]\S]+/8WBZ
|
||
123
|
||
abc
|
||
|
||
/[^\d]+/8WBZ
|
||
abc123
|
||
abc\x{123}
|
||
\x{660}abc
|
||
|
||
/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ
|
||
|
||
/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ
|
||
|
||
/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ
|
||
|
||
/\p{Han}+X\p{Greek}+\x{370}/BZ8
|
||
|
||
/\p{Xan}+!\p{Xan}+A/BZ
|
||
|
||
/\p{Xsp}+!\p{Xsp}\t/BZ
|
||
|
||
/\p{Xps}+!\p{Xps}\t/BZ
|
||
|
||
/\p{Xwd}+!\p{Xwd}_/BZ
|
||
|
||
/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ
|
||
|
||
/-- These behaved oddly in Perl, so they are kept in this test --/
|
||
|
||
/(\x{23a}\x{23a}\x{23a})?\1/8i
|
||
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
|
||
|
||
/(ȺȺȺ)?\1/8i
|
||
ȺȺȺⱥⱥ
|
||
|
||
/(\x{23a}\x{23a}\x{23a})?\1/8i
|
||
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
|
||
|
||
/(ȺȺȺ)?\1/8i
|
||
ȺȺȺⱥⱥⱥ
|
||
|
||
/(\x{23a}\x{23a}\x{23a})\1/8i
|
||
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
|
||
|
||
/(ȺȺȺ)\1/8i
|
||
ȺȺȺⱥⱥ
|
||
|
||
/(\x{23a}\x{23a}\x{23a})\1/8i
|
||
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
|
||
|
||
/(ȺȺȺ)\1/8i
|
||
ȺȺȺⱥⱥⱥ
|
||
|
||
/(\x{2c65}\x{2c65})\1/8i
|
||
\x{2c65}\x{2c65}\x{23a}\x{23a}
|
||
|
||
/(ⱥⱥ)\1/8i
|
||
ⱥⱥȺȺ
|
||
|
||
/(\x{23a}\x{23a}\x{23a})\1Y/8i
|
||
X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
|
||
|
||
/(\x{2c65}\x{2c65})\1Y/8i
|
||
X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
|
||
|
||
/-- --/
|
||
|
||
/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/
|
||
|
||
/^[\p{Batak}]/8
|
||
\x{1bc0}
|
||
\x{1bff}
|
||
** Failers
|
||
\x{1bf4}
|
||
|
||
/^[\p{Brahmi}]/8
|
||
\x{11000}
|
||
\x{1106f}
|
||
** Failers
|
||
\x{1104e}
|
||
|
||
/^[\p{Mandaic}]/8
|
||
\x{840}
|
||
\x{85e}
|
||
** Failers
|
||
\x{85c}
|
||
\x{85d}
|
||
|
||
/-- --/
|
||
|
||
/(\X*)(.)/s8
|
||
A\x{300}
|
||
|
||
/^S(\X*)e(\X*)$/8
|
||
Stéréo
|
||
|
||
/^\X/8
|
||
́réo
|
||
|
||
/^a\X41z/<JS>
|
||
aX41z
|
||
*** Failers
|
||
aAz
|
||
|
||
/(?<=ab\Cde)X/8
|
||
|
||
/\X/
|
||
a\P
|
||
a\P\P
|
||
|
||
/\Xa/
|
||
aa\P
|
||
aa\P\P
|
||
|
||
/\X{2}/
|
||
aa\P
|
||
aa\P\P
|
||
|
||
/\X+a/
|
||
a\P
|
||
aa\P
|
||
aa\P\P
|
||
|
||
/\X+?a/
|
||
a\P
|
||
ab\P
|
||
aa\P
|
||
aa\P\P
|
||
aba\P
|
||
|
||
/-- These Unicode 6.1.0 scripts are not known to Perl. --/
|
||
|
||
/\p{Chakma}\d/8W
|
||
\x{11100}\x{1113c}
|
||
|
||
/\p{Takri}\d/8W
|
||
\x{11680}\x{116c0}
|
||
|
||
/^\X/8
|
||
A\P
|
||
A\P\P
|
||
A\x{300}\x{301}\P
|
||
A\x{300}\x{301}\P\P
|
||
A\x{301}\P
|
||
A\x{301}\P\P
|
||
|
||
/^\X{2,3}/8
|
||
A\P
|
||
A\P\P
|
||
AA\P
|
||
AA\P\P
|
||
A\x{300}\x{301}\P
|
||
A\x{300}\x{301}\P\P
|
||
A\x{300}\x{301}A\x{300}\x{301}\P
|
||
A\x{300}\x{301}A\x{300}\x{301}\P\P
|
||
|
||
/^\X{2}/8
|
||
AA\P
|
||
AA\P\P
|
||
A\x{300}\x{301}A\x{300}\x{301}\P
|
||
A\x{300}\x{301}A\x{300}\x{301}\P\P
|
||
|
||
/^\X+/8
|
||
AA\P
|
||
AA\P\P
|
||
|
||
/^\X+?Z/8
|
||
AA\P
|
||
AA\P\P
|
||
|
||
/A\x{3a3}B/8iDZ
|
||
|
||
/\x{3a3}B/8iDZ
|
||
|
||
/[\x{3a3}]/8iBZ
|
||
|
||
/[^\x{3a3}]/8iBZ
|
||
|
||
/[\x{3a3}]+/8iBZ
|
||
|
||
/[^\x{3a3}]+/8iBZ
|
||
|
||
/a*\x{3a3}/8iBZ
|
||
|
||
/\x{3a3}+a/8iBZ
|
||
|
||
/\x{3a3}*\x{3c2}/8iBZ
|
||
|
||
/\x{3a3}{3}/8i+
|
||
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
|
||
|
||
/\x{3a3}{2,4}/8i+
|
||
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
|
||
|
||
/\x{3a3}{2,4}?/8i+
|
||
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
|
||
|
||
/\x{3a3}+./8i+
|
||
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
|
||
|
||
/\x{3a3}++./8i+
|
||
** Failers
|
||
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
|
||
|
||
/\x{3a3}*\x{3c2}/8iBZ
|
||
|
||
/[^\x{3a3}]*\x{3c2}/8iBZ
|
||
|
||
/[^a]*\x{3c2}/8iBZ
|
||
|
||
/ist/8iBZ
|
||
ikt
|
||
|
||
/is+t/8i
|
||
iSs\x{17f}t
|
||
ikt
|
||
|
||
/is+?t/8i
|
||
ikt
|
||
|
||
/is?t/8i
|
||
ikt
|
||
|
||
/is{2}t/8i
|
||
iskt
|
||
|
||
/-- End of testinput7 --/
|