d4de0e6f1e
I was über lazy at first, so took libs from SM. But actually it's quite easy to compile, so let's update to latest version \o/.
367 lines
4.9 KiB
Plaintext
367 lines
4.9 KiB
Plaintext
/-- This set of tests is for UTF-8 support but not Unicode property support,
|
||
and is relevant only to the 8-bit library. --/
|
||
|
||
< forbid W
|
||
|
||
/X(\C{3})/8
|
||
X\x{1234}
|
||
|
||
/X(\C{4})/8
|
||
X\x{1234}YZ
|
||
|
||
/X\C*/8
|
||
XYZabcdce
|
||
|
||
/X\C*?/8
|
||
XYZabcde
|
||
|
||
/X\C{3,5}/8
|
||
Xabcdefg
|
||
X\x{1234}
|
||
X\x{1234}YZ
|
||
X\x{1234}\x{512}
|
||
X\x{1234}\x{512}YZ
|
||
|
||
/X\C{3,5}?/8
|
||
Xabcdefg
|
||
X\x{1234}
|
||
X\x{1234}YZ
|
||
X\x{1234}\x{512}
|
||
|
||
/a\Cb/8
|
||
aXb
|
||
a\nb
|
||
|
||
/a\C\Cb/8
|
||
a\x{100}b
|
||
|
||
/ab\Cde/8
|
||
abXde
|
||
|
||
/a\C\Cb/8
|
||
a\x{100}b
|
||
** Failers
|
||
a\x{12257}b
|
||
|
||
/[Ã]/8
|
||
|
||
/Ã/8
|
||
|
||
/ÃÃÃxxx/8
|
||
|
||
/ÃÃÃxxx/8?DZSSO
|
||
|
||
/badutf/8
|
||
\xdf
|
||
\xef
|
||
\xef\x80
|
||
\xf7
|
||
\xf7\x80
|
||
\xf7\x80\x80
|
||
\xfb
|
||
\xfb\x80
|
||
\xfb\x80\x80
|
||
\xfb\x80\x80\x80
|
||
\xfd
|
||
\xfd\x80
|
||
\xfd\x80\x80
|
||
\xfd\x80\x80\x80
|
||
\xfd\x80\x80\x80\x80
|
||
\xdf\x7f
|
||
\xef\x7f\x80
|
||
\xef\x80\x7f
|
||
\xf7\x7f\x80\x80
|
||
\xf7\x80\x7f\x80
|
||
\xf7\x80\x80\x7f
|
||
\xfb\x7f\x80\x80\x80
|
||
\xfb\x80\x7f\x80\x80
|
||
\xfb\x80\x80\x7f\x80
|
||
\xfb\x80\x80\x80\x7f
|
||
\xfd\x7f\x80\x80\x80\x80
|
||
\xfd\x80\x7f\x80\x80\x80
|
||
\xfd\x80\x80\x7f\x80\x80
|
||
\xfd\x80\x80\x80\x7f\x80
|
||
\xfd\x80\x80\x80\x80\x7f
|
||
\xed\xa0\x80
|
||
\xc0\x8f
|
||
\xe0\x80\x8f
|
||
\xf0\x80\x80\x8f
|
||
\xf8\x80\x80\x80\x8f
|
||
\xfc\x80\x80\x80\x80\x8f
|
||
\x80
|
||
\xfe
|
||
\xff
|
||
|
||
/badutf/8
|
||
\xfb\x80\x80\x80\x80
|
||
\xfd\x80\x80\x80\x80\x80
|
||
\xf7\xbf\xbf\xbf
|
||
|
||
/shortutf/8
|
||
\P\P\xdf
|
||
\P\P\xef
|
||
\P\P\xef\x80
|
||
\P\P\xf7
|
||
\P\P\xf7\x80
|
||
\P\P\xf7\x80\x80
|
||
\P\P\xfb
|
||
\P\P\xfb\x80
|
||
\P\P\xfb\x80\x80
|
||
\P\P\xfb\x80\x80\x80
|
||
\P\P\xfd
|
||
\P\P\xfd\x80
|
||
\P\P\xfd\x80\x80
|
||
\P\P\xfd\x80\x80\x80
|
||
\P\P\xfd\x80\x80\x80\x80
|
||
|
||
/anything/8
|
||
\xc0\x80
|
||
\xc1\x8f
|
||
\xe0\x9f\x80
|
||
\xf0\x8f\x80\x80
|
||
\xf8\x87\x80\x80\x80
|
||
\xfc\x83\x80\x80\x80\x80
|
||
\xfe\x80\x80\x80\x80\x80
|
||
\xff\x80\x80\x80\x80\x80
|
||
\xc3\x8f
|
||
\xe0\xaf\x80
|
||
\xe1\x80\x80
|
||
\xf0\x9f\x80\x80
|
||
\xf1\x8f\x80\x80
|
||
\xf8\x88\x80\x80\x80
|
||
\xf9\x87\x80\x80\x80
|
||
\xfc\x84\x80\x80\x80\x80
|
||
\xfd\x83\x80\x80\x80\x80
|
||
\?\xf8\x88\x80\x80\x80
|
||
\?\xf9\x87\x80\x80\x80
|
||
\?\xfc\x84\x80\x80\x80\x80
|
||
\?\xfd\x83\x80\x80\x80\x80
|
||
|
||
/\x{100}/8DZ
|
||
|
||
/\x{1000}/8DZ
|
||
|
||
/\x{10000}/8DZ
|
||
|
||
/\x{100000}/8DZ
|
||
|
||
/\x{10ffff}/8DZ
|
||
|
||
/[\x{ff}]/8DZ
|
||
|
||
/[\x{100}]/8DZ
|
||
|
||
/\x80/8DZ
|
||
|
||
/\xff/8DZ
|
||
|
||
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
|
||
\x{D55c}\x{ad6d}\x{C5B4}
|
||
|
||
/\x{65e5}\x{672c}\x{8a9e}/DZ8
|
||
\x{65e5}\x{672c}\x{8a9e}
|
||
|
||
/\x{80}/DZ8
|
||
|
||
/\x{084}/DZ8
|
||
|
||
/\x{104}/DZ8
|
||
|
||
/\x{861}/DZ8
|
||
|
||
/\x{212ab}/DZ8
|
||
|
||
/-- This one is here not because it's different to Perl, but because the way
|
||
the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||
can't tell the difference.) --/
|
||
|
||
/X(\C)(.*)/8
|
||
X\x{1234}
|
||
X\nabc
|
||
|
||
/-- This one is here because Perl gives out a grumbly error message (quite
|
||
correctly, but that messes up comparisons). --/
|
||
|
||
/a\Cb/8
|
||
*** Failers
|
||
a\x{100}b
|
||
|
||
/[^ab\xC0-\xF0]/8SDZ
|
||
\x{f1}
|
||
\x{bf}
|
||
\x{100}
|
||
\x{1000}
|
||
*** Failers
|
||
\x{c0}
|
||
\x{f0}
|
||
|
||
/Ā{3,4}/8SDZ
|
||
\x{100}\x{100}\x{100}\x{100\x{100}
|
||
|
||
/(\x{100}+|x)/8SDZ
|
||
|
||
/(\x{100}*a|x)/8SDZ
|
||
|
||
/(\x{100}{0,2}a|x)/8SDZ
|
||
|
||
/(\x{100}{1,2}a|x)/8SDZ
|
||
|
||
/\x{100}/8DZ
|
||
|
||
/a\x{100}\x{101}*/8DZ
|
||
|
||
/a\x{100}\x{101}+/8DZ
|
||
|
||
/[^\x{c4}]/DZ
|
||
|
||
/[\x{100}]/8DZ
|
||
\x{100}
|
||
Z\x{100}
|
||
\x{100}Z
|
||
*** Failers
|
||
|
||
/[\xff]/DZ8
|
||
>\x{ff}<
|
||
|
||
/[^\xff]/8DZ
|
||
|
||
/\x{100}abc(xyz(?1))/8DZ
|
||
|
||
/a\x{1234}b/P8
|
||
a\x{1234}b
|
||
|
||
/\777/8I
|
||
\x{1ff}
|
||
\777
|
||
|
||
/\x{100}+\x{200}/8DZ
|
||
|
||
/\x{100}+X/8DZ
|
||
|
||
/^[\QĀ\E-\QŐ\E/BZ8
|
||
|
||
/-- This tests the stricter UTF-8 check according to RFC 3629. --/
|
||
|
||
/X/8
|
||
\x{d800}
|
||
\x{d800}\?
|
||
\x{da00}
|
||
\x{da00}\?
|
||
\x{dfff}
|
||
\x{dfff}\?
|
||
\x{110000}
|
||
\x{110000}\?
|
||
\x{2000000}
|
||
\x{2000000}\?
|
||
\x{7fffffff}
|
||
\x{7fffffff}\?
|
||
|
||
/(*UTF8)\x{1234}/
|
||
abcd\x{1234}pqr
|
||
|
||
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
|
||
|
||
/\h/SI8
|
||
ABC\x{09}
|
||
ABC\x{20}
|
||
ABC\x{a0}
|
||
ABC\x{1680}
|
||
ABC\x{180e}
|
||
ABC\x{2000}
|
||
ABC\x{202f}
|
||
ABC\x{205f}
|
||
ABC\x{3000}
|
||
|
||
/\v/SI8
|
||
ABC\x{0a}
|
||
ABC\x{0b}
|
||
ABC\x{0c}
|
||
ABC\x{0d}
|
||
ABC\x{85}
|
||
ABC\x{2028}
|
||
|
||
/\h*A/SI8
|
||
CDBABC
|
||
|
||
/\v+A/SI8
|
||
|
||
/\s?xxx\s/8SI
|
||
|
||
/\sxxx\s/I8ST1
|
||
AB\x{85}xxx\x{a0}XYZ
|
||
AB\x{a0}xxx\x{85}XYZ
|
||
|
||
/\S \S/I8ST1
|
||
\x{a2} \x{84}
|
||
A Z
|
||
|
||
/a+/8
|
||
a\x{123}aa\>1
|
||
a\x{123}aa\>2
|
||
a\x{123}aa\>3
|
||
a\x{123}aa\>4
|
||
a\x{123}aa\>5
|
||
a\x{123}aa\>6
|
||
|
||
/\x{1234}+/iS8I
|
||
|
||
/\x{1234}+?/iS8I
|
||
|
||
/\x{1234}++/iS8I
|
||
|
||
/\x{1234}{2}/iS8I
|
||
|
||
/[^\x{c4}]/8DZ
|
||
|
||
/X+\x{200}/8DZ
|
||
|
||
/\R/SI8
|
||
|
||
/\777/8DZ
|
||
|
||
/\w+\x{C4}/8BZ
|
||
a\x{C4}\x{C4}
|
||
|
||
/\w+\x{C4}/8BZT1
|
||
a\x{C4}\x{C4}
|
||
|
||
/\W+\x{C4}/8BZ
|
||
!\x{C4}
|
||
|
||
/\W+\x{C4}/8BZT1
|
||
!\x{C4}
|
||
|
||
/\W+\x{A1}/8BZ
|
||
!\x{A1}
|
||
|
||
/\W+\x{A1}/8BZT1
|
||
!\x{A1}
|
||
|
||
/X\s+\x{A0}/8BZ
|
||
X\x20\x{A0}\x{A0}
|
||
|
||
/X\s+\x{A0}/8BZT1
|
||
X\x20\x{A0}\x{A0}
|
||
|
||
/\S+\x{A0}/8BZ
|
||
X\x{A0}\x{A0}
|
||
|
||
/\S+\x{A0}/8BZT1
|
||
X\x{A0}\x{A0}
|
||
|
||
/\x{a0}+\s!/8BZ
|
||
\x{a0}\x20!
|
||
|
||
/\x{a0}+\s!/8BZT1
|
||
\x{a0}\x20!
|
||
|
||
/A/8
|
||
\x{ff000041}
|
||
\x{7f000041}
|
||
|
||
/(*UTF8)abc/9
|
||
|
||
/abc/89
|
||
|
||
/-- End of testinput15 --/
|