431 lines
5.8 KiB
Plaintext
431 lines
5.8 KiB
Plaintext
/-- This set of tests is for UTF-8 support, and is relevant only to the 8-bit
|
||
library. --/
|
||
|
||
/X(\C{3})/8
|
||
X\x{1234}
|
||
|
||
/X(\C{4})/8
|
||
X\x{1234}YZ
|
||
|
||
/X\C*/8
|
||
XYZabcdce
|
||
|
||
/X\C*?/8
|
||
XYZabcde
|
||
|
||
/X\C{3,5}/8
|
||
Xabcdefg
|
||
X\x{1234}
|
||
X\x{1234}YZ
|
||
X\x{1234}\x{512}
|
||
X\x{1234}\x{512}YZ
|
||
|
||
/X\C{3,5}?/8
|
||
Xabcdefg
|
||
X\x{1234}
|
||
X\x{1234}YZ
|
||
X\x{1234}\x{512}
|
||
|
||
/a\Cb/8
|
||
aXb
|
||
a\nb
|
||
|
||
/a\C\Cb/8
|
||
a\x{100}b
|
||
|
||
/ab\Cde/8
|
||
abXde
|
||
|
||
/a\C\Cb/8
|
||
a\x{100}b
|
||
** Failers
|
||
a\x{12257}b
|
||
|
||
/[Ã]/8
|
||
|
||
/Ã/8
|
||
|
||
/ÃÃÃxxx/8
|
||
|
||
/ÃÃÃxxx/8?DZSS
|
||
|
||
/badutf/8
|
||
\xdf
|
||
\xef
|
||
\xef\x80
|
||
\xf7
|
||
\xf7\x80
|
||
\xf7\x80\x80
|
||
\xfb
|
||
\xfb\x80
|
||
\xfb\x80\x80
|
||
\xfb\x80\x80\x80
|
||
\xfd
|
||
\xfd\x80
|
||
\xfd\x80\x80
|
||
\xfd\x80\x80\x80
|
||
\xfd\x80\x80\x80\x80
|
||
\xdf\x7f
|
||
\xef\x7f\x80
|
||
\xef\x80\x7f
|
||
\xf7\x7f\x80\x80
|
||
\xf7\x80\x7f\x80
|
||
\xf7\x80\x80\x7f
|
||
\xfb\x7f\x80\x80\x80
|
||
\xfb\x80\x7f\x80\x80
|
||
\xfb\x80\x80\x7f\x80
|
||
\xfb\x80\x80\x80\x7f
|
||
\xfd\x7f\x80\x80\x80\x80
|
||
\xfd\x80\x7f\x80\x80\x80
|
||
\xfd\x80\x80\x7f\x80\x80
|
||
\xfd\x80\x80\x80\x7f\x80
|
||
\xfd\x80\x80\x80\x80\x7f
|
||
\xed\xa0\x80
|
||
\xc0\x8f
|
||
\xe0\x80\x8f
|
||
\xf0\x80\x80\x8f
|
||
\xf8\x80\x80\x80\x8f
|
||
\xfc\x80\x80\x80\x80\x8f
|
||
\x80
|
||
\xfe
|
||
\xff
|
||
\xef\xb7\x90
|
||
|
||
/badutf/8
|
||
\xfb\x80\x80\x80\x80
|
||
\xfd\x80\x80\x80\x80\x80
|
||
\xf7\xbf\xbf\xbf
|
||
|
||
/shortutf/8
|
||
\P\P\xdf
|
||
\P\P\xef
|
||
\P\P\xef\x80
|
||
\P\P\xf7
|
||
\P\P\xf7\x80
|
||
\P\P\xf7\x80\x80
|
||
\P\P\xfb
|
||
\P\P\xfb\x80
|
||
\P\P\xfb\x80\x80
|
||
\P\P\xfb\x80\x80\x80
|
||
\P\P\xfd
|
||
\P\P\xfd\x80
|
||
\P\P\xfd\x80\x80
|
||
\P\P\xfd\x80\x80\x80
|
||
\P\P\xfd\x80\x80\x80\x80
|
||
|
||
/anything/8
|
||
\xc0\x80
|
||
\xc1\x8f
|
||
\xe0\x9f\x80
|
||
\xf0\x8f\x80\x80
|
||
\xf8\x87\x80\x80\x80
|
||
\xfc\x83\x80\x80\x80\x80
|
||
\xfe\x80\x80\x80\x80\x80
|
||
\xff\x80\x80\x80\x80\x80
|
||
\xc3\x8f
|
||
\xe0\xaf\x80
|
||
\xe1\x80\x80
|
||
\xf0\x9f\x80\x80
|
||
\xf1\x8f\x80\x80
|
||
\xf8\x88\x80\x80\x80
|
||
\xf9\x87\x80\x80\x80
|
||
\xfc\x84\x80\x80\x80\x80
|
||
\xfd\x83\x80\x80\x80\x80
|
||
\?\xf8\x88\x80\x80\x80
|
||
\?\xf9\x87\x80\x80\x80
|
||
\?\xfc\x84\x80\x80\x80\x80
|
||
\?\xfd\x83\x80\x80\x80\x80
|
||
|
||
/noncharacter/8
|
||
\x{fffe}
|
||
\x{ffff}
|
||
\x{1fffe}
|
||
\x{1ffff}
|
||
\x{2fffe}
|
||
\x{2ffff}
|
||
\x{3fffe}
|
||
\x{3ffff}
|
||
\x{4fffe}
|
||
\x{4ffff}
|
||
\x{5fffe}
|
||
\x{5ffff}
|
||
\x{6fffe}
|
||
\x{6ffff}
|
||
\x{7fffe}
|
||
\x{7ffff}
|
||
\x{8fffe}
|
||
\x{8ffff}
|
||
\x{9fffe}
|
||
\x{9ffff}
|
||
\x{afffe}
|
||
\x{affff}
|
||
\x{bfffe}
|
||
\x{bffff}
|
||
\x{cfffe}
|
||
\x{cffff}
|
||
\x{dfffe}
|
||
\x{dffff}
|
||
\x{efffe}
|
||
\x{effff}
|
||
\x{ffffe}
|
||
\x{fffff}
|
||
\x{10fffe}
|
||
\x{10ffff}
|
||
\x{fdd0}
|
||
\x{fdd1}
|
||
\x{fdd2}
|
||
\x{fdd3}
|
||
\x{fdd4}
|
||
\x{fdd5}
|
||
\x{fdd6}
|
||
\x{fdd7}
|
||
\x{fdd8}
|
||
\x{fdd9}
|
||
\x{fdda}
|
||
\x{fddb}
|
||
\x{fddc}
|
||
\x{fddd}
|
||
\x{fdde}
|
||
\x{fddf}
|
||
\x{fde0}
|
||
\x{fde1}
|
||
\x{fde2}
|
||
\x{fde3}
|
||
\x{fde4}
|
||
\x{fde5}
|
||
\x{fde6}
|
||
\x{fde7}
|
||
\x{fde8}
|
||
\x{fde9}
|
||
\x{fdea}
|
||
\x{fdeb}
|
||
\x{fdec}
|
||
\x{fded}
|
||
\x{fdee}
|
||
\x{fdef}
|
||
|
||
/\x{100}/8DZ
|
||
|
||
/\x{1000}/8DZ
|
||
|
||
/\x{10000}/8DZ
|
||
|
||
/\x{100000}/8DZ
|
||
|
||
/\x{10ffff}/8DZ
|
||
|
||
/[\x{ff}]/8DZ
|
||
|
||
/[\x{100}]/8DZ
|
||
|
||
/\x80/8DZ
|
||
|
||
/\xff/8DZ
|
||
|
||
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
|
||
\x{D55c}\x{ad6d}\x{C5B4}
|
||
|
||
/\x{65e5}\x{672c}\x{8a9e}/DZ8
|
||
\x{65e5}\x{672c}\x{8a9e}
|
||
|
||
/\x{80}/DZ8
|
||
|
||
/\x{084}/DZ8
|
||
|
||
/\x{104}/DZ8
|
||
|
||
/\x{861}/DZ8
|
||
|
||
/\x{212ab}/DZ8
|
||
|
||
/-- This one is here not because it's different to Perl, but because the way
|
||
the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||
can't tell the difference.) --/
|
||
|
||
/X(\C)(.*)/8
|
||
X\x{1234}
|
||
X\nabc
|
||
|
||
/-- This one is here because Perl gives out a grumbly error message (quite
|
||
correctly, but that messes up comparisons). --/
|
||
|
||
/a\Cb/8
|
||
*** Failers
|
||
a\x{100}b
|
||
|
||
/[^ab\xC0-\xF0]/8SDZ
|
||
\x{f1}
|
||
\x{bf}
|
||
\x{100}
|
||
\x{1000}
|
||
*** Failers
|
||
\x{c0}
|
||
\x{f0}
|
||
|
||
/Ä€{3,4}/8SDZ
|
||
\x{100}\x{100}\x{100}\x{100\x{100}
|
||
|
||
/(\x{100}+|x)/8SDZ
|
||
|
||
/(\x{100}*a|x)/8SDZ
|
||
|
||
/(\x{100}{0,2}a|x)/8SDZ
|
||
|
||
/(\x{100}{1,2}a|x)/8SDZ
|
||
|
||
/\x{100}/8DZ
|
||
|
||
/a\x{100}\x{101}*/8DZ
|
||
|
||
/a\x{100}\x{101}+/8DZ
|
||
|
||
/[^\x{c4}]/DZ
|
||
|
||
/[\x{100}]/8DZ
|
||
\x{100}
|
||
Z\x{100}
|
||
\x{100}Z
|
||
*** Failers
|
||
|
||
/[\xff]/DZ8
|
||
>\x{ff}<
|
||
|
||
/[^\xff]/8DZ
|
||
|
||
/\x{100}abc(xyz(?1))/8DZ
|
||
|
||
/a\x{1234}b/P8
|
||
a\x{1234}b
|
||
|
||
/\777/8I
|
||
\x{1ff}
|
||
\777
|
||
|
||
/\x{100}+\x{200}/8DZ
|
||
|
||
/\x{100}+X/8DZ
|
||
|
||
/^[\QÄ€\E-\QÅ<51>\E/BZ8
|
||
|
||
/-- This tests the stricter UTF-8 check according to RFC 3629. --/
|
||
|
||
/X/8
|
||
\x{0}\x{d7ff}\x{e000}\x{10ffff}
|
||
\x{d800}
|
||
\x{d800}\?
|
||
\x{da00}
|
||
\x{da00}\?
|
||
\x{dfff}
|
||
\x{dfff}\?
|
||
\x{110000}
|
||
\x{110000}\?
|
||
\x{2000000}
|
||
\x{2000000}\?
|
||
\x{7fffffff}
|
||
\x{7fffffff}\?
|
||
|
||
/(*UTF8)\x{1234}/
|
||
abcd\x{1234}pqr
|
||
|
||
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
|
||
|
||
/\h/SI8
|
||
ABC\x{09}
|
||
ABC\x{20}
|
||
ABC\x{a0}
|
||
ABC\x{1680}
|
||
ABC\x{180e}
|
||
ABC\x{2000}
|
||
ABC\x{202f}
|
||
ABC\x{205f}
|
||
ABC\x{3000}
|
||
|
||
/\v/SI8
|
||
ABC\x{0a}
|
||
ABC\x{0b}
|
||
ABC\x{0c}
|
||
ABC\x{0d}
|
||
ABC\x{85}
|
||
ABC\x{2028}
|
||
|
||
/\h*A/SI8
|
||
CDBABC
|
||
|
||
/\v+A/SI8
|
||
|
||
/\s?xxx\s/8SI
|
||
|
||
/\sxxx\s/I8ST1
|
||
AB\x{85}xxx\x{a0}XYZ
|
||
AB\x{a0}xxx\x{85}XYZ
|
||
|
||
/\S \S/I8ST1
|
||
\x{a2} \x{84}
|
||
A Z
|
||
|
||
/a+/8
|
||
a\x{123}aa\>1
|
||
a\x{123}aa\>2
|
||
a\x{123}aa\>3
|
||
a\x{123}aa\>4
|
||
a\x{123}aa\>5
|
||
a\x{123}aa\>6
|
||
|
||
/\x{1234}+/iS8I
|
||
|
||
/\x{1234}+?/iS8I
|
||
|
||
/\x{1234}++/iS8I
|
||
|
||
/\x{1234}{2}/iS8I
|
||
|
||
/[^\x{c4}]/8DZ
|
||
|
||
/X+\x{200}/8DZ
|
||
|
||
/\R/SI8
|
||
|
||
/\777/8DZ
|
||
|
||
/\w+\x{C4}/8BZ
|
||
a\x{C4}\x{C4}
|
||
|
||
/\w+\x{C4}/8BZT1
|
||
a\x{C4}\x{C4}
|
||
|
||
/\W+\x{C4}/8BZ
|
||
!\x{C4}
|
||
|
||
/\W+\x{C4}/8BZT1
|
||
!\x{C4}
|
||
|
||
/\W+\x{A1}/8BZ
|
||
!\x{A1}
|
||
|
||
/\W+\x{A1}/8BZT1
|
||
!\x{A1}
|
||
|
||
/X\s+\x{A0}/8BZ
|
||
X\x20\x{A0}\x{A0}
|
||
|
||
/X\s+\x{A0}/8BZT1
|
||
X\x20\x{A0}\x{A0}
|
||
|
||
/\S+\x{A0}/8BZ
|
||
X\x{A0}\x{A0}
|
||
|
||
/\S+\x{A0}/8BZT1
|
||
X\x{A0}\x{A0}
|
||
|
||
/\x{a0}+\s!/8BZ
|
||
\x{a0}\x20!
|
||
|
||
/\x{a0}+\s!/8BZT1
|
||
\x{a0}\x20!
|
||
|
||
/A/8
|
||
\x{ff000041}
|
||
\x{7f000041}
|
||
|
||
/-- End of testinput15 --/
|