I was über lazy at first, so I took the libs from SM. But it's actually quite easy to compile, so let's update to the latest version \o/.
		
			
				
	
	
		
			301 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			301 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
/-- This set of tests is for UTF-16 and UTF-32 support, and is relevant only to
 | 
						||
    the 16- and 32-bit libraries. --/
 | 
						||
    
 | 
						||
< forbid W 
 | 
						||
 | 
						||
/ÃÃÃxxx/8?DZSS
 | 
						||
 | 
						||
/abc/8
 | 
						||
    Ã]
 | 
						||
 | 
						||
/X(\C{3})/8
 | 
						||
    X\x{11234}Y
 | 
						||
    X\x{11234}YZ
 | 
						||
 | 
						||
/X(\C{4})/8
 | 
						||
    X\x{11234}YZ
 | 
						||
    X\x{11234}YZW
 | 
						||
 | 
						||
/X\C*/8
 | 
						||
    XYZabcdce
 | 
						||
 | 
						||
/X\C*?/8
 | 
						||
    XYZabcde
 | 
						||
 | 
						||
/X\C{3,5}/8
 | 
						||
    Xabcdefg
 | 
						||
    X\x{11234}Y
 | 
						||
    X\x{11234}YZ
 | 
						||
    X\x{11234}\x{512}
 | 
						||
    X\x{11234}\x{512}YZ
 | 
						||
    X\x{11234}\x{512}\x{11234}Z
 | 
						||
 | 
						||
/X\C{3,5}?/8
 | 
						||
    Xabcdefg
 | 
						||
    X\x{11234}Y
 | 
						||
    X\x{11234}YZ
 | 
						||
    X\x{11234}\x{512}YZ
 | 
						||
    *** Failers
 | 
						||
    X\x{11234}
 | 
						||
 | 
						||
/a\Cb/8
 | 
						||
    aXb
 | 
						||
    a\nb
 | 
						||
 | 
						||
/a\C\Cb/8
 | 
						||
    a\x{12257}b
 | 
						||
    a\x{12257}\x{11234}b
 | 
						||
    ** Failers
 | 
						||
    a\x{100}b
 | 
						||
 | 
						||
/ab\Cde/8
 | 
						||
    abXde
 | 
						||
 | 
						||
/-- Check maximum character size --/
 | 
						||
 | 
						||
/\x{ffff}/8DZ
 | 
						||
 | 
						||
/\x{10000}/8DZ
 | 
						||
 | 
						||
/\x{100}/8DZ
 | 
						||
 | 
						||
/\x{1000}/8DZ
 | 
						||
 | 
						||
/\x{10000}/8DZ
 | 
						||
 | 
						||
/\x{100000}/8DZ
 | 
						||
 | 
						||
/\x{10ffff}/8DZ
 | 
						||
 | 
						||
/[\x{ff}]/8DZ
 | 
						||
 | 
						||
/[\x{100}]/8DZ
 | 
						||
 | 
						||
/\x80/8DZ
 | 
						||
 | 
						||
/\xff/8DZ
 | 
						||
 | 
						||
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
 | 
						||
    \x{D55c}\x{ad6d}\x{C5B4}
 | 
						||
 | 
						||
/\x{65e5}\x{672c}\x{8a9e}/DZ8
 | 
						||
    \x{65e5}\x{672c}\x{8a9e}
 | 
						||
 | 
						||
/\x{80}/DZ8
 | 
						||
 | 
						||
/\x{084}/DZ8
 | 
						||
 | 
						||
/\x{104}/DZ8
 | 
						||
 | 
						||
/\x{861}/DZ8
 | 
						||
 | 
						||
/\x{212ab}/DZ8
 | 
						||
 | 
						||
/-- This one is here not because it's different to Perl, but because the way
 | 
						||
the captured single-byte is displayed. (In Perl it becomes a character, and you
 | 
						||
can't tell the difference.) --/
 | 
						||
 | 
						||
/X(\C)(.*)/8
 | 
						||
    X\x{1234}
 | 
						||
    X\nabc
 | 
						||
 | 
						||
/-- This one is here because Perl gives out a grumbly error message (quite
 | 
						||
correctly, but that messes up comparisons). --/
 | 
						||
 | 
						||
/a\Cb/8
 | 
						||
    *** Failers
 | 
						||
    a\x{100}b
 | 
						||
 | 
						||
/[^ab\xC0-\xF0]/8SDZ
 | 
						||
    \x{f1}
 | 
						||
    \x{bf}
 | 
						||
    \x{100}
 | 
						||
    \x{1000}
 | 
						||
    *** Failers
 | 
						||
    \x{c0}
 | 
						||
    \x{f0}
 | 
						||
 | 
						||
/Ā{3,4}/8SDZ
 | 
						||
  \x{100}\x{100}\x{100}\x{100\x{100}
 | 
						||
 | 
						||
/(\x{100}+|x)/8SDZ
 | 
						||
 | 
						||
/(\x{100}*a|x)/8SDZ
 | 
						||
 | 
						||
/(\x{100}{0,2}a|x)/8SDZ
 | 
						||
 | 
						||
/(\x{100}{1,2}a|x)/8SDZ
 | 
						||
 | 
						||
/\x{100}/8DZ
 | 
						||
 | 
						||
/a\x{100}\x{101}*/8DZ
 | 
						||
 | 
						||
/a\x{100}\x{101}+/8DZ
 | 
						||
 | 
						||
/[^\x{c4}]/DZ
 | 
						||
 | 
						||
/[\x{100}]/8DZ
 | 
						||
    \x{100}
 | 
						||
    Z\x{100}
 | 
						||
    \x{100}Z
 | 
						||
    *** Failers
 | 
						||
 | 
						||
/[\xff]/DZ8
 | 
						||
    >\x{ff}<
 | 
						||
 | 
						||
/[^\xff]/8DZ
 | 
						||
 | 
						||
/\x{100}abc(xyz(?1))/8DZ
 | 
						||
 | 
						||
/\777/8I
 | 
						||
  \x{1ff}
 | 
						||
  \777
 | 
						||
 | 
						||
/\x{100}+\x{200}/8DZ
 | 
						||
 | 
						||
/\x{100}+X/8DZ
 | 
						||
 | 
						||
/^[\QĀ\E-\QŐ\E/BZ8
 | 
						||
 | 
						||
/X/8
 | 
						||
    \x{d800}
 | 
						||
    \x{d800}\?
 | 
						||
    \x{da00}
 | 
						||
    \x{da00}\?
 | 
						||
    \x{dc00}
 | 
						||
    \x{dc00}\?
 | 
						||
    \x{de00}
 | 
						||
    \x{de00}\?
 | 
						||
    \x{dfff}
 | 
						||
    \x{dfff}\?
 | 
						||
    \x{110000}
 | 
						||
    \x{d800}\x{1234}
 | 
						||
 | 
						||
/(*UTF16)\x{11234}/
 | 
						||
  abcd\x{11234}pqr
 | 
						||
 | 
						||
/(*UTF)\x{11234}/I
 | 
						||
  abcd\x{11234}pqr
 | 
						||
 | 
						||
/(*UTF-32)\x{11234}/
 | 
						||
  abcd\x{11234}pqr
 | 
						||
 | 
						||
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
 | 
						||
 | 
						||
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
 | 
						||
 | 
						||
/\h/SI8
 | 
						||
    ABC\x{09}
 | 
						||
    ABC\x{20}
 | 
						||
    ABC\x{a0}
 | 
						||
    ABC\x{1680}
 | 
						||
    ABC\x{180e}
 | 
						||
    ABC\x{2000}
 | 
						||
    ABC\x{202f}
 | 
						||
    ABC\x{205f}
 | 
						||
    ABC\x{3000}
 | 
						||
 | 
						||
/\v/SI8
 | 
						||
    ABC\x{0a}
 | 
						||
    ABC\x{0b}
 | 
						||
    ABC\x{0c}
 | 
						||
    ABC\x{0d}
 | 
						||
    ABC\x{85}
 | 
						||
    ABC\x{2028}
 | 
						||
 | 
						||
/\h*A/SI8
 | 
						||
    CDBABC
 | 
						||
    \x{2000}ABC 
 | 
						||
 | 
						||
/\R*A/SI8<bsr_unicode>
 | 
						||
    CDBABC
 | 
						||
    \x{2028}A  
 | 
						||
 | 
						||
/\v+A/SI8
 | 
						||
 | 
						||
/\s?xxx\s/8SI
 | 
						||
 | 
						||
/\sxxx\s/I8ST1
 | 
						||
    AB\x{85}xxx\x{a0}XYZ
 | 
						||
    AB\x{a0}xxx\x{85}XYZ
 | 
						||
 | 
						||
/\S \S/I8ST1
 | 
						||
    \x{a2} \x{84}
 | 
						||
    A Z
 | 
						||
 | 
						||
/a+/8
 | 
						||
    a\x{123}aa\>1
 | 
						||
    a\x{123}aa\>2
 | 
						||
    a\x{123}aa\>3
 | 
						||
    a\x{123}aa\>4
 | 
						||
    a\x{123}aa\>5
 | 
						||
    a\x{123}aa\>6
 | 
						||
 | 
						||
/\x{1234}+/iS8I
 | 
						||
 | 
						||
/\x{1234}+?/iS8I
 | 
						||
 | 
						||
/\x{1234}++/iS8I
 | 
						||
 | 
						||
/\x{1234}{2}/iS8I
 | 
						||
 | 
						||
/[^\x{c4}]/8DZ
 | 
						||
 | 
						||
/X+\x{200}/8DZ
 | 
						||
 | 
						||
/\R/SI8
 | 
						||
 | 
						||
/-- Check bad offset --/
 | 
						||
 | 
						||
/a/8
 | 
						||
    \x{10000}\>1
 | 
						||
    \x{10000}ab\>1
 | 
						||
    \x{10000}ab\>2
 | 
						||
    \x{10000}ab\>3
 | 
						||
    \x{10000}ab\>4
 | 
						||
    \x{10000}ab\>5
 | 
						||
 | 
						||
/í¼€/8
 | 
						||
 | 
						||
/\w+\x{C4}/8BZ
 | 
						||
    a\x{C4}\x{C4}
 | 
						||
 | 
						||
/\w+\x{C4}/8BZT1
 | 
						||
    a\x{C4}\x{C4}
 | 
						||
    
 | 
						||
/\W+\x{C4}/8BZ
 | 
						||
    !\x{C4}
 | 
						||
 
 | 
						||
/\W+\x{C4}/8BZT1
 | 
						||
    !\x{C4}
 | 
						||
 | 
						||
/\W+\x{A1}/8BZ
 | 
						||
    !\x{A1}
 | 
						||
 
 | 
						||
/\W+\x{A1}/8BZT1
 | 
						||
    !\x{A1}
 | 
						||
 | 
						||
/X\s+\x{A0}/8BZ
 | 
						||
    X\x20\x{A0}\x{A0}
 | 
						||
 | 
						||
/X\s+\x{A0}/8BZT1
 | 
						||
    X\x20\x{A0}\x{A0}
 | 
						||
 | 
						||
/\S+\x{A0}/8BZ
 | 
						||
    X\x{A0}\x{A0}
 | 
						||
 | 
						||
/\S+\x{A0}/8BZT1
 | 
						||
    X\x{A0}\x{A0}
 | 
						||
 | 
						||
/\x{a0}+\s!/8BZ
 | 
						||
    \x{a0}\x20!
 | 
						||
 | 
						||
/\x{a0}+\s!/8BZT1
 | 
						||
    \x{a0}\x20!
 | 
						||
 | 
						||
/(*UTF)abc/9
 | 
						||
 | 
						||
/abc/89
 | 
						||
 | 
						||
/-- End of testinput18 --/
 |