/* */
DEFINITIONS
This source file includes the following definitions.
- fun_so_from_utf_16be
- fun_so_to_utf_16be
- fun_so_from_utf_16le
- fun_so_to_utf_16le
- fun_so_from_utf_32be
- fun_so_to_utf_32be
- fun_so_from_utf_32le
- fun_so_to_utf_32le
- state_init
- fun_si_from_utf_16
- fun_so_from_utf_16
- fun_si_from_utf_32
- fun_so_from_utf_32
- fun_so_to_utf_16
- fun_so_to_utf_32
- TRANS_INIT
/* autogenerated. */
/* src="transcode-tblgen.rb", len=30695, checksum=51134 */
/* src="utf_16_32.trans", len=15308, checksum=28538 */
#include "transcode_data.h"
/*
 * Autogenerated byte table shared by every transcoder in this file.
 *
 * The array is a concatenation of sub-tables.  Each "#define ..._offsets N"
 * records the index N at which the following sub-table starts; these macros
 * are referenced from utf_16_32_word_array.
 *
 * Observed layout of one sub-table: two header bytes followed by
 * (second - first + 1) one-byte entries.  The header presumably gives the
 * lowest and highest input byte covered, and each entry presumably selects
 * an element of the "_infos" list paired with the sub-table in
 * utf_16_32_word_array -- TODO confirm against transcode_data.h.
 */
static const unsigned char
utf_16_32_byte_array[1288] = {
/* header 220..223 (0xDC..0xDF); all four entries are 1 */
#define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0
220, 223,
1, 1, 1, 1,
/* header 0..255; every entry is 0 */
#define from_UTF_16LE_00toFF_D8toDB_offsets 6
0, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* header 0..255; entries 0xD8..0xDB are 1, 0xDC..0xDF are 2, the rest 0 */
#define from_UTF_16LE_00toFF_offsets 264
0, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* header 0..0; single entry 0 */
#define from_UTF_32LE_00toFF_00toD7_00_offsets 522
0, 0,
0,
/* header 0..16 (0x00..0x10); all 17 entries are 0 */
#define from_UTF_32LE_00toFF_00toD7_offsets 525
0, 16,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0,
/* header 1..16 (0x01..0x10); all 16 entries are 1 */
#define from_UTF_32LE_00toFF_D8toDF_offsets 544
1, 16,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* header 0..255; entries 0xD8..0xDF are 1, the rest 0 */
#define from_UTF_32LE_00toFF_offsets 562
0, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* header 0..16 (0x00..0x10); entry 0x00 is 0, entries 0x01..0x10 are 1 */
#define from_UTF_32BE_00_offsets 820
0, 16,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1,
/* header 128..191 (0x80..0xBF); all 64 entries are 1 */
#define from_UTF_8_C2toDF_offsets 839
128, 191,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* header 160..191 (0xA0..0xBF); all 32 entries are 1 */
#define from_UTF_8_E0_offsets 905
160, 191,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* header 128..159 (0x80..0x9F); all 32 entries are 1 */
#define from_UTF_8_ED_offsets 939
128, 159,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* header 144..191 (0x90..0xBF); all 48 entries are 1 */
#define from_UTF_8_F0_offsets 973
144, 191,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* header 128..143 (0x80..0x8F); all 16 entries are 1 */
#define from_UTF_8_F4_offsets 1023
128, 143,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/*
 * header 0..244 (0x00..0xF4), keyed by what looks like the UTF-8 lead byte:
 * 0x00..0x7F -> 0, 0x80..0xC1 -> 1, 0xC2..0xDF -> 2, 0xE0 -> 3,
 * 0xE1..0xEC and 0xEE..0xEF -> 4, 0xED -> 5, 0xF0 -> 6, 0xF1..0xF3 -> 7,
 * 0xF4 -> 8.
 */
#define from_UTF_8_offsets 1041
0, 244,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4,
6, 7, 7, 7, 8,
};
/*
 * Autogenerated word table shared by every transcoder in this file.
 *
 * Two kinds of items are interleaved, each named by a "#define" placed
 * immediately before it:
 *   - "<name>_infos": a list of action codes (FUNso, FUNsi, INVALID) and/or
 *     references to other nodes;
 *   - "<name>": a node made of two words, an "_offsets" index into
 *     utf_16_32_byte_array followed by an "_infos" reference.
 * The argument of each WORDINDEX2INFO(n) equals the index of the item's
 * first word in this array.  WORDINDEX2INFO and the action codes are
 * declared outside this file (presumably in transcode_data.h).
 *
 * Several nodes reuse a byte sub-table or an infos list generated for a
 * different node (e.g. from_UTF_16BE uses from_UTF_16LE_00toFF_offsets);
 * the names therefore describe where a table was first emitted, not
 * necessarily its only user.
 */
static const unsigned int
utf_16_32_word_array[106] = {
/* ---- UTF-16LE input ---- */
#define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0)
INVALID, FUNso,
#define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2)
from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
#define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4)
from_UTF_16LE_00toFF_D8toDB_00toFF,
#define from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_16LE_00toFF_D8toDB_infos,
#define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7)
FUNso, from_UTF_16LE_00toFF_D8toDB,
INVALID,
#define from_UTF_16LE_00toFF WORDINDEX2INFO(10)
from_UTF_16LE_00toFF_offsets,
from_UTF_16LE_00toFF_infos,
#define from_UTF_16LE_infos WORDINDEX2INFO(12)
from_UTF_16LE_00toFF,
#define from_UTF_16LE WORDINDEX2INFO(13)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_16LE_infos,
/* ---- UTF-32LE input ---- */
#define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15)
FUNso, INVALID,
#define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17)
from_UTF_32LE_00toFF_00toD7_00_offsets,
from_UTF_32LE_00toFF_00toD7_00_infos,
#define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19)
from_UTF_32LE_00toFF_00toD7_00, INVALID,
#define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21)
from_UTF_32LE_00toFF_00toD7_offsets,
from_UTF_32LE_00toFF_00toD7_infos,
#define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23)
INVALID, from_UTF_32LE_00toFF_00toD7_00,
#define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25)
from_UTF_32LE_00toFF_D8toDF_offsets,
from_UTF_32LE_00toFF_D8toDF_infos,
#define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27)
from_UTF_32LE_00toFF_00toD7, from_UTF_32LE_00toFF_D8toDF,
#define from_UTF_32LE_00toFF WORDINDEX2INFO(29)
from_UTF_32LE_00toFF_offsets,
from_UTF_32LE_00toFF_infos,
#define from_UTF_32LE_infos WORDINDEX2INFO(31)
from_UTF_32LE_00toFF,
#define from_UTF_32LE WORDINDEX2INFO(32)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_32LE_infos,
/* ---- UTF-16BE input ---- */
#define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34)
FUNso,
#define from_UTF_16BE_00toD7 WORDINDEX2INFO(35)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_16BE_00toD7_infos,
#define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37)
INVALID, from_UTF_16BE_00toD7,
#define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39)
from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
from_UTF_16BE_D8toDB_00toFF_infos,
#define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41)
from_UTF_16BE_D8toDB_00toFF,
#define from_UTF_16BE_D8toDB WORDINDEX2INFO(42)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_16BE_D8toDB_infos,
#define from_UTF_16BE_infos WORDINDEX2INFO(44)
from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB,
INVALID,
#define from_UTF_16BE WORDINDEX2INFO(47)
from_UTF_16LE_00toFF_offsets,
from_UTF_16BE_infos,
/* ---- UTF-32BE input ---- */
#define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49)
from_UTF_16BE_00toD7, INVALID,
#define from_UTF_32BE_00_00 WORDINDEX2INFO(51)
from_UTF_32LE_00toFF_offsets,
from_UTF_32BE_00_00_infos,
#define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53)
from_UTF_16BE_00toD7,
#define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_32BE_00_01to10_infos,
#define from_UTF_32BE_00_infos WORDINDEX2INFO(56)
from_UTF_32BE_00_00, from_UTF_32BE_00_01to10,
INVALID,
#define from_UTF_32BE_00 WORDINDEX2INFO(59)
from_UTF_32BE_00_offsets,
from_UTF_32BE_00_infos,
#define from_UTF_32BE_infos WORDINDEX2INFO(61)
from_UTF_32BE_00, INVALID,
#define from_UTF_32BE WORDINDEX2INFO(63)
from_UTF_32LE_00toFF_00toD7_00_offsets,
from_UTF_32BE_infos,
/* ---- BOM-detecting UTF-16 input: two any-byte steps, then FUNsi ---- */
#define from_UTF_16_00toFF_infos WORDINDEX2INFO(65)
FUNsi,
#define from_UTF_16_00toFF WORDINDEX2INFO(66)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_16_00toFF_infos,
#define from_UTF_16_infos WORDINDEX2INFO(68)
from_UTF_16_00toFF,
#define from_UTF_16 WORDINDEX2INFO(69)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_16_infos,
/* ---- BOM-detecting UTF-32 input: two any-byte steps, then the UTF-16 chain ---- */
#define from_UTF_32_00toFF_infos WORDINDEX2INFO(71)
from_UTF_16,
#define from_UTF_32_00toFF WORDINDEX2INFO(72)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_32_00toFF_infos,
#define from_UTF_32_infos WORDINDEX2INFO(74)
from_UTF_32_00toFF,
#define from_UTF_32 WORDINDEX2INFO(75)
from_UTF_16LE_00toFF_D8toDB_offsets,
from_UTF_32_infos,
/* ---- UTF-8 input (used by every "to_*" transcoder) ---- */
#define from_UTF_8_C2toDF WORDINDEX2INFO(77)
from_UTF_8_C2toDF_offsets,
from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
#define from_UTF_8_E0_infos WORDINDEX2INFO(79)
INVALID, from_UTF_8_C2toDF,
#define from_UTF_8_E0 WORDINDEX2INFO(81)
from_UTF_8_E0_offsets,
from_UTF_8_E0_infos,
#define from_UTF_8_E1toEC WORDINDEX2INFO(83)
from_UTF_8_C2toDF_offsets,
from_UTF_8_E0_infos,
#define from_UTF_8_ED WORDINDEX2INFO(85)
from_UTF_8_ED_offsets,
from_UTF_8_E0_infos,
#define from_UTF_8_F0_infos WORDINDEX2INFO(87)
INVALID, from_UTF_8_E1toEC,
#define from_UTF_8_F0 WORDINDEX2INFO(89)
from_UTF_8_F0_offsets,
from_UTF_8_F0_infos,
#define from_UTF_8_F1toF3 WORDINDEX2INFO(91)
from_UTF_8_C2toDF_offsets,
from_UTF_8_F0_infos,
#define from_UTF_8_F4 WORDINDEX2INFO(93)
from_UTF_8_F4_offsets,
from_UTF_8_F0_infos,
#define from_UTF_8_infos WORDINDEX2INFO(95)
FUNso, INVALID,
from_UTF_8_C2toDF, from_UTF_8_E0,
from_UTF_8_E1toEC, from_UTF_8_ED,
from_UTF_8_F0, from_UTF_8_F1toF3,
from_UTF_8_F4,
#define from_UTF_8 WORDINDEX2INFO(104)
from_UTF_8_offsets,
from_UTF_8_infos,
};
/*
 * Table arguments common to every rb_transcoder initializer below: the byte
 * array and its length, the word array and its length, and the size of one
 * word.  The two lengths must match the array declarations above.
 */
#define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, utf_16_32_word_array, 106, ((int)sizeof(unsigned int))
/*
 * Re-encode one UTF-16BE character as UTF-8.
 *
 * s points at a 2-byte code unit, or at a 4-byte surrogate pair when the
 * first byte lies in 0xD8..0xDF (bytes 2 and 3 are read only in that case).
 * The UTF-8 bytes are stored at o and their count (1..4) is returned.
 * statep, l and osize are not consulted; the input is not validated here.
 */
static ssize_t
fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char hi = s[0];
    const unsigned char lo = s[1];

    /* U+0000..U+007F: single byte */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* up to U+07FF: two bytes */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* remaining BMP values outside 0xD800..0xDFFF: three bytes */
    if ((hi & 0xF8) != 0xD8) {
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }

    /* surrogate pair: four bytes */
    {
        const unsigned char hi2 = s[2];
        const unsigned char lo2 = s[3];
        /* top bits of the high surrogate plus one: UTF-16 stores code point - 0x10000 */
        unsigned int upper = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (upper >> 2);
        o[1] = 0x80 | ((upper & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6);
        o[3] = 0x80 | (lo2 & 0x3F);
        return 4;
    }
}
/*
 * Re-encode one UTF-8 character as UTF-16BE.
 *
 * s points at a 1- to 4-byte UTF-8 sequence; its length is taken from the
 * lead byte.  Writes 2 bytes to o, or 4 bytes (a surrogate pair) for a
 * 4-byte sequence, and returns that count.  statep, l and osize are not
 * consulted; the input is not validated here.
 */
static ssize_t
fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    /* 1-byte sequence */
    if ((lead & 0x80) == 0) {
        o[0] = 0x00;
        o[1] = lead;
        return 2;
    }

    /* 2-byte sequence */
    if ((lead & 0xE0) == 0xC0) {
        o[0] = (lead >> 2) & 0x07;
        o[1] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }

    /* 3-byte sequence; the XORs strip the 0x80 continuation marker */
    if ((lead & 0xF0) == 0xE0) {
        o[0] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[1] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }

    /* 4-byte sequence: emit high surrogate then low surrogate */
    {
        /* top five code point bits minus one, i.e. the high bits of (code point - 0x10000) */
        int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[0] = 0xD8 | (w >> 2);
        o[1] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[2] = 0xDC | ((s[2] >> 2) & 0x03);
        o[3] = (s[2] << 6) | (s[3] & ~0x80);
        return 4;
    }
}
/*
 * Re-encode one UTF-16LE character as UTF-8.
 *
 * s points at a 2-byte code unit (low byte first), or at a 4-byte surrogate
 * pair when s[1] lies in 0xD8..0xDF (bytes 2 and 3 are read only in that
 * case).  The UTF-8 bytes are stored at o and their count (1..4) is
 * returned.  statep, l and osize are not consulted; the input is not
 * validated here.
 */
static ssize_t
fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];

    /* U+0000..U+007F: single byte */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* up to U+07FF: two bytes */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* remaining BMP values outside 0xD800..0xDFFF: three bytes */
    if ((hi & 0xF8) != 0xD8) {
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }

    /* surrogate pair: four bytes */
    {
        const unsigned char lo2 = s[2];
        const unsigned char hi2 = s[3];
        /* top bits of the high surrogate plus one: UTF-16 stores code point - 0x10000 */
        unsigned int upper = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (upper >> 2);
        o[1] = 0x80 | ((upper & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6);
        o[3] = 0x80 | (lo2 & 0x3F);
        return 4;
    }
}
/*
 * Re-encode one UTF-8 character as UTF-16LE.
 *
 * s points at a 1- to 4-byte UTF-8 sequence; its length is taken from the
 * lead byte.  Writes 2 bytes to o (low byte first), or 4 bytes (a surrogate
 * pair) for a 4-byte sequence, and returns that count.  statep, l and osize
 * are not consulted; the input is not validated here.
 */
static ssize_t
fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    /* 1-byte sequence */
    if ((lead & 0x80) == 0) {
        o[0] = lead;
        o[1] = 0x00;
        return 2;
    }

    /* 2-byte sequence */
    if ((lead & 0xE0) == 0xC0) {
        o[0] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        o[1] = (lead >> 2) & 0x07;
        return 2;
    }

    /* 3-byte sequence; the XORs strip the 0x80 continuation marker */
    if ((lead & 0xF0) == 0xE0) {
        o[0] = (s[1] << 6) | (s[2] ^ 0x80);
        o[1] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        return 2;
    }

    /* 4-byte sequence: emit high surrogate then low surrogate */
    {
        /* top five code point bits minus one, i.e. the high bits of (code point - 0x10000) */
        int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[0] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[1] = 0xD8 | (w >> 2);
        o[2] = (s[2] << 6) | (s[3] & ~0x80);
        o[3] = 0xDC | ((s[2] >> 2) & 0x03);
        return 4;
    }
}
/*
 * Re-encode one UTF-32BE character as UTF-8.
 *
 * Reads s[1]..s[3]; s[0] is never examined.  The UTF-8 bytes are stored at
 * o and their count (1..4) is returned.  statep, l and osize are not
 * consulted; the input is not validated here.
 */
static ssize_t
fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char plane = s[1];
    const unsigned char hi = s[2];
    const unsigned char lo = s[3];

    /* beyond the BMP: four bytes */
    if (plane != 0) {
        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | (hi >> 4);
        o[2] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[3] = 0x80 | (lo & 0x3F);
        return 4;
    }

    /* U+0000..U+007F: single byte */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* up to U+07FF: two bytes */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* rest of the BMP: three bytes */
    o[0] = 0xE0 | (hi >> 4);
    o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
    o[2] = 0x80 | (lo & 0x3F);
    return 3;
}
/*
 * Re-encode one UTF-8 character as UTF-32BE.
 *
 * s points at a 1- to 4-byte UTF-8 sequence; its length is taken from the
 * lead byte.  Always writes exactly 4 bytes to o (the first is always 0)
 * and returns 4.  statep, l and osize are not consulted; the input is not
 * validated here.
 */
static ssize_t
fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];
    unsigned char b1, b2, b3;   /* code point bytes, most significant first */

    if ((lead & 0x80) == 0) {
        /* 1-byte sequence */
        b1 = 0x00;
        b2 = 0x00;
        b3 = lead;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* 2-byte sequence */
        b1 = 0x00;
        b2 = (lead >> 2) & 0x07;
        b3 = ((lead & 0x03) << 6) | (s[1] & 0x3F);
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* 3-byte sequence; the XORs strip the 0x80 continuation marker */
        b1 = 0x00;
        b2 = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        b3 = (s[1] << 6) | (s[2] ^ 0x80);
    }
    else {
        /* 4-byte sequence */
        b1 = ((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03);
        b2 = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        b3 = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
    }

    o[0] = 0;
    o[1] = b1;
    o[2] = b2;
    o[3] = b3;
    return 4;
}
/*
 * Re-encode one UTF-32LE character as UTF-8.
 *
 * Reads s[0]..s[2]; s[3] is never examined.  The UTF-8 bytes are stored at
 * o and their count (1..4) is returned.  statep, l and osize are not
 * consulted; the input is not validated here.
 */
static ssize_t
fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];
    const unsigned char plane = s[2];

    /* beyond the BMP: four bytes */
    if (plane != 0) {
        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | (hi >> 4);
        o[2] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[3] = 0x80 | (lo & 0x3F);
        return 4;
    }

    /* U+0000..U+007F: single byte */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* up to U+07FF: two bytes */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* rest of the BMP: three bytes */
    o[0] = 0xE0 | (hi >> 4);
    o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
    o[2] = 0x80 | (lo & 0x3F);
    return 3;
}
/*
 * Re-encode one UTF-8 character as UTF-32LE.
 *
 * s points at a 1- to 4-byte UTF-8 sequence; its length is taken from the
 * lead byte.  Always writes exactly 4 bytes to o (the last is always 0)
 * and returns 4.  statep, l and osize are not consulted; the input is not
 * validated here.
 */
static ssize_t
fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];
    unsigned char b0, b1, b2;   /* code point bytes, least significant first */

    if ((lead & 0x80) == 0) {
        /* 1-byte sequence */
        b0 = lead;
        b1 = 0x00;
        b2 = 0x00;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* 2-byte sequence */
        b0 = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        b1 = (lead >> 2) & 0x07;
        b2 = 0x00;
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* 3-byte sequence; the XORs strip the 0x80 continuation marker */
        b0 = (s[1] << 6) | (s[2] ^ 0x80);
        b1 = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        b2 = 0x00;
    }
    else {
        /* 4-byte sequence */
        b0 = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
        b1 = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        b2 = ((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03);
    }

    o[0] = b0;
    o[1] = b1;
    o[2] = b2;
    o[3] = 0;
    return 4;
}
/*
 * Reset the one-byte converter state at statep to 0 and return 0.
 * The stateful transcoders in this file treat 0 as "no BOM handled yet".
 */
static int
state_init(void *statep)
{
    *(unsigned char *)statep = 0;
    return 0;
}
/*
 * Byte-order tags stored in the one-byte converter state.  They are plain
 * macros and stay defined for the rest of the file; the other stateful
 * UTF-16/UTF-32 decoders below rely on them.
 */
#define BE 1
#define LE 2

/*
 * Decide what to do with the first two bytes of a UTF-16 unit.
 *
 * State 0 (nothing seen yet): FE FF selects BE and FF FE selects LE; in
 * both cases the state byte is updated and ZERObt is returned.  Anything
 * else is INVALID.
 *
 * State BE / LE: looks at the most significant byte of the unit (s[0] for
 * BE, s[1] for LE).  Outside 0xD8..0xDF the result is FUNso; 0xD8..0xDB
 * hands over to the table node that expects the second half of a surrogate
 * pair; 0xDC..0xDF is INVALID.
 *
 * Any other state value yields INVALID.  l is not consulted.
 */
static VALUE
fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
{
    unsigned char *order = statep;
    unsigned char msb;

    if (*order == 0) {
        if (s[0] == 0xFE && s[1] == 0xFF) {
            *order = BE;
            return ZERObt;
        }
        if (s[0] == 0xFF && s[1] == 0xFE) {
            *order = LE;
            return ZERObt;
        }
        return (VALUE)INVALID;
    }

    if (*order == BE) {
        msb = s[0];
    }
    else if (*order == LE) {
        msb = s[1];
    }
    else {
        return (VALUE)INVALID;
    }

    /* not a surrogate: the two bytes form a complete character */
    if (msb < 0xD8 || 0xDF < msb) {
        return (VALUE)FUNso;
    }

    /* first half of a surrogate pair: continue in the matching table node */
    if (msb <= 0xDB) {
        if (*order == BE) {
            return (VALUE)from_UTF_16BE_D8toDB_00toFF;
        }
        return (VALUE)from_UTF_16LE_00toFF_D8toDB;
    }

    /* 0xDC..0xDF without a preceding first half */
    return (VALUE)INVALID;
}
/*
 * UTF-16 (BOM-selected byte order) -> UTF-8 output function.
 *
 * Forwards to the big- or little-endian converter according to the state
 * byte at statep and returns its result.  Returns 0, writing nothing, when
 * the state is neither BE nor LE.
 */
static ssize_t
fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char order = *(unsigned char *)statep;

    if (order == BE) {
        return fun_so_from_utf_16be(statep, s, l, o, osize);
    }
    if (order == LE) {
        return fun_so_from_utf_16le(statep, s, l, o, osize);
    }
    return 0;
}
/*
 * Decide what to do with one 4-byte UTF-32 unit.
 *
 * State 0 (nothing seen yet): 00 00 FE FF selects BE and FF FE 00 00
 * selects LE; in both cases the state byte is updated and ZERObt is
 * returned.  Anything else is INVALID.
 *
 * State BE / LE: returns FUNso when the most significant byte is 0 and
 * either the next byte is 0x01..0x10, or it is 0 and the third most
 * significant byte is outside 0xD8..0xDF.  Everything else is INVALID.
 *
 * Any other state value yields INVALID.  l is not consulted.
 */
static VALUE
fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l)
{
    unsigned char *order = statep;
    unsigned char top, plane, hi;   /* the three most significant bytes */

    if (*order == 0) {
        if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) {
            *order = BE;
            return ZERObt;
        }
        if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) {
            *order = LE;
            return ZERObt;
        }
        return (VALUE)INVALID;
    }

    if (*order == BE) {
        top = s[0];
        plane = s[1];
        hi = s[2];
    }
    else if (*order == LE) {
        top = s[3];
        plane = s[2];
        hi = s[1];
    }
    else {
        return (VALUE)INVALID;
    }

    /* above U+10FFFF */
    if (top != 0 || plane > 0x10) {
        return (VALUE)INVALID;
    }
    /* U+D800..U+DFFF */
    if (plane == 0 && 0xD8 <= hi && hi <= 0xDF) {
        return (VALUE)INVALID;
    }
    return (VALUE)FUNso;
}
/*
 * UTF-32 (BOM-selected byte order) -> UTF-8 output function.
 *
 * Forwards to the big- or little-endian converter according to the state
 * byte at statep and returns its result.  Returns 0, writing nothing, when
 * the state is neither BE nor LE.
 */
static ssize_t
fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char order = *(unsigned char *)statep;

    if (order == BE) {
        return fun_so_from_utf_32be(statep, s, l, o, osize);
    }
    if (order == LE) {
        return fun_so_from_utf_32le(statep, s, l, o, osize);
    }
    return 0;
}
/*
 * UTF-8 -> UTF-16 output function.
 *
 * On the first call (state byte 0) it writes the byte order mark FE FF,
 * sets the state byte to 1, and then appends the character in big-endian
 * form.  Later calls write only the character.  Returns the total number
 * of bytes stored at o: the converter's count, plus 2 when the BOM was
 * written.  osize is forwarded unchanged even after the BOM is written.
 */
static ssize_t
fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *bom_written = statep;
    ssize_t prefix = 0;

    if (*bom_written == 0) {
        o[0] = 0xFE;
        o[1] = 0xFF;
        o += 2;
        *bom_written = 1;
        prefix = 2;
    }
    return prefix + fun_so_to_utf_16be(statep, s, l, o, osize);
}
/*
 * UTF-8 -> UTF-32 output function.
 *
 * On the first call (state byte 0) it writes the byte order mark
 * 00 00 FE FF, sets the state byte to 1, and then appends the character in
 * big-endian form.  Later calls write only the character.  Returns the
 * total number of bytes stored at o: the converter's count, plus 4 when
 * the BOM was written.  osize is forwarded unchanged even after the BOM is
 * written.
 */
static ssize_t
fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *bom_written = statep;
    ssize_t prefix = 0;

    if (*bom_written == 0) {
        o[0] = 0x00;
        o[1] = 0x00;
        o[2] = 0xFE;
        o[3] = 0xFF;
        o += 4;
        *bom_written = 1;
        prefix = 4;
    }
    return prefix + fun_so_to_utf_32be(statep, s, l, o, osize);
}
/* Transcoder descriptor: UTF-16BE -> UTF-8.  Carries no converter state. */
static const rb_transcoder rb_from_UTF_16BE = {
    "UTF-16BE",
    "UTF-8",
    from_UTF_16BE,              /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    2,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_decoder,        /* asciicompat_type */
    0,                          /* state_size */
    NULL,                       /* state_init */
    NULL,                       /* state_fini */
    NULL,
    NULL,
    NULL,
    fun_so_from_utf_16be        /* output function */
};
/* Transcoder descriptor: UTF-8 -> UTF-16BE.  Carries no converter state. */
static const rb_transcoder rb_to_UTF_16BE = {
    "UTF-8",
    "UTF-16BE",
    from_UTF_8,                 /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    1,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_encoder,        /* asciicompat_type */
    0,                          /* state_size */
    NULL,                       /* state_init */
    NULL,                       /* state_fini */
    NULL,
    NULL,
    NULL,
    fun_so_to_utf_16be          /* output function */
};
/* Transcoder descriptor: UTF-16LE -> UTF-8.  Carries no converter state. */
static const rb_transcoder rb_from_UTF_16LE = {
    "UTF-16LE",
    "UTF-8",
    from_UTF_16LE,              /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    2,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_decoder,        /* asciicompat_type */
    0,                          /* state_size */
    NULL,                       /* state_init */
    NULL,                       /* state_fini */
    NULL,
    NULL,
    NULL,
    fun_so_from_utf_16le        /* output function */
};
/* Transcoder descriptor: UTF-8 -> UTF-16LE.  Carries no converter state. */
static const rb_transcoder rb_to_UTF_16LE = {
    "UTF-8",
    "UTF-16LE",
    from_UTF_8,                 /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    1,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_encoder,        /* asciicompat_type */
    0,                          /* state_size */
    NULL,                       /* state_init */
    NULL,                       /* state_fini */
    NULL,
    NULL,
    NULL,
    fun_so_to_utf_16le          /* output function */
};
/* Transcoder descriptor: UTF-32BE -> UTF-8.  Carries no converter state. */
static const rb_transcoder rb_from_UTF_32BE = {
    "UTF-32BE",
    "UTF-8",
    from_UTF_32BE,              /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    4,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_decoder,        /* asciicompat_type */
    0,                          /* state_size */
    NULL,                       /* state_init */
    NULL,                       /* state_fini */
    NULL,
    NULL,
    NULL,
    fun_so_from_utf_32be        /* output function */
};
/* Transcoder descriptor: UTF-8 -> UTF-32BE.  Carries no converter state. */
static const rb_transcoder rb_to_UTF_32BE = {
    "UTF-8",
    "UTF-32BE",
    from_UTF_8,                 /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    1,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_encoder,        /* asciicompat_type */
    0,                          /* state_size */
    NULL,                       /* state_init */
    NULL,                       /* state_fini */
    NULL,
    NULL,
    NULL,
    fun_so_to_utf_32be          /* output function */
};
/* Transcoder descriptor: UTF-32LE -> UTF-8.  Carries no converter state. */
static const rb_transcoder rb_from_UTF_32LE = {
    "UTF-32LE",
    "UTF-8",
    from_UTF_32LE,              /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    4,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_decoder,        /* asciicompat_type */
    0,                          /* state_size */
    NULL,                       /* state_init */
    NULL,                       /* state_fini */
    NULL,
    NULL,
    NULL,
    fun_so_from_utf_32le        /* output function */
};
/* Transcoder descriptor: UTF-8 -> UTF-32LE.  Carries no converter state. */
static const rb_transcoder rb_to_UTF_32LE = {
    "UTF-8",
    "UTF-32LE",
    from_UTF_8,                 /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    1,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_encoder,        /* asciicompat_type */
    0,                          /* state_size */
    NULL,                       /* state_init */
    NULL,                       /* state_fini */
    NULL,
    NULL,
    NULL,
    fun_so_to_utf_32le          /* output function */
};
/*
 * Transcoder descriptor: UTF-16 with a byte order mark -> UTF-8.
 * Keeps one byte of state (the detected byte order), reset by state_init.
 */
static const rb_transcoder rb_from_UTF_16 = {
    "UTF-16",
    "UTF-8",
    from_UTF_16,                /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    2,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_decoder,        /* asciicompat_type */
    1,                          /* state_size */
    state_init,                 /* state_init */
    NULL,                       /* state_fini */
    NULL,
    fun_si_from_utf_16,         /* BOM / surrogate dispatch */
    NULL,
    fun_so_from_utf_16          /* output function */
};
/*
 * Transcoder descriptor: UTF-32 with a byte order mark -> UTF-8.
 * Keeps one byte of state (the detected byte order), reset by state_init.
 */
static const rb_transcoder rb_from_UTF_32 = {
    "UTF-32",
    "UTF-8",
    from_UTF_32,                /* root node in utf_16_32_word_array */
    TRANSCODE_TABLE_INFO,
    4,                          /* input_unit_length */
    4,                          /* max_input */
    4,                          /* max_output */
    asciicompat_decoder,        /* asciicompat_type */
    1,                          /* state_size */
    state_init,                 /* state_init */
    NULL,                       /* state_fini */
    NULL,
    fun_si_from_utf_32,         /* BOM / range dispatch */
    NULL,
    fun_so_from_utf_32          /* output function */
};
/*
 * Transcoder descriptor: UTF-8 -> UTF-16 (big-endian, preceded by a BOM).
 * Keeps one byte of state (whether the BOM has been written), reset by
 * state_init.
 *
 * max_output is 6 rather than 4: on its first call fun_so_to_utf_16 stores
 * the 2-byte BOM followed by up to 4 bytes for the character, so 4
 * understated the most a single call can write.
 * NOTE(review): this file is generated from utf_16_32.trans; the same
 * change presumably belongs in that template -- confirm.
 */
static const rb_transcoder
rb_to_UTF_16 = {
    "UTF-8", "UTF-16", from_UTF_8,
    TRANSCODE_TABLE_INFO,
    1, /* input_unit_length */
    4, /* max_input */
    6, /* max_output: 2-byte BOM + up to 4 bytes for the first character */
    asciicompat_encoder, /* asciicompat_type */
    1, state_init, NULL, /* state_size, state_init, state_fini */
    NULL, NULL, NULL, fun_so_to_utf_16
};
/*
 * Transcoder descriptor: UTF-8 -> UTF-32 (big-endian, preceded by a BOM).
 * Keeps one byte of state (whether the BOM has been written), reset by
 * state_init.
 *
 * max_output is 8 rather than 4: on its first call fun_so_to_utf_32 stores
 * the 4-byte BOM followed by the 4-byte character, so 4 understated the
 * most a single call can write.
 * NOTE(review): this file is generated from utf_16_32.trans; the same
 * change presumably belongs in that template -- confirm.
 */
static const rb_transcoder
rb_to_UTF_32 = {
    "UTF-8", "UTF-32", from_UTF_8,
    TRANSCODE_TABLE_INFO,
    1, /* input_unit_length */
    4, /* max_input */
    8, /* max_output: 4-byte BOM + 4 bytes for the first character */
    asciicompat_encoder, /* asciicompat_type */
    1, state_init, NULL, /* state_size, state_init, state_fini */
    NULL, NULL, NULL, fun_so_to_utf_32
};
TRANS_INIT(utf_16_32)
{
rb_register_transcoder(&rb_from_UTF_16BE);
rb_register_transcoder(&rb_to_UTF_16BE);
rb_register_transcoder(&rb_from_UTF_16LE);
rb_register_transcoder(&rb_to_UTF_16LE);
rb_register_transcoder(&rb_from_UTF_32BE);
rb_register_transcoder(&rb_to_UTF_32BE);
rb_register_transcoder(&rb_from_UTF_32LE);
rb_register_transcoder(&rb_to_UTF_32LE);
rb_register_transcoder(&rb_from_UTF_16);
rb_register_transcoder(&rb_to_UTF_16);
rb_register_transcoder(&rb_from_UTF_32);
rb_register_transcoder(&rb_to_UTF_32);
}