Browse Source

MFH: updated libmbfl to 1.3.2

pull/271/head
Rui Hirokawa 15 years ago
parent
commit
d6303c7962
  1. 3
      ext/mbstring/config.m4
  2. 5
      ext/mbstring/config.w32
  3. 30
      ext/mbstring/libmbfl/NEWS
  4. 11
      ext/mbstring/libmbfl/filters/Makefile.am
  5. 131
      ext/mbstring/libmbfl/filters/mbfilter_big5.c
  6. 5
      ext/mbstring/libmbfl/filters/mbfilter_big5.h
  7. 142
      ext/mbstring/libmbfl/filters/mbfilter_cp936.c
  8. 10
      ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c
  9. 6
      ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c
  10. 78
      ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c
  11. 43
      ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h
  12. 66
      ext/mbstring/libmbfl/filters/mbfilter_gb18030.c
  13. 11
      ext/mbstring/libmbfl/filters/mbfilter_hz.c
  14. 212
      ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c
  15. 44
      ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.h
  16. 2
      ext/mbstring/libmbfl/filters/mbfilter_sjis.c
  17. 676
      ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
  18. 49
      ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h
  19. 73
      ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c
  20. 1
      ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h
  21. 20
      ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c
  22. 2
      ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c
  23. 1
      ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h
  24. 174
      ext/mbstring/libmbfl/filters/unicode_table_cp936.h
  25. 2
      ext/mbstring/libmbfl/filters/unicode_table_jis.h
  26. 5026
      ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
  27. 2
      ext/mbstring/libmbfl/mbfl/mbfilter.h
  28. 1
      ext/mbstring/libmbfl/mbfl/mbfl_consts.h
  29. 17
      ext/mbstring/libmbfl/mbfl/mbfl_convert.c
  30. 7
      ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
  31. 4
      ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
  32. 5
      ext/mbstring/libmbfl/mbfl/mbfl_ident.c

3
ext/mbstring/config.m4

@ -238,12 +238,14 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_gb18030.c
libmbfl/filters/mbfilter_euc_cn.c
libmbfl/filters/mbfilter_euc_jp.c
libmbfl/filters/mbfilter_euc_jp_2004.c
libmbfl/filters/mbfilter_euc_jp_win.c
libmbfl/filters/mbfilter_euc_kr.c
libmbfl/filters/mbfilter_euc_tw.c
libmbfl/filters/mbfilter_htmlent.c
libmbfl/filters/mbfilter_hz.c
libmbfl/filters/mbfilter_iso2022_jp_ms.c
libmbfl/filters/mbfilter_iso2022jp_2004.c
libmbfl/filters/mbfilter_iso2022_kr.c
libmbfl/filters/mbfilter_iso8859_1.c
libmbfl/filters/mbfilter_iso8859_10.c
@ -267,6 +269,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_sjis_open.c
libmbfl/filters/mbfilter_sjis_mobile.c
libmbfl/filters/mbfilter_sjis_mac.c
libmbfl/filters/mbfilter_sjis_2004.c
libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.c
libmbfl/filters/mbfilter_ucs2.c
libmbfl/filters/mbfilter_ucs4.c

5
ext/mbstring/config.w32

@ -29,13 +29,14 @@ if (PHP_MBSTRING != "no") {
mbfilter_iso8859_2.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c \
mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c \
mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_jis.c \
mbfilter_iso2022_jp_ms.c mbfilter_gb18030.c \
mbfilter_iso2022_jp_ms.c mbfilter_gb18030.c mbfilter_sjis_2004.c \
mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c mbfilter_utf8_mobile.c \
mbfilter_koi8u.c mbfilter_cp1254.c \
mbfilter_koi8u.c mbfilter_cp1254.c mbfilter_euc_jp_2004.c \
mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c \
mbfilter_cp5022x.c mbfilter_sjis_open.c mbfilter_sjis_mobile.c mbfilter_sjis_mac.c \
mbfilter_iso2022jp_2004.c \
mbfilter_tl_jisx0201_jisx0208.c", "mbstring");
ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \

30
ext/mbstring/libmbfl/NEWS

@ -0,0 +1,30 @@
version 1.3.2 August 20, 2011
- added JISX-0213:2004 based encoding : Shift_JIS-2004, EUC-JP-2004, ISO-2022-JP-2004 (rui).
- added gb18030 encoding (rui).
- added CP950 with user defined area based on Big5 (rui).
- added mapping for user defined character area to CP936 (rui).
- added UTF-8-Mobile to support the pictogram characters defined by mobile phone carrier in Japan (rui).
version 1.3.1 August 5, 2011
- added check for invalid/obsolete utf-8 encoding (rui).
version 1.3.0 August 1, 2011
- added encoding conversion between Shift_JIS and Unicode (6.0 or PUA)
for pictogram characters defined by mobile phone carrier in Japan (rui).
Detailed info: https://github.com/hirokawa/libmbfl/wiki/Introduction-to-libmbfl
- fixed encoding conversion of cp5022x for user defined area (rui).
- added MacJapanese (SJIS-mac) for legacy encoding support (rui).
- backport from PHP 5.2 (rui).
version 1.1.0 March 02, 2010
- added cp5022x encoding (moriyoshi)
- added ISO-2022-JP-MS (moriyoshi)
- moved to github.com from sourceforge.jp (moriyoshi)

11
ext/mbstring/libmbfl/filters/Makefile.am

@ -1,4 +1,4 @@
EXTRA_DIST=mk_sb_tbl.awk
EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk
noinst_LTLIBRARIES=libmbfl_filters.la
INCLUDES=-I../mbfl
PERL=perl
@ -33,6 +33,7 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
mbfilter_uuencode.c \
mbfilter_base64.c \
mbfilter_sjis.c \
mbfilter_sjis_2004.c \
mbfilter_sjis_open.c \
mbfilter_sjis_mobile.c \
mbfilter_sjis_mac.c \
@ -47,12 +48,14 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
mbfilter_utf7.c \
mbfilter_utf7imap.c \
mbfilter_euc_jp_win.c \
mbfilter_euc_jp_2004.c \
mbfilter_cp932.c \
mbfilter_cp51932.c \
mbfilter_euc_cn.c \
mbfilter_euc_kr.c \
mbfilter_uhc.c \
mbfilter_iso2022_jp_ms.c \
mbfilter_iso2022jp_2004.c \
mbfilter_gb18030.c \
mbfilter_iso2022_kr.c \
mbfilter_cp866.c \
@ -80,12 +83,14 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
mbfilter_gb18030.h \
mbfilter_euc_cn.h \
mbfilter_euc_jp.h \
mbfilter_euc_jp_2004.h \
mbfilter_euc_jp_win.h \
mbfilter_euc_kr.h \
mbfilter_euc_tw.h \
mbfilter_htmlent.h \
mbfilter_hz.h \
mbfilter_iso2022_jp_ms.h \
mbfilter_iso2022jp_2004.h \
mbfilter_iso2022_kr.h \
mbfilter_iso8859_1.h \
mbfilter_iso8859_10.h \
@ -152,9 +157,9 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
unicode_table_armscii8.h \
unicode_table_cp850.h \
unicode_table_uhc.h \
unicode_table_gb18030.h \
translit_kana_jisx0201_jisx0208.h \
emoji2uni.h \
sjis_mac2uni.h
emoji2uni.h
mbfilter_iso8859_2.c: unicode_table_iso8859_2.h

131
ext/mbstring/libmbfl/filters/mbfilter_big5.c

@ -57,7 +57,7 @@ static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", "CP950", NULL};
static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", NULL};
const mbfl_encoding mbfl_encoding_big5 = {
mbfl_no_encoding_big5,
@ -68,6 +68,15 @@ const mbfl_encoding mbfl_encoding_big5 = {
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
/*
 * Encoding descriptor for CP950. It shares the Big5 byte-length table
 * (mblen_table_big5) and carries the same MBCS / GL_UNSAFE type flags
 * as mbfl_encoding_big5.
 */
const mbfl_encoding mbfl_encoding_cp950 = {
mbfl_no_encoding_cp950,
"CP950", /* presumably the canonical encoding name -- confirm against mbfl_encoding */
"BIG5", /* presumably the MIME name -- confirm against mbfl_encoding */
NULL, /* NULL in the slot where other descriptors pass their alias list */
mblen_table_big5,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
const struct mbfl_identify_vtbl vtbl_identify_big5 = {
mbfl_no_encoding_big5,
mbfl_filt_ident_common_ctor,
@ -75,6 +84,13 @@ const struct mbfl_identify_vtbl vtbl_identify_big5 = {
mbfl_filt_ident_big5
};
/*
 * Identify-filter vtable for CP950. It uses the common constructor and
 * destructor and the same identify callback as Big5; that callback checks
 * the filter's encoding for CP950 and, if so, accepts lead bytes above 0x80
 * instead of above 0xa0.
 */
const struct mbfl_identify_vtbl vtbl_identify_cp950 = {
mbfl_no_encoding_cp950,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_big5
};
const struct mbfl_convert_vtbl vtbl_big5_wchar = {
mbfl_no_encoding_big5,
mbfl_no_encoding_wchar,
@ -93,21 +109,57 @@ const struct mbfl_convert_vtbl vtbl_wchar_big5 = {
mbfl_filt_conv_common_flush
};
/*
 * Conversion vtable CP950 => wchar. The conversion callback is the Big5
 * one; it tests filter->from->no_encoding for mbfl_no_encoding_cp950 to
 * enable the CP950-specific lead-byte range and PUA mapping.
 */
const struct mbfl_convert_vtbl vtbl_cp950_wchar = {
mbfl_no_encoding_cp950,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_big5_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion vtable wchar => CP950. The conversion callback is the Big5
 * one; it tests filter->to->no_encoding for mbfl_no_encoding_cp950 to
 * enable the CP950-specific PUA and single-character mappings.
 */
const struct mbfl_convert_vtbl vtbl_wchar_cp950 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp950,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_big5,
mbfl_filt_conv_common_flush
};
/* Evaluate `statement`; if its result is negative, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Mapping between Unicode Private Use Area ranges and CP950 double-byte
 * ranges. Each row is:
 *   { first Unicode code point, last Unicode code point,
 *     first CP950 byte pair,    last CP950 byte pair }
 *
 * Rows whose CP950 range starts at trail byte 0x40 hold 157 characters per
 * lead byte (63 trail bytes 0x40-0x7e plus 94 trail bytes 0xa1-0xfe).
 * The row starting at trail byte 0xa1 is mapped linearly and covers the
 * 94 trail bytes 0xa1-0xfe of a single lead byte.
 *
 * The table is only read, so it is declared const.
 */
static const unsigned short cp950_pua_tbl[][4] = {
	{0xe000, 0xe310, 0xfa40, 0xfefe},
	{0xe311, 0xeeb7, 0x8e40, 0xa0fe},
	{0xeeb8, 0xf6b0, 0x8140, 0x8dfe},
	{0xf6b1, 0xf70e, 0xc6a1, 0xc6fe},
	{0xf70f, 0xf848, 0xc740, 0xc8fe},
};
/*
* Big5 => wchar
*/
int
mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter)
{
int c1, w;
int k;
int c1, w, c2;
switch (filter->status) {
case 0:
if (c >= 0 && c < 0x80) { /* latin */
if (filter->from->no_encoding == mbfl_no_encoding_cp950) {
c1 = 0x80;
} else {
c1 = 0xa0;
}
if (c >= 0 && c <= 0x80) { /* latin */
CK((*filter->output_function)(c, filter->data));
} else if (c > 0xa0 && c < 0xff) { /* dbcs lead byte */
} else if (c == 0xff) {
CK((*filter->output_function)(0xf8f8, filter->data));
} else if (c > c1 && c < 0xff) { /* dbcs lead byte */
filter->status = 1;
filter->cache = c;
} else {
@ -131,6 +183,30 @@ mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter)
} else {
w = 0;
}
if (filter->from->no_encoding == mbfl_no_encoding_cp950) {
/* PUA for CP950 */
if (w <= 0 &&
(((c1 >= 0xfa && c1 <= 0xfe) || (c1 >= 0x8e && c1 <= 0xa0) ||
(c1 >= 0x81 && c1 <= 0x8d) ||(c1 >= 0xc7 && c1 <= 0xc8))
&& ((c > 0x39 && c < 0x7f) || (c > 0xa0 && c < 0xff))) ||
((c1 == 0xc6) && (c > 0xa0 && c < 0xff))) {
c2 = c1 << 8 | c;
for (k = 0; k < sizeof(cp950_pua_tbl)/(sizeof(unsigned short)*4); k++) {
if (c2 >= cp950_pua_tbl[k][2] && c2 <= cp950_pua_tbl[k][3]) {
break;
}
}
if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) {
w = 157*(c1 - (cp950_pua_tbl[k][2]>>8)) + c - (c >= 0xa1 ? 0x62 : 0x40)
+ cp950_pua_tbl[k][0];
} else {
w = c2 - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0];
}
}
}
if (w <= 0) {
w = (c1 << 8) | c;
w &= MBFL_WCSPLANE_MASK;
@ -161,7 +237,8 @@ mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter)
int
mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
{
int c1, s;
int k;
int c1, s, c2;
s = 0;
if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) {
@ -179,6 +256,39 @@ mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
} else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) {
s = ucs_r2_big5_table[c - ucs_r2_big5_table_min];
}
if (filter->to->no_encoding == mbfl_no_encoding_cp950) {
if (c >= 0xe000 && c <= 0xf848) { /* PUA for CP950 */
for (k = 0; k < sizeof(cp950_pua_tbl)/(sizeof(unsigned short)*4); k++) {
if (c <= cp950_pua_tbl[k][1]) {
break;
}
}
c1 = c - cp950_pua_tbl[k][0];
if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) {
c2 = cp950_pua_tbl[k][2] >> 8;
s = ((c1 / 157) + c2) << 8; c1 %= 157;
s |= c1 + (c1 >= 0x3f ? 0x62 : 0x40);
} else {
s = c1 + cp950_pua_tbl[k][2];
}
}
if (c == 0x80) {
s = 0x80;
} else if (c == 0xf8f8) {
s = 0xff;
} else if (c == 0x256d) {
s = 0xa27e;
} else if (c == 0x256e) {
s = 0xa2a1;
} else if (c == 0x256f) {
s = 0xa2a3;
} else if (c == 0x2570) {
s = 0xa2a2;
}
}
if (s <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_BIG5) {
@ -191,7 +301,7 @@ mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
}
}
if (s >= 0) {
if (s < 0x80) { /* latin */
if (s <= 0x80 || s == 0xff) { /* latin */
CK((*filter->output_function)(s, filter->data));
} else {
CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
@ -208,6 +318,13 @@ mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter)
static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter)
{
int c1;
if (filter->encoding == mbfl_no_encoding_cp950) {
c1 = 0x80;
} else {
c1 = 0xa0;
}
if (filter->status) { /* kanji second char */
if (c < 0x40 || (c > 0x7e && c < 0xa1) ||c > 0xfe) { /* bad */
filter->flag = 1;
@ -215,7 +332,7 @@ static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter)
filter->status = 0;
} else if (c >= 0 && c < 0x80) { /* latin ok */
;
} else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
} else if (c > c1 && c < 0xff) { /* DBCS lead byte */
filter->status = 1;
} else { /* bad */
filter->flag = 1;

5
ext/mbstring/libmbfl/filters/mbfilter_big5.h

@ -37,6 +37,11 @@ extern const struct mbfl_identify_vtbl vtbl_identify_big5;
/* Big5 <=> wchar conversion vtables. */
extern const struct mbfl_convert_vtbl vtbl_big5_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_big5;
/* CP950 encoding descriptor and its identify / conversion vtables. */
extern const mbfl_encoding mbfl_encoding_cp950;
extern const struct mbfl_identify_vtbl vtbl_identify_cp950;
extern const struct mbfl_convert_vtbl vtbl_cp950_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp950;
/* Conversion callbacks; the CP950 vtables use these same two functions. */
int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter);

142
ext/mbstring/libmbfl/filters/mbfilter_cp936.c

@ -33,7 +33,7 @@
#include "mbfilter.h"
#include "mbfilter_cp936.h"
#define UNICODE_TABLE_CP936_DEF
#include "unicode_table_cp936.h"
static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter);
@ -102,7 +102,8 @@ const struct mbfl_convert_vtbl vtbl_wchar_cp936 = {
int
mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter)
{
int c1, w;
int k;
int c1, c2, w;
switch (filter->status) {
case 0:
@ -110,39 +111,68 @@ mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter)
CK((*filter->output_function)(c, filter->data));
} else if (c == 0x80) { /* euro sign */
CK((*filter->output_function)(0x20ac, filter->data));
} else if (c > 0x80 && c < 0xff) { /* dbcs lead byte */
} else if (c < 0xff) { /* dbcs lead byte */
filter->status = 1;
filter->cache = c;
} else {
w = c & MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
} else { /* 0xff */
CK((*filter->output_function)(0xf8f5, filter->data));
}
break;
case 1: /* dbcs second byte */
filter->status = 0;
c1 = filter->cache;
if ( c1 < 0xff && c1 > 0x80 && c > 0x39 && c < 0xff && c != 0x7f) {
w = (c1 - 0x81)*192 + (c - 0x40);
if (w >= 0 && w < cp936_ucs_table_size) {
w = cp936_ucs_table[w];
} else {
w = 0;
if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) &&
(c >= 0xa1 && c <= 0xfe)) {
/* UDA part1,2: U+E000-U+E4C5 */
w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000;
CK((*filter->output_function)(w, filter->data));
} else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) {
/* UDA part3 : U+E4C6-U+E765*/
w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6;
CK((*filter->output_function)(w, filter->data));
}
c2 = (c1 << 8) | c;
if (w <= 0 &&
((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) ||
(c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) ||
(c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) {
for (k = 0; k < mbfl_cp936_pua_tbl_max; k++) {
if (c2 >= mbfl_cp936_pua_tbl[k][2] &&
c2 <= mbfl_cp936_pua_tbl[k][2] +
mbfl_cp936_pua_tbl[k][1] - mbfl_cp936_pua_tbl[k][0]) {
w = c2 - mbfl_cp936_pua_tbl[k][2] + mbfl_cp936_pua_tbl[k][0];
CK((*filter->output_function)(w, filter->data));
break;
}
}
if (w <= 0) {
}
if (w <= 0) {
if (c1 < 0xff && c1 > 0x80 && c > 0x39 && c < 0xff && c != 0x7f) {
w = (c1 - 0x81)*192 + (c - 0x40);
if (w >= 0 && w < cp936_ucs_table_size) {
w = cp936_ucs_table[w];
} else {
w = 0;
}
if (w <= 0) {
w = (c1 << 8) | c;
w &= MBFL_WCSPLANE_MASK;
w |= MBFL_WCSPLANE_WINCP936;
}
CK((*filter->output_function)(w, filter->data));
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
w = (c1 << 8) | c;
w &= MBFL_WCSPLANE_MASK;
w |= MBFL_WCSPLANE_WINCP936;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
CK((*filter->output_function)(w, filter->data));
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
w = (c1 << 8) | c;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
@ -160,25 +190,75 @@ mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter)
int
mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter)
{
int c1, s;
int k, k1, k2;
int c1, s = 0, s1;
s = 0;
if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
/* U+0000 - U+0451 */
s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
/* U+2000 - U+26FF */
if (c == 0x203e) {
s = 0xa3fe;
} else if (c == 0x2218) {
s = 0xa1e3;
} else if (c == 0x223c) {
s = 0xa1ab;
} else {
s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
}
} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
/* U+2F00 - U+33FF */
s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
/* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */
s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
} else if (c >= 0xe000 && c <= 0xe864) { /* PUA */
if (c < 0xe766) {
if (c < 0xe4c6) {
c1 = c - 0xe000;
s = (c1 % 94) + 0xa1; c1 /= 94;
s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8;
} else {
c1 = c - 0xe4c6;
s = ((c1 / 96) + 0xa1) << 8; c1 %= 96;
s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40);
}
} else {
/* U+E766..U+E864 */
k1 = 0; k2 = mbfl_cp936_pua_tbl_max;
while (k1 < k2) {
k = (k1 + k2) >> 1;
if (c < mbfl_cp936_pua_tbl[k][0]) {
k2 = k;
} else if (c > mbfl_cp936_pua_tbl[k][1]) {
k1 = k + 1;
} else {
s = c - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2];
break;
}
}
}
} else if (c == 0xf8f5) {
s = 0xff;
} else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) {
/* U+F900-FA2F CJK Compatibility Ideographs */
s = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min];
} else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) {
s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min];
} else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) {
s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min];
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min]; /* U+FE50-FE6F Small Form Variants */
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
/* U+FF00-FFFF HW/FW Forms */
if (c == 0xff04) {
s = 0xa1e7;
} else if (c == 0xff5e) {
s = 0xa1ab;
} else if (c >= 0xff01 && c <= 0xff5d) {
s = c - 0xff01 + 0xa3a1;
} else if (c >= 0xffe0 && c <= 0xffe5) {
s = ucs_hff_s_cp936_table[c-0xffe0];
}
}
if (s <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
@ -192,7 +272,7 @@ mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter)
}
}
if (s >= 0) {
if (s <= 0x80) { /* latin */
if (s <= 0x80 || s == 0xff) { /* latin */
CK((*filter->output_function)(s, filter->data));
} else {
CK((*filter->output_function)((s >> 8) & 0xff, filter->data));

10
ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c

@ -169,7 +169,15 @@ mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter)
} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
if (c == 0xff04) {
s = 0xa1e7;
} else if (c == 0xff5e) {
s = 0xa1ab;
} else if (c >= 0xff01 && c <= 0xff5d) {
s = c - 0xff01 + 0xa3a1;
} else if (c >= 0xffe0 && c <= 0xffe5) {
s = ucs_hff_s_cp936_table[c-0xffe0];
}
}
c1 = (s >> 8) & 0xff;
c2 = s & 0xff;

6
ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c

@ -37,9 +37,9 @@
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter);
int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -275,7 +275,7 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
return c;
}
static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter)
int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter)
{
switch (filter->status) {
case 0: /* latin */

78
ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c

@ -0,0 +1,78 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by rui hirokawa <hirokawa@php.net> on 16 aug 2011.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_euc_jp_2004.h"
#include "mbfilter_sjis_2004.h"
#include "unicode_table_jis2004.h"
/*
 * EUC-JP identify callback and byte-length table, reused below by the
 * EUC-JP-2004 descriptor and identify vtable.
 * NOTE(review): these are expected to be the non-static definitions in
 * mbfilter_euc_jp.c -- confirm.
 */
extern int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter);
extern const unsigned char mblen_table_eucjp[];
/* NULL-terminated list of alternative names for EUC-JP-2004. */
static const char *mbfl_encoding_eucjp2004_aliases[] = {"EUC_JP-2004", NULL};
/*
 * Encoding descriptor for EUC-JP-2004. It uses the EUC-JP byte-length
 * table (mblen_table_eucjp) and is flagged as a multi-byte encoding.
 */
const mbfl_encoding mbfl_encoding_eucjp2004 = {
mbfl_no_encoding_eucjp2004,
"EUC-JP-2004", /* presumably the canonical encoding name -- confirm against mbfl_encoding */
"EUC-JP", /* presumably the MIME name -- confirm against mbfl_encoding */
(const char *(*)[])&mbfl_encoding_eucjp2004_aliases,
mblen_table_eucjp,
MBFL_ENCTYPE_MBCS
};
/*
 * Identify-filter vtable for EUC-JP-2004: common constructor/destructor
 * and the EUC-JP identify callback (mbfl_filt_ident_eucjp).
 */
const struct mbfl_identify_vtbl vtbl_identify_eucjp2004 = {
mbfl_no_encoding_eucjp2004,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_eucjp
};
/*
 * Conversion vtable EUC-JP-2004 => wchar. The conversion callback is
 * mbfl_filt_conv_jis2004_wchar.
 * NOTE(review): that callback is presumably declared in
 * mbfilter_sjis_2004.h and shared with the other JIS X 0213:2004
 * encodings -- confirm.
 */
const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar = {
mbfl_no_encoding_eucjp2004,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_jis2004_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion vtable wchar => EUC-JP-2004. Unlike the reverse direction,
 * this one installs a dedicated flush callback
 * (mbfl_filt_conv_jis2004_flush) rather than the common flush.
 * NOTE(review): both jis2004 callbacks are presumably declared in
 * mbfilter_sjis_2004.h -- confirm.
 */
const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_eucjp2004,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_jis2004,
mbfl_filt_conv_jis2004_flush
};

43
ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.h
* by rui hirokawa <hirokawa@php.net> on 15 aug 2011.
*
*/
#ifndef MBFL_MBFILTER_EUC_JP_2004_H
#define MBFL_MBFILTER_EUC_JP_2004_H
#include "mbfilter.h"
/* EUC-JP-2004 encoding descriptor and its identify / conversion vtables. */
extern const mbfl_encoding mbfl_encoding_eucjp2004;
extern const struct mbfl_identify_vtbl vtbl_identify_eucjp2004;
extern const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004;
/*
 * NOTE(review): the EUC-JP-2004 vtables in mbfilter_euc_jp_2004.c install
 * mbfl_filt_conv_jis2004_wchar / mbfl_filt_conv_wchar_jis2004, not the two
 * functions declared here. Confirm that these prototypes have definitions
 * somewhere; otherwise any caller will fail to link.
 */
int mbfl_filt_conv_eucjp2004_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_eucjp2004(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_EUC_JP_2004_H */

66
ext/mbstring/libmbfl/filters/mbfilter_gb18030.c

@ -136,7 +136,7 @@ mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter)
CK((*filter->output_function)(0x20ac, filter->data));
} else if (c == 0xff) {
CK((*filter->output_function)(0x00ff, filter->data));
} else if (c > 0x80 && c < 0xff) { /* dbcs/fbcs lead byte */
} else if (c > 0x80 && c < 0xff) { /* dbcs/qbcs lead byte */
filter->status = 1;
filter->cache = c;
} else {
@ -146,7 +146,7 @@ mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter)
}
break;
case 1: /* dbcs/fbcs second byte */
case 1: /* dbcs/qbcs second byte */
c1 = filter->cache;
filter->status = 0;
@ -154,7 +154,8 @@ mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter)
filter->status = 2;
filter->cache = (c1 << 8) | c;
return c;
} else if (c1 >= 0x90 && c1 <= 0xe3 && c >= 0x30 && c <= 0x39) { /* 4 byte range: Unicode 16 planes */
} else if (c1 >= 0x90 && c1 <= 0xe3 && c >= 0x30 && c <= 0x39) {
/* 4 byte range: Unicode 16 planes */
filter->status = 2;
filter->cache = (c1 << 8) | c;
return c;
@ -162,16 +163,22 @@ mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter)
(c >= 0xa1 && c <= 0xfe)) { /* UDA part1,2: U+E000-U+E4C5 */
w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000;
CK((*filter->output_function)(w, filter->data));
} else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { /* UDA part3 : U+E4C6-U+E765*/
} else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) {
/* UDA part3 : U+E4C6-U+E765*/
w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6;
CK((*filter->output_function)(w, filter->data));
}
if (w <= 0) {
c2 = (c1 << 8) | c;
c2 = (c1 << 8) | c;
if (w <= 0 &&
((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) ||
(c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) ||
(c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) {
for (k = 0; k < mbfl_gb18030_pua_tbl_max; k++) {
if (c2 >= mbfl_gb18030_pua_tbl[k][2] &&
c2 <= mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][1] - mbfl_gb18030_pua_tbl[k][0]) {
c2 <= mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][1]
- mbfl_gb18030_pua_tbl[k][0]) {
w = c2 - mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][0];
CK((*filter->output_function)(w, filter->data));
break;
@ -207,7 +214,7 @@ mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter)
}
}
break;
case 2: /* fbcs third byte */
case 2: /* qbcs third byte */
c1 = (filter->cache >> 8) & 0xff;
c2 = filter->cache & 0xff;
filter->status = 0;
@ -224,7 +231,7 @@ mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter)
}
break;
case 3: /* fbcs fourth byte */
case 3: /* qbcs fourth byte */
c1 = (filter->cache >> 16) & 0xff;
c2 = (filter->cache >> 8) & 0xff;
c3 = filter->cache & 0xff;
@ -279,9 +286,8 @@ int
mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter)
{
int k, k1, k2;
int c1, s, s1 = 0;
int c1, s = 0, s1 = 0;
s = 0;
if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
@ -291,13 +297,37 @@ mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter)
} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
} else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) {
s = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min];
/* U+F900-FA2F CJK Compatibility Ideographs */
if (c == 0xf92c) {
s = 0xfd9c;
} else if (c == 0xf979) {
s = 0xfd9d;
} else if (c == 0xf995) {
s = 0xfd9e;
} else if (c == 0xf9e7) {
s = 0xfd9f;
} else if (c == 0xf9f1) {
s = 0xfda0;
} else if (c >= 0xfa0c && c <= 0xfa29) {
s = ucs_ci_s_cp936_table[c - 0xfa0c];
}
} else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) {
/* FE30h CJK Compatibility Forms */
s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min];
} else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) {
/* U+FE50-FE6F Small Form Variants */
s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min];
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
/* U+FF00-FFFF HW/FW Forms */
if (c == 0xff04) {
s = 0xa1e7;
} else if (c == 0xff5e) {
s = 0xa1ab;
} else if (c >= 0xff01 && c <= 0xff5d) {
s = c - 0xff01 + 0xa3a1;
} else if (c >= 0xffe0 && c <= 0xffe5) {
s = ucs_hff_s_cp936_table[c-0xffe0];
}
}
if (c == 0x20ac) { /* euro-sign */
@ -371,7 +401,7 @@ mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter)
if (s >= 0) {
if (s <= 0x80) { /* latin */
CK((*filter->output_function)(s, filter->data));
} else if (s1 > 0) { /* fbcs */
} else if (s1 > 0) { /* qbcs */
CK((*filter->output_function)(s1 & 0xff, filter->data));
CK((*filter->output_function)((s >> 16) & 0xff, filter->data));
CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
@ -403,8 +433,8 @@ static int mbfl_filt_ident_gb18030(int c, mbfl_identify_filter *filter)
filter->status = 1;
filter->status |= (c << 8);
}
} else if (filter->status == 1) { /* dbcs/fbcs 2nd byte */
if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c >= 0x30 && c <= 0x39) { /* fbcs */
} else if (filter->status == 1) { /* dbcs/qbcs 2nd byte */
if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c >= 0x30 && c <= 0x39) { /* qbcs */
filter->status = 2;
} else if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && (c >= 0xa1 && c <= 0xfe)) {
filter->status = 0; /* UDA part 1,2 */
@ -420,14 +450,14 @@ static int mbfl_filt_ident_gb18030(int c, mbfl_identify_filter *filter)
filter->flag = 1; /* bad */
filter->status = 0;
}
} else if (filter->status == 2) { /* fbcs 3rd byte */
} else if (filter->status == 2) { /* qbcs 3rd byte */
if (c > 0x80 && c < 0xff) {
filter->status = 3;
} else {
filter->flag = 1; /* bad */
filter->status = 0;
}
} else if (filter->status == 3) { /* fbcs 4th byte */
} else if (filter->status == 3) { /* qbcs 4th byte */
if (c >= 0x30 && c < 0x40) {
filter->status = 0;
} else {

11
ext/mbstring/libmbfl/filters/mbfilter_hz.c

@ -34,7 +34,6 @@
#include "mbfilter.h"
#include "mbfilter_hz.h"
#define UNICODE_TABLE_CP936_DEF
#include "unicode_table_cp936.h"
static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter);
@ -166,7 +165,15 @@ mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter)
} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
s = ucs_hff_cp936_table[c - ucs_hff_cp936_table_min];
if (c == 0xff04) {
s = 0xa1e7;
} else if (c == 0xff5e) {
s = 0xa1ab;
} else if (c >= 0xff01 && c <= 0xff5d) {
s = c - 0xff01 + 0xa3a1;
} else if (c >= 0xffe0 && c <= 0xffe5) {
s = ucs_hff_s_cp936_table[c-0xffe0];
}
}
if (s & 0x8000) {
s -= 0x8080;

212
ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c

@ -0,0 +1,212 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_jis.c
* by rui hirokawa <hirokawa@php.net> on 18 aug 2011.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso2022jp_2004.h"
#include "mbfilter_sjis_2004.h"
#include "unicode_table_jis2004.h"
#include "unicode_table_jis.h"
extern int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter);
static int mbfl_filt_conv_2022jp_2004_flush(mbfl_convert_filter *filter);
static int mbfl_filt_ident_2022jp_2004(int c, mbfl_identify_filter *filter);
/*
 * Encoding descriptor for ISO-2022-JP-2004.
 * Positional initializer; the slots appear to be: encoding id, name,
 * MIME name, alias list (none), mblen table (none), and type flags --
 * TODO confirm against the mbfl_encoding definition in mbfl_encoding.h.
 */
const mbfl_encoding mbfl_encoding_2022jp_2004 = {
mbfl_no_encoding_2022jp_2004,
"ISO-2022-JP-2004",
"ISO-2022-JP-2004",
NULL,
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
};
/*
 * Identify-filter table for ISO-2022-JP-2004: the shared common
 * constructor/destructor plus this file's per-byte identify function.
 */
const struct mbfl_identify_vtbl vtbl_identify_2022jp_2004 = {
mbfl_no_encoding_2022jp_2004,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_2022jp_2004
};
/*
 * Converter table ISO-2022-JP-2004 => wchar.
 * Decoding is delegated to the shared JIS-2004 decoder
 * mbfl_filt_conv_jis2004_wchar (declared in mbfilter_sjis_2004.h);
 * the common flush is used.
 */
const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar = {
mbfl_no_encoding_2022jp_2004,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_jis2004_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Converter table wchar => ISO-2022-JP-2004.
 * Encoding is delegated to the shared JIS-2004 encoder
 * mbfl_filt_conv_wchar_jis2004; the flush is this file's own
 * mbfl_filt_conv_2022jp_2004_flush, which also emits the escape
 * sequence that switches the stream back to ASCII.
 */
const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_2022jp_2004,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_jis2004,
mbfl_filt_conv_2022jp_2004_flush
};
/*
 * CK(statement): evaluate `statement`; if its value is negative, return -1
 * from the ENCLOSING function. Note the hidden return: only use inside
 * functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Flush handler for the wchar => ISO-2022-JP-2004 converter.
 *
 * If the encoder is still holding the first half of a possible combining
 * pair (low nibble of status == 1, table index in filter->cache), the
 * fallback code for that character is written out in JIS X 0213 plane 1,
 * preceded by ESC $ ( Q unless that set is already selected.  Afterwards
 * the stream is switched back to ASCII (ESC ( B) when any non-ASCII set is
 * active, and the downstream flush function, if any, is invoked.
 *
 * Returns -1 if an output call fails, otherwise the downstream flush result
 * (or 0 when there is no downstream flush function).
 */
static int
mbfl_filt_conv_2022jp_2004_flush(mbfl_convert_filter *filter)
{
	int k, c1, c2, s1;

	k = filter->cache;

	/*
	 * k is used as an index into jisx0213_u2_fb_tbl, so it has to be
	 * strictly below the table length.  mbfl_filt_conv_wchar_jis2004 only
	 * ever caches indices in [0, jisx0213_u2_tbl_len).
	 */
	if ((filter->status & 0xf) == 1 && k >= 0 && k < jisx0213_u2_tbl_len) {
		s1 = jisx0213_u2_fb_tbl[k];

		c1 = (s1 >> 8) & 0x7f;
		c2 = s1 & 0x7f;

		if ((filter->status & 0xff00) != 0x200) {
			CK((*filter->output_function)(0x1b, filter->data));	/* ESC */
			CK((*filter->output_function)(0x24, filter->data));	/* '$' */
			CK((*filter->output_function)(0x28, filter->data));	/* '(' */
			CK((*filter->output_function)(0x51, filter->data));	/* 'Q' */
		}
		filter->status = 0x200;

		CK((*filter->output_function)(c1, filter->data));
		CK((*filter->output_function)(c2, filter->data));
	}
	filter->cache = 0;

	/* back to latin */
	if ((filter->status & 0xff00) != 0) {
		CK((*filter->output_function)(0x1b, filter->data));	/* ESC */
		CK((*filter->output_function)(0x28, filter->data));	/* '(' */
		CK((*filter->output_function)(0x42, filter->data));	/* 'B' */
	}

	filter->status &= 0xff;

	if (filter->flush_function != NULL) {
		return (*filter->flush_function)(filter->data);
	}

	return 0;
}
/*
 * Identify filter for ISO-2022-JP-2004: consumes one byte and sets
 * filter->flag when the byte cannot belong to this encoding.
 *
 * The low nibble of filter->status is the parser state:
 *   0  idle            1  expecting second byte of a two-byte character
 *   2  after ESC       3  after ESC $
 *   4  after ESC $ (   5  after ESC (
 * The remaining bits record the selected character set (0x00 after ESC ( B,
 * 0x80 after ESC $ B, 0x90 after ESC $ ( Q, 0xa0 after ESC $ ( P).
 *
 * Always returns c.
 */
static int mbfl_filt_ident_2022jp_2004(int c, mbfl_identify_filter *filter)
{
	int reparse;

	do {
		reparse = 0;

		switch (filter->status & 0xf) {
		case 0:		/* idle */
			if (c == 0x1b) {
				filter->status += 2;
			} else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) {
				/* first byte of a two-byte character */
				filter->status += 1;
			} else if (c < 0 || c >= 0x80) {
				/* anything that is not latin / CTL is bad */
				filter->flag = 1;
			}
			break;

		case 1:		/* second byte of a two-byte character */
			if (c == 0x1b) {
				filter->status++;
				break;
			}
			filter->status &= ~0xf;
			if (c < 0x21 || c > 0x7e) {
				filter->flag = 1;	/* bad */
			}
			break;

		case 2:		/* ESC */
			switch (c) {
			case 0x24:	/* '$' */
				filter->status++;
				break;
			case 0x28:	/* '(' */
				filter->status += 3;
				break;
			default:
				reparse = 1;
				break;
			}
			break;

		case 3:		/* ESC $ */
			switch (c) {
			case 0x42:	/* 'B' */
				filter->status = 0x80;
				break;
			case 0x28:	/* '(' */
				filter->status++;
				break;
			default:
				reparse = 1;
				break;
			}
			break;

		case 4:		/* ESC $ ( */
			switch (c) {
			case 0x51:	/* JIS X 0213 plane 1 */
				filter->status = 0x90;
				break;
			case 0x50:	/* JIS X 0213 plane 2 */
				filter->status = 0xa0;
				break;
			default:
				reparse = 1;
				break;
			}
			break;

		case 5:		/* ESC ( */
			if (c == 0x42) {	/* 'B' */
				filter->status = 0;
			} else {
				reparse = 1;
			}
			break;

		default:
			filter->status = 0;
			break;
		}

		if (reparse) {
			/* broken escape sequence: flag it, drop back to the idle
			 * state and look at the same byte again */
			filter->flag = 1;	/* bad */
			filter->status &= ~0xf;
		}
	} while (reparse);

	return c;
}

44
ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.h

@ -0,0 +1,44 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_ja.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_2022JP_2004_H
#define MBFL_MBFILTER_2022JP_2004_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables defined in mbfilter_iso2022jp_2004.c */
extern const mbfl_encoding mbfl_encoding_2022jp_2004;
extern const struct mbfl_identify_vtbl vtbl_identify_2022jp_2004;
extern const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004;
/*
 * NOTE(review): mbfilter_iso2022jp_2004.c does not appear to define any of
 * the three functions below; its tables use mbfl_filt_conv_jis2004_wchar /
 * mbfl_filt_conv_wchar_jis2004 and a static mbfl_filt_conv_2022jp_2004_flush
 * instead.  Confirm whether these prototypes are still needed.
 */
int mbfl_filt_conv_2022jp_2004_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_2022jp_2004(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_any_2022jp_2004_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_2022JP_2004_H */

2
ext/mbstring/libmbfl/filters/mbfilter_sjis.c

@ -42,7 +42,7 @@
int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

676
ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c

@ -0,0 +1,676 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_sjis.c
* by rui hirokawa <hirokawa@php.net> on 15 aug 2011.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_sjis_2004.h"
#define UNICODE_TABLE_JIS2004_DEF
#include "unicode_table_jis2004.h"
#include "unicode_table_jis.h"
extern const unsigned char mblen_table_sjis[];
static int mbfl_filt_ident_sjis2004(int c, mbfl_identify_filter *filter);
/*
 * Inclusive Unicode code point ranges {first, last} served by direct
 * lookup tables.  Entry k here pairs with uni2sjis_tbl[k]; the encoder
 * scans the ranges in this order and stops at the first one containing c.
 */
static const int uni2sjis_tbl_range[][2] = {
{0x0000, 0x045f},
{0x4e00, 0x9fff},
{0xff00, 0xffe5},
{0xfa0f, 0xfa6a},
};
/*
 * Lookup tables parallel to uni2sjis_tbl_range: table k is indexed with
 * (c - uni2sjis_tbl_range[k][0]).  Each table is presumably large enough
 * to cover its whole range -- TODO confirm in unicode_table_jis2004.h.
 */
static const unsigned short *uni2sjis_tbl[] = {
ucs_a1_jisx0213_table,
ucs_i_jisx0213_table,
ucs_r_jisx0213_table,
ucs_r2_jisx0213_table,
};
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n);
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
/* NULL-terminated list of alternative names accepted for SJIS-2004 */
static const char *mbfl_encoding_sjis2004_aliases[] = {"SJIS2004","Shift_JIS-2004", NULL};
/*
 * Encoding descriptor for SJIS-2004.  It shares the Shift_JIS byte-length
 * table (mblen_table_sjis, defined in another translation unit).
 * NOTE(review): the second string is "Shift_JIS", not "Shift_JIS-2004";
 * confirm this is the intended value for that slot.
 */
const mbfl_encoding mbfl_encoding_sjis2004 = {
mbfl_no_encoding_sjis2004,
"SJIS-2004",
"Shift_JIS",
(const char *(*)[])&mbfl_encoding_sjis2004_aliases,
mblen_table_sjis,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE
};
/*
 * Identify-filter table for SJIS-2004.  It reuses the plain Shift_JIS
 * identify function mbfl_filt_ident_sjis (external to this file).
 */
const struct mbfl_identify_vtbl vtbl_identify_sjis2004 = {
mbfl_no_encoding_sjis2004,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_sjis
};
/* Converter table SJIS-2004 => wchar (shared JIS-2004 decoder, common flush) */
const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = {
mbfl_no_encoding_sjis2004,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_jis2004_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Converter table wchar => SJIS-2004 (shared JIS-2004 encoder).  The flush
 * is mbfl_filt_conv_jis2004_flush, which writes out a still-pending first
 * half of a combining pair.
 */
const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_sjis2004,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_jis2004,
mbfl_filt_conv_jis2004_flush
};
/*
 * CK(statement): evaluate `statement`; if its value is negative, return -1
 * from the ENCLOSING function. Note the hidden return: only use inside
 * functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * SJIS_ENCODE(c1, c2, s1, s2): convert a 7-bit JIS row/cell byte pair
 * (c1, c2) into the Shift_JIS lead/trail byte pair, stored in s1 and s2.
 * s1 and s2 must be lvalues distinct from c1 and c2.
 */
#define SJIS_ENCODE(c1,c2,s1,s2) \
	do { \
		/* two JIS rows share one lead byte; rows >= 0x5f map to the 0xe0 area */ \
		s1 = (((c1) - 1) >> 1) + (((c1) < 0x5f) ? 0x71 : 0xb1); \
		if (((c1) & 1) == 0) { \
			/* even row: upper half of the trail byte range */ \
			s2 = (c2) + 0x7e; \
		} else if ((c2) < 0x60) { \
			s2 = (c2) + 0x1f; \
		} else { \
			/* step over 0x7f, which is never a trail byte */ \
			s2 = (c2) + 0x20; \
		} \
	} while (0)
/*
 * SJIS_DECODE(c1, c2, s1, s2): convert a Shift_JIS lead/trail byte pair
 * (c1, c2) into the 7-bit JIS row/cell byte pair, stored in s1 and s2.
 * s1 and s2 must be lvalues distinct from c1 and c2.
 */
#define SJIS_DECODE(c1,c2,s1,s2) \
	do { \
		/* each lead byte covers two JIS rows; this is the odd (first) one */ \
		s1 = (((c1) - (((c1) < 0xa0) ? 0x81 : 0xc1)) << 1) + 0x21; \
		if ((c2) >= 0x9f) { \
			/* upper half of the trail range belongs to the even row */ \
			s1++; \
			s2 = (c2) - 0x7e; \
		} else if ((c2) < 0x7f) { \
			s2 = (c2) - 0x1f; \
		} else { \
			s2 = (c2) - 0x20; \
		} \
	} while (0)
/*
* JIS-2004 => wchar
*/
/*
 * Shared decoder for Shift_JIS-2004, EUC-JP-2004 and ISO-2022-JP-2004.
 *
 * Consumes one input byte `c`; the source encoding is taken from
 * filter->from->no_encoding.  Decoded code points (or values tagged with
 * MBFL_WCSGROUP_THROUGH / MBFL_WCSPLANE_* when no mapping exists) are passed
 * to filter->output_function.
 *
 * The low nibble of filter->status is the parser state:
 *   0  idle
 *   1  second byte of a two-byte character (JIS X 0213 plane 1)
 *   2  byte after 0x8e (EUC-JP-2004 kana)
 *   3  first byte after 0x8f (EUC-JP-2004 plane 2)
 *   4  second byte of a JIS X 0213 plane 2 character
 *   5  second byte of a JIS X 0208 character (ISO-2022-JP-2004)
 *   6  after ESC   7  after ESC $   8  after ESC $ (   9  after ESC (
 * For ISO-2022-JP-2004 the remaining bits hold the selected set
 * (0x80 JIS X 0208, 0x90 JIS X 0213 plane 1, 0xa0 JIS X 0213 plane 2).
 * filter->cache keeps the first byte of a two-byte character.
 *
 * Returns c, or -1 when an output call fails.
 */
int
mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter)
{
	int k;
	/* s1/s2 start at 0 so that an invalid trail byte in state 1 yields
	 * w1 == 0 (handled as an error below) instead of reading garbage */
	int c1, c2, s, s1 = 0, s2 = 0, w = 0, w1;

retry:
	switch (filter->status & 0xf) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
				CK((*filter->output_function)(c, filter->data));
			} else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
				if (c == 0x5c) {
					CK((*filter->output_function)(0x00a5, filter->data));
				} else if (c == 0x7e) {
					CK((*filter->output_function)(0x203e, filter->data));
				} else {
					CK((*filter->output_function)(c, filter->data));
				}
			} else {	/* ISO-2022-JP-2004 */
				if (c == 0x1b) {
					filter->status += 6;
				} else if ((filter->status == 0x80 || filter->status == 0x90 || filter->status == 0xa0)
						   && c > 0x20 && c < 0x7f) {	/* kanji first char */
					filter->cache = c;
					if (filter->status == 0x90) {
						filter->status += 1;	/* JIS X 0213 plane 1 */
					} else if (filter->status == 0xa0) {
						filter->status += 4;	/* JIS X 0213 plane 2 */
					} else {
						filter->status += 5;	/* JIS X 0208 */
					}
				} else {
					CK((*filter->output_function)(c, filter->data));
				}
			}
		} else {
			if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
				if (c > 0xa0 && c < 0xff) {	/* X 0213 plane 1 first char */
					filter->status = 1;
					filter->cache = c;
				} else if (c == 0x8e) {	/* kana first char */
					filter->status = 2;
				} else if (c == 0x8f) {	/* X 0213 plane 2 first char */
					filter->status = 3;
				} else {
					w = c & MBFL_WCSGROUP_MASK;
					w |= MBFL_WCSGROUP_THROUGH;
					CK((*filter->output_function)(w, filter->data));
				}
			} else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
				if (c > 0xa0 && c < 0xe0) {	/* kana */
					CK((*filter->output_function)(0xfec0 + c, filter->data));
				} else if (c > 0x80 && c < 0xfd && c != 0xa0) {	/* kanji first char */
					filter->status = 1;
					filter->cache = c;
				} else {
					w = c & MBFL_WCSGROUP_MASK;
					w |= MBFL_WCSGROUP_THROUGH;
					CK((*filter->output_function)(w, filter->data));
				}
			} else {
				w = c & MBFL_WCSGROUP_MASK;
				w |= MBFL_WCSGROUP_THROUGH;
				CK((*filter->output_function)(w, filter->data));
			}
		}
		break;

	case 1:		/* kanji second char */
		filter->status &= ~0xf;
		c1 = filter->cache;

		/* bring the byte pair into 7-bit JIS form (s1, s2); on an invalid
		 * trail byte s1/s2 keep their initial 0 */
		if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
			if (c > 0xa0 && c < 0xff) {
				s1 = c1 - 0x80;
				s2 = c - 0x80;
			}
		} else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
			if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
				SJIS_DECODE(c1, c, s1, s2);
			}
		} else {
			s1 = c1;
			s2 = c;
		}
		w1 = (s1 << 8) | s2;

		if (w1 >= 0x2121) {
			/* codes that decode to a base character + combining mark */
			if ((w1 >= 0x2477 && w1 <= 0x247B) ||
				(w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 ||
				(w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) {
				k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len);
				if (k >= 0) {
					w = jisx0213_u2_tbl[2*k];
					CK((*filter->output_function)(w, filter->data));
					w = jisx0213_u2_tbl[2*k+1];
				}
			}

			/* conversion for the BMP */
			if (w <= 0) {
				w1 = (s1 - 0x21)*94 + s2 - 0x21;
				if (w1 >= 0 && w1 < jisx0213_ucs_table_size) {
					w = jisx0213_ucs_table[w1];
				}
			}

			/* conversion for code points above U+20000 */
			if (w <= 0) {
				w1 = (s1 << 8) | s2;
				k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len);
				if (k >= 0) {
					w = jisx0213_jis_u5_tbl[k] + 0x20000;
				}
			}

			if (w <= 0) {
				if (s1 < 0x7f && s2 < 0x7f) {
					w = (s1 << 8) | s2;
					w &= MBFL_WCSPLANE_MASK;
					w |= MBFL_WCSPLANE_JIS0213;
				} else {
					w = (c1 << 8) | c;
					w &= MBFL_WCSGROUP_MASK;
					w |= MBFL_WCSGROUP_THROUGH;
				}
			}
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 2:	/* got 0x8e : EUC-JP-2004 */
		filter->status = 0;
		if (c > 0xa0 && c < 0xe0) {
			w = 0xfec0 + c;
			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			w = 0x8e00 | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 3:	/* got 0x8f,  X 0213 plane 2 first char : EUC-JP-2004 */
		if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
			CK((*filter->output_function)(c, filter->data));
			filter->status = 0;
		} else {
			filter->status++;
			filter->cache = c;
		}
		break;

	case 4:	/* got 0x8f,  X 0213 plane 2 second char */
		filter->status &= ~0xf;
		c1 = filter->cache;
		c2 = c;
		if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
			c1 -= 0x80;
			c2 -= 0x80;
		}
		s1 = c1 - 0x21;
		s2 = c2 - 0x21;

		if (s1 >= 0 && s1 < 94 && s2 >= 0 && s2 < 94) {
			/* find the position of this row among the rows used by plane 2 */
			for (k = 0; k < jisx0213_p2_ofst_len; k++) {
				if (s1 == jisx0213_p2_ofst[k]-1) {
					break;
				}
			}

			w = 0;
			/* only consult the tables when the row was found; otherwise k
			 * equals the table length and must not be used as an index */
			if (k < jisx0213_p2_ofst_len) {
				k = k - (jisx0213_p2_ofst[k]-1);

				/* conversion for the BMP */
				s = (s1 + 94 + k)*94 + s2;
				if (s >= 0 && s < jisx0213_ucs_table_size) {
					w = jisx0213_ucs_table[s];
				}

				/* conversion for code points above U+20000 */
				if (w <= 0) {
					w1 = ((c1 + k + 94) << 8) | c2;
					k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len);
					if (k >= 0) {
						w = jisx0213_jis_u5_tbl[k] + 0x20000;
					}
				}
			}

			if (w <= 0) {
				w = ((c1 & 0x7f) << 8) | (c2 & 0x7f);
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_JIS0213;
			}

			CK((*filter->output_function)(w, filter->data));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {		/* CTLs */
			CK((*filter->output_function)(c, filter->data));
		} else {
			/* NOTE(review): this tests filter->to, although the tables in
			 * this file install the function with wchar as the target;
			 * filter->from may have been intended -- confirm. */
			if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
				w = (c1 << 8) | c | 0x8f0000;
				w &= MBFL_WCSGROUP_MASK;
				w |= MBFL_WCSGROUP_THROUGH;
			} else {
				w = ((c1 & 0x7f) << 8) | (c2 & 0x7f);
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_JIS0213;
			}
			CK((*filter->output_function)(w, filter->data));
		}
		break;

	case 5:	/* JIS X 0208 second char (ISO-2022-JP-2004) */
		filter->status &= ~0xf;
		c1 = filter->cache;
		if (c > 0x20 && c < 0x7f) {
			s = (c1 - 0x21)*94 + c - 0x21;
			if (s >= 0 && s < jisx0208_ucs_table_size) {
				w = jisx0208_ucs_table[s];
			}
		}
		if (w <= 0) {
			w = (c1 << 8) | c;
			w &= MBFL_WCSPLANE_MASK;
			w |= MBFL_WCSPLANE_JIS0208;
		}
		CK((*filter->output_function)(w, filter->data));
		break;

	/* ESC */
/*	case 0x06:	*/
/*	case 0x16:	*/
/*	case 0x26:	*/
/*	case 0x86:	*/
/*	case 0x96:	*/
/*	case 0xa6:	*/
	case 6:
		if (c == 0x24) {		/* '$' */
			filter->status++;
		} else if (c == 0x28) {		/* '(' */
			filter->status += 3;
		} else {
			/* not an escape sequence: emit the swallowed ESC and
			 * reprocess this byte in the idle state */
			filter->status &= ~0xf;
			CK((*filter->output_function)(0x1b, filter->data));
			goto retry;
		}
		break;

	/* ESC $ */
/*	case 0x07:	*/
/*	case 0x17:	*/
/*	case 0x27:	*/
/*	case 0x87:	*/
/*	case 0x97:	*/
/*	case 0xa7:	*/
	case 7:
		if (c == 0x42) {	/* 'B' -> JIS X 0208-1983 */
			filter->status = 0x80;
		} else if (c == 0x28) {			/* '(' */
			filter->status++;
		} else {
			filter->status &= ~0xf;
			CK((*filter->output_function)(0x1b, filter->data));
			CK((*filter->output_function)(0x24, filter->data));
			goto retry;
		}
		break;

	/* ESC $ ( */
/*	case 0x08:	*/
/*	case 0x18:	*/
/*	case 0x28:	*/
/*	case 0x88:	*/
/*	case 0x98:	*/
/*	case 0xa8:	*/
	case 8:
		if (c == 0x51) {	/* JIS X 0213 plane 1 */
			filter->status = 0x90;
		} else if (c == 0x50) {			/* JIS X 0213 plane 2 */
			filter->status = 0xa0;
		} else {
			filter->status &= ~0xf;
			CK((*filter->output_function)(0x1b, filter->data));
			CK((*filter->output_function)(0x24, filter->data));
			CK((*filter->output_function)(0x28, filter->data));
			goto retry;
		}
		break;

	/* ESC ( */
/*	case 0x09:	*/
/*	case 0x19:	*/
/*	case 0x29:	*/
/*	case 0x89:	*/
/*	case 0x99:	*/
	case 9:
		if (c == 0x42) {		/* 'B' : ASCII */
			filter->status = 0;
		} else {
			filter->status &= ~0xf;
			CK((*filter->output_function)(0x1b, filter->data));
			CK((*filter->output_function)(0x28, filter->data));
			goto retry;
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * Shared encoder for Shift_JIS-2004, EUC-JP-2004 and ISO-2022-JP-2004.
 *
 * Consumes one code point `c`; the target encoding is taken from
 * filter->to->no_encoding.  Output bytes go to filter->output_function.
 *
 * A code point that can start a two-code-point sequence listed in
 * jisx0213_u2_tbl is held back: the low nibble of filter->status is set to
 * 1 and its table index is stored in filter->cache.  The next call emits
 * either the combined code or, when the pair does not match, the fallback
 * for the held character followed by the normal encoding of `c`.
 *
 * For ISO-2022-JP-2004 the bits 0xff00 of filter->status record the set
 * selected in the output: 0 ASCII, 0x200 JIS X 0213 plane 1,
 * 0x400 JIS X 0213 plane 2.
 *
 * Returns c, or -1 when an output call fails.
 */
int
mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
	int k;
	int c1, c2, s1, s2 = 0;

retry:
	/* start every pass (including the one after a fallback) with no
	 * result; a stale lead byte here would be emitted in place of c */
	s1 = 0;

	/* is c the first half of a two-code-point sequence? */
	if ((filter->status & 0xf) == 0 && (c == 0x00E6 ||
		(c >= 0x0254 && c <= 0x02E9) || (c >= 0x304B && c <= 0x31F7))) {
		for (k = 0; k < jisx0213_u2_tbl_len; k++) {
			if (c == jisx0213_u2_tbl[2*k]) {
				filter->status++;
				filter->cache = k;
				return c;
			}
		}
	}

	/* a first half is pending: does c complete the pair?
	 * (the cached index must be strictly below the table length) */
	if ((filter->status & 0xf) == 1 &&
		filter->cache >= 0 && filter->cache < jisx0213_u2_tbl_len) {
		k = filter->cache;
		filter->status &= ~0xf;
		filter->cache = 0;

		c1 = jisx0213_u2_tbl[2*k];
		/* for these bases the entry for U+0301 is the one following the
		 * entry that was matched first */
		if ((c1 == 0x0254 || c1 == 0x028C || c1 == 0x0259 || c1 == 0x025A)
			&& c == 0x0301) {
			k++;
		}
		if (c == jisx0213_u2_tbl[2*k+1]) {
			s1 = jisx0213_u2_key[k];
		} else {	/* fallback: emit the held character alone */
			s1 = jisx0213_u2_fb_tbl[k];

			if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
				c1 = (s1 >> 8) & 0xff;
				c2 = s1 & 0xff;
				SJIS_ENCODE(c1, c2, s1, s2);
			} else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
				s2 = (s1 & 0xff) + 0x80;
				s1 = ((s1 >> 8) & 0xff) + 0x80;
			} else {
				if (filter->status != 0x200) {
					CK((*filter->output_function)(0x1b, filter->data));	/* ESC */
					CK((*filter->output_function)(0x24, filter->data));	/* '$' */
					CK((*filter->output_function)(0x28, filter->data));	/* '(' */
					CK((*filter->output_function)(0x51, filter->data));	/* 'Q' */
				}
				filter->status = 0x200;

				s2 = s1 & 0x7f;
				s1 = (s1 >> 8) & 0x7f;
			}

			CK((*filter->output_function)(s1, filter->data));
			CK((*filter->output_function)(s2, filter->data));
			/* now encode c itself from scratch */
			goto retry;
		}
	}

	/* direct lookup tables */
	if (s1 <= 0) {
		for (k = 0; k < (int)(sizeof(uni2sjis_tbl_range) / sizeof(uni2sjis_tbl_range[0])); k++) {
			if (c >= uni2sjis_tbl_range[k][0] && c <= uni2sjis_tbl_range[k][1]) {
				s1 = uni2sjis_tbl[k][c-uni2sjis_tbl_range[k][0]];
				break;
			}
		}
	}

	/* range-compressed table */
	if (c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) {
		k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len);
		if (k >= 0) {
			s1 = ucs_c1_jisx0213_ofst[k] + c - ucs_c1_jisx0213_tbl[2*k];
		}
	}

	/* code points above U+20000 */
	if (c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) {
		k = mbfl_bisec_srch2(c - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len);
		if (k >= 0) {
			s1 = jisx0213_u5_jis_tbl[k];
		}
	}

	if (s1 <= 0) {
		/* values tagged as raw JIS X 0213 codes pass straight through */
		c1 = c & ~MBFL_WCSPLANE_MASK;
		if (c1 == MBFL_WCSPLANE_JIS0213) {
			s1 = c & MBFL_WCSPLANE_MASK;
		} else {
			k = mbfl_bisec_srch2(c, jisx0213_uni2sjis_cmap_key, jisx0213_uni2sjis_cmap_len);
			if (k >= 0) {
				s1 = jisx0213_uni2sjis_cmap_val[k];
			}
		}
		if (c == 0) {
			s1 = 0;
		} else if (s1 <= 0) {
			s1 = -1;
		}
	} else if (s1 >= 0x9980) {
		s1 = -1;
	}

	if (s1 >= 0) {
		if (s1 < 0x80) {	/* ASCII */
			if (filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 &&
				(filter->status & 0xff00) != 0) {
				CK((*filter->output_function)(0x1b, filter->data));	/* ESC */
				CK((*filter->output_function)(0x28, filter->data));	/* '(' */
				CK((*filter->output_function)(0x42, filter->data));	/* 'B' */
			}
			filter->status = 0;
			CK((*filter->output_function)(s1, filter->data));
		} else if (s1 < 0x100) {	/* latin or kana */
			if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
				CK((*filter->output_function)(0x8e, filter->data));
			}
			CK((*filter->output_function)(s1, filter->data));
		} else if (s1 < 0x7f00) {	/* X 0213 plane 1 */
			if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
				c1 = (s1 >> 8) & 0xff;
				c2 = s1 & 0xff;
				SJIS_ENCODE(c1, c2, s1, s2);
			} else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
				s2 = (s1 & 0xff) + 0x80;
				s1 = ((s1 >> 8) & 0xff) + 0x80;
			} else {
				if ((filter->status & 0xff00) != 0x200) {
					CK((*filter->output_function)(0x1b, filter->data));	/* ESC */
					CK((*filter->output_function)(0x24, filter->data));	/* '$' */
					CK((*filter->output_function)(0x28, filter->data));	/* '(' */
					CK((*filter->output_function)(0x51, filter->data));	/* 'Q' */
				}
				filter->status = 0x200;
				s2 = s1 & 0xff;
				s1 = (s1 >> 8) & 0xff;
			}
			CK((*filter->output_function)(s1, filter->data));
			CK((*filter->output_function)(s2, filter->data));
		} else {	/* X 0213 plane 2 */
			if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
				c1 = (s1 >> 8) & 0xff;
				c2 = s1 & 0xff;
				SJIS_ENCODE(c1, c2, s1, s2);
			} else {
				s2 = s1 & 0xff;
				k = ((s1 >> 8) & 0xff) - 0x7f;
				if (k >= 0 && k < jisx0213_p2_ofst_len) {
					s1 = jisx0213_p2_ofst[k] - 1 + 0x21;
				}
				if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
					s2 |= 0x80;
					s1 |= 0x80;
					CK((*filter->output_function)(0x8f, filter->data));
				} else {
					/* plane 2 has its own state value so that switching
					 * between plane 1 and plane 2 always emits the
					 * matching designation escape */
					if ((filter->status & 0xff00) != 0x400) {
						CK((*filter->output_function)(0x1b, filter->data));	/* ESC */
						CK((*filter->output_function)(0x24, filter->data));	/* '$' */
						CK((*filter->output_function)(0x28, filter->data));	/* '(' */
						CK((*filter->output_function)(0x50, filter->data));	/* 'P' */
					}
					filter->status = 0x400;
				}
			}

			CK((*filter->output_function)(s1, filter->data));
			CK((*filter->output_function)(s2, filter->data));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
	}

	return c;
}
/*
 * Flush handler for the wchar => SJIS-2004 converter (vtbl_wchar_sjis2004).
 *
 * If the encoder is still holding the first half of a possible combining
 * pair (filter->status == 1, table index in filter->cache), the fallback
 * code for that character is written out in the target encoding.  The
 * filter state is then reset and the downstream flush function, if any,
 * is invoked.
 *
 * Returns -1 if an output call fails, otherwise the downstream flush result
 * (or 0 when there is no downstream flush function).
 */
int
mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
{
	int k, c1, c2, s1, s2;

	k = filter->cache;

	/* k indexes jisx0213_u2_fb_tbl, so it must be strictly below the
	 * table length */
	if (filter->status == 1 && k >= 0 && k < jisx0213_u2_tbl_len) {
		s1 = jisx0213_u2_fb_tbl[k];

		if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
			c1 = (s1 >> 8) & 0xff;
			c2 = s1 & 0xff;
			SJIS_ENCODE(c1, c2, s1, s2);
		} else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
			s2 = (s1 & 0xff) | 0x80;
			s1 = ((s1 >> 8) & 0xff) | 0x80;
		} else {
			/* any other target: emit the raw two-byte code so that s2 is
			 * never written out uninitialized */
			s2 = s1 & 0xff;
			s1 = (s1 >> 8) & 0xff;
		}

		CK((*filter->output_function)(s1, filter->data));
		CK((*filter->output_function)(s2, filter->data));
	}
	filter->cache = 0;
	filter->status = 0;

	if (filter->flush_function != NULL) {
		return (*filter->flush_function)(filter->data);
	}

	return 0;
}

49
ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h

@ -0,0 +1,49 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter_sjis.c
* by rui hirokawa <hirokawa@php.net> on 15 aug 2011.
*
*/
#ifndef MBFL_MBFILTER_SJIS_2004_H
#define MBFL_MBFILTER_SJIS_2004_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables defined in mbfilter_sjis_2004.c */
extern const mbfl_encoding mbfl_encoding_sjis2004;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis2004;
extern const struct mbfl_convert_vtbl vtbl_sjis2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004;
/*
 * Shared JIS-2004 decoder / encoder / flush.  The decoder and encoder pick
 * the concrete encoding from filter->from / filter->to, and are also used
 * by the ISO-2022-JP-2004 filter tables.
 */
int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_SJIS_2004_H */
/*
* charset=UTF-8
*/

73
ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c

@ -37,26 +37,10 @@
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
static int mbfl_filt_ident_sjis_mac(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
extern const unsigned char mblen_table_sjis[];
static int mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter);
static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL};
@ -73,7 +57,7 @@ const struct mbfl_identify_vtbl vtbl_identify_sjis_mac = {
mbfl_no_encoding_sjis_mac,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_sjis_mac
mbfl_filt_ident_sjis
};
const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = {
@ -141,7 +125,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = {
#include "sjis_mac2uni.h"
const int code_tbl[][3] = {
static const int code_tbl[][3] = {
{0x02f0, 0x0303, 0x2460},
{0x030e, 0x0321, 0x2474},
{0x032c, 0x0334, 0x2776},
@ -151,7 +135,7 @@ const int code_tbl[][3] = {
{0x038a, 0x03a3, 0x249c},
};
const int code_ofst_tbl[] [2]= {
static const int code_ofst_tbl[] [2]= {
{0x03ac, 0x03c9},
{0x0406, 0x0420},
{0x0432, 0x0441},
@ -162,11 +146,11 @@ const int code_ofst_tbl[] [2]= {
{0x1ff2, 0x20a5},
};
const int *code_map[] = {
static const int *code_map[] = {
sjis_mac2wchar1, sjis_mac2wchar2, sjis_mac2wchar3, sjis_mac2wchar4,
sjis_mac2wchar5, sjis_mac2wchar6, sjis_mac2wchar7, sjis_mac2wchar8};
const int code_tbl_m[][6] = {
static const int code_tbl_m[][6] = {
{0x0340, 0xf860, 0x0030, 0x002e, 0x0000, 0x0000},
{0x03c9, 0xf860, 0x0054, 0x0042, 0x0000, 0x0000},
{0x035c, 0xf860, 0x0058, 0x0056, 0x0000, 0x0000},
@ -181,7 +165,7 @@ const int code_tbl_m[][6] = {
{0x0523, 0xf862, 0x8ca1, 0x56e3, 0x6cd5, 0x4eba},
};
const int s_form_tbl[] = {
static const int s_form_tbl[] = {
0x2010,0x2016,0x2026,
0x3001,0x3002,0x301c,0x3041,0x3043,0x3045,0x3047,0x3049,
0x3063,0x3083,0x3085,0x3087,0x308e,0x30a1,0x30a3,0x30a5,
@ -192,7 +176,7 @@ const int s_form_tbl[] = {
0x21e6,0x21e7,0x21e8,0x21e9, // black arrow f87a (4)
};
const int s_form_sjis_tbl[] = {
static const int s_form_sjis_tbl[] = {
0xeb5d,0xeb61,0xeb63,
0xeb41,0xeb42,0xeb60,0xec9f,0xeca1,0xeca3,0xeca5,0xeca7,
0xecc1,0xece1,0xece3,0xece5,0xecec,0xed40,0xed42,0xed44,
@ -203,7 +187,7 @@ const int s_form_sjis_tbl[] = {
0x86d4,0x86d5,0x86d3,0x86d6, // black arrow
};
const int s_form_sjis_fallback_tbl[] = {
static const int s_form_sjis_fallback_tbl[] = {
0x815d,0x8161,0x8163,
0x8141,0x8142,0x8160,0x829f,0x82a1,0x82a3,0x82a5,0x82a7,
0x82c1,0x82e1,0x82e3,0x82e5,0x82ec,0x8340,0x8342,0x8344,
@ -214,7 +198,7 @@ const int s_form_sjis_fallback_tbl[] = {
0x86d0,0x86d1,0x86cf,0x86d2, // arrow
};
const int wchar2sjis_mac_r_tbl[][3] = {
static const int wchar2sjis_mac_r_tbl[][3] = {
{0x2160, 0x216b, 0x034e},
{0x2170, 0x217b, 0x0362},
{0x2460, 0x2473, 0x02f0},
@ -226,7 +210,7 @@ const int wchar2sjis_mac_r_tbl[][3] = {
{0x32a4, 0x32a9, 0x04ba},
};
const int wchar2sjis_mac_r_map[][2] = {
static const unsigned short wchar2sjis_mac_r_map[][2] = {
{0x2660, 0x2667},
{0x322a, 0x3243},
{0x3296, 0x329e},
@ -234,10 +218,10 @@ const int wchar2sjis_mac_r_map[][2] = {
{0xfe30, 0xfe44},
};
const int *wchar2sjis_mac_code_map[] = {
static const int *wchar2sjis_mac_code_map[] = {
wchar2sjis_mac4, wchar2sjis_mac7, wchar2sjis_mac8, wchar2sjis_mac9, wchar2sjis_mac10};
const int wchar2sjis_mac_wchar_tbl[][2] = {
static const int wchar2sjis_mac_wchar_tbl[][2] = {
{0x2109, 0x03c2},
{0x2110, 0x21ef5},
{0x2113, 0x03bc},
@ -580,7 +564,7 @@ mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter)
}
if (s1 <= 0) {
for (i=0; i<sizeof(wchar2sjis_mac_r_map)/(2*sizeof(int));i++) {
for (i=0; i<sizeof(wchar2sjis_mac_r_map)/(2*sizeof(unsigned short));i++) {
if (c >= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) {
s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]];
break;
@ -812,7 +796,7 @@ mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter)
return c;
}
int
static int
mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter)
{
int i, c1, s1 = 0;
@ -839,24 +823,3 @@ mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter)
return 0;
}
/*
 * Identify filter for the MacJapanese flavour of Shift_JIS: consumes one
 * byte and sets filter->flag when the byte cannot belong to the encoding.
 * filter->status is non-zero while the second byte of a two-byte character
 * is expected.  Always returns c.
 */
static int mbfl_filt_ident_sjis_mac(int c, mbfl_identify_filter *filter)
{
	if (filter->status) {
		/* second byte of a kanji: 0x40..0xfc except 0x7f */
		int trail_ok = (c >= 0x40 && c <= 0xfc && c != 0x7f);

		if (!trail_ok) {
			filter->flag = 1;	/* bad */
		}
		filter->status = 0;
		return c;
	}

	if (c >= 0 && c < 0x80) {
		return c;		/* latin ok */
	}
	if (c > 0xa0 && c < 0xe0) {
		return c;		/* kana ok */
	}

	if (c > 0x80 && c < 0xfd && c != 0xa0) {
		filter->status = 1;	/* kanji first char */
	} else {
		filter->flag = 1;	/* bad */
	}
	return c;
}

1
ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h

@ -40,6 +40,5 @@ extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac;
int mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_SJIS_MAC_H */

20
ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c

@ -41,25 +41,7 @@
#include "emoji2uni.h"
extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
extern const unsigned char mblen_table_sjis[];
const mbfl_encoding mbfl_encoding_sjis_docomo = {
mbfl_no_encoding_sjis_docomo,

2
ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c

@ -139,7 +139,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis_open = {
} \
} while (0)
/*
* SJIS-win => wchar
*/
@ -308,6 +307,7 @@ mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter)
s1 = -1;
}
}
if (s1 >= 0) {
if (s1 < 0x100) { /* latin or kana */
CK((*filter->output_function)(s1, filter->data));

1
ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h

@ -33,6 +33,7 @@
#include "mbfilter.h"
extern const mbfl_encoding mbfl_encoding_sjis_open;
extern const struct mbfl_identify_vtbl vtbl_identify_sjis_open;
extern const struct mbfl_convert_vtbl vtbl_sjis_open_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_open;

174
ext/mbstring/libmbfl/filters/unicode_table_cp936.h

@ -6394,51 +6394,62 @@ const int ucs_i_cp936_table_min = 0x4d00;
const int ucs_i_cp936_table_max = 0x4d00 + (sizeof (ucs_i_cp936_table) / sizeof (unsigned short));
/*
 * 0xf900 CJK Compatibility Ideographs (with fallback)
 *
 * 304 entries (38 rows of 8); element i corresponds to code point
 * U+F900 + i, so the table spans U+F900 through U+FA2F. The last two
 * entries (U+FA2E, U+FA2F) are 0x0000.
 * NOTE(review): the values are presumably CP936 double-byte codes, with
 * 0x0000 meaning "no mapping" -- confirm against the conversion filter.
 */
static const unsigned short ucs_ci_cp936_table[] = { /* 0xf900 - 0xfa2f */
/* F900h */
0xD84D,0xB8FC,0xDC87,0xD95A,0xBBAC,0xB4AE,0xBEE4,0xFD94,
0xFD94,0xC6F5,0xBDF0,0xC0AE,0xC4CE,0x91D0,0xB05D,0xC15F,
0xCC7D,0xC2DD,0xC2E3,0xDF89,0x98B7,0xC2E5,0xC0D3,0xE7F3,
0xC2E4,0xC0D2,0xF198,0x8179,0xC2D1,0x99DA,0xA080,0xCC6D,
0xFB5B,0x8DB9,0x9E45,0xCB7B,0xD268,0xC0AD,0xC544,0xCF9E,
0xC0C8,0xC0CA,0xC0CB,0xC0C7,0xFD9C,0x81ED,0xC0E4,0x84DA,
0x93EF,0x99A9,0xA074,0xB152,0xC0CF,0xCC4A,0xCC94,0xC2B7,
0xC2B6,0xF494,0xFA98,0xC2B5,0xB593,0xBE47,0xC78A,0xE49B,
0xC2B9,0xD593,0x89C5,0xC5AA,0xBB5C,0xC340,0xC0CE,0xC0DA,
0xD954,0xC0D7,0x89BE,0x8CD2,0x98C7,0x9C49,0xC2A9,0xC0DB,
0xBF7C,0xC2AA,0xC0D5,0xC0DF,0x8443,0xC1E8,0xB6A0,0xBE63,
0xC1E2,0xC1EA,0xD778,0x9282,0x98B7,0xD65A,0xB5A4,0x8C8E,
0xC5AD,0xC2CA,0xAE90,0xB1B1,0xB491,0xB1E3,0x8FCD,0xB2BB,
0xC3DA,0x94B5,0xCBF7,0x85A2,0xC8FB,0xCAA1,0xC87E,0xD566,
0x9AA2,0xB3BD,0xC9F2,0xCAB0,0xC8F4,0xC2D3,0xC2D4,0xC1C1,
0x83C9,0xFD9D,0xC1BA,0xBC5A,0xC1BC,0xD58F,0xC1BF,0x84EE,
0x85CE,0xC5AE,0x8F5D,0xC2C3,0x9E56,0xB55A,0xE982,0xF350,
0xFB90,0xC0E8,0xC1A6,0x95D1,0x9A76,0xDE5D,0xC4EA,0x917A,
0x91D9,0x93D3,0x9D69,0x9F92,0xAD49,0xFD9E,0xBE9A,0xC293,
0xDD82,0xC98F,0xDF42,0xE580,0xC1D0,0xC1D3,0xD1CA,0xC1D2,
0xC1D1,0xD566,0xC1AE,0xC4EE,0xC4ED,0x9A9A,0xBA9F,0xAB43,
0xC1EE,0xE0F2,0x8C8E,0x8E58,0xC1AF,0xC1E1,0xAC93,0xC1E7,
0xF1F6,0xE28F,0xC1E3,0xEC60,0xEE49,0xC0FD,0xB659,0xF5B7,
0xEB60,0x90BA,0xC1CB,0xC1C5,0xE5BC,0xC4F2,0xC1CF,0x98B7,
0xC1C7,0xAF9F,0xDEA4,0xDF7C,0xFD88,0x959E,0xC8EE,0x84A2,
0x9683,0xC1F8,0xC1F7,0xC1EF,0xC1F0,0xC1F4,0xC1F2,0xBC7E,
0xEE90,0xC1F9,0xC2BE,0xEA91,0x8290,0x8D91,0x9C53,0xDD86,
0xC2C9,0x90FC,0xC0F5,0xC2CA,0xC2A1,0xC0FB,0xC0F4,0xC2C4,
0xD2D7,0xC0EE,0xC0E6,0xC4E0,0xC0ED,0xC1A1,0xEEBE,0xFD9F,
0xD165,0xC0EF,0xEB78,0xC4E4,0xC4E7,0xC1DF,0x9FFB,0xAD55,
0xCC41,0xFDA0,0xF75B,0xF7EB,0xC1D6,0xC1DC,0xC552,0xC1A2,
0xF3D2,0xC1A3,0xA0EE,0xD6CB,0xD752,0xCAB2,0xB2E8,0xB4CC,
/* FA00h */
0xC7D0,0xB6C8,0xCDD8,0xCCC7,0xD5AC,0xB6B4,0xB1A9,0xDD97,
0xD0D0,0xBDB5,0xD28A,0xC0AA,0xFE40,0xFE41,0xFE42,0xFE43,
0x8956,0xFE44,0xC7E7,0xFE45,0xFE46,0x8444,0xD869,0xD2E6,
0xFE47,0xC9F1,0xCFE9,0xB8A3,0xBEB8,0xBEAB,0xD3F0,0xFE48,
0xFE49,0xFE4A,0xD654,0xFE4B,0xFE4C,0xD2DD,0xB6BC,0xFE4D,
0xFE4E,0xFE4F,0xEF88,0xEF95,0xF05E,0xFA51,0x0000,0x0000,
};
/* 0xf900 CJK Compatibility Ideographs */
const unsigned short ucs_ci_cp936_table[] = {
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0xfd9c,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0xfd9d,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0xfd9e,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xfd9f,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0xfda0,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
/* 0xfa00 */
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0xfe40,0xfe41,0xfe42,0xfe43,
0x0000,0xfe44,0x0000,0xfe45,0xfe46,0x0000,0x0000,0x0000,
0xfe47,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xfe48,
0xfe49,0xfe4a,0x0000,0xfe4b,0xfe4c,0x0000,0x0000,0xfe4d,
0xfe4e,0xfe4f,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000};
const int ucs_ci_cp936_table_min = 0xf900;
const int ucs_ci_cp936_table_max = 0xf900 + (sizeof (ucs_ci_cp936_table) / sizeof (unsigned short));
const int ucs_ci_cp936_table_max = 0xfa2f;
/*
 * reduced map for GBK: U+FA0C - U+FA29
 *
 * 30 entries; element i corresponds to code point U+FA0C + i.
 * NOTE(review): 0x0000 presumably marks a code point with no GBK
 * mapping -- confirm against the code that reads this table.
 */
const unsigned short ucs_ci_s_cp936_table[] = {
/* U+FA0C */
0xfe40,0xfe41,0xfe42,0xfe43,
/* U+FA10 */
0x0000,0xfe44,0x0000,0xfe45,0xfe46,0x0000,0x0000,0x0000,
/* U+FA18 */
0xfe47,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xfe48,
/* U+FA20 */
0xfe49,0xfe4a,0x0000,0xfe4b,0xfe4c,0x0000,0x0000,0xfe4d,
/* U+FA28 */
0xfe4e,0xfe4f};
/* FE30h CJK Compatibility Forms */
const unsigned short ucs_cf_cp936_table[] = {
@ -6462,42 +6473,48 @@ const int ucs_sfv_cp936_table_min = 0xfe50;
const int ucs_sfv_cp936_table_max = 0xfe50 + (sizeof (ucs_sfv_cp936_table) / sizeof (unsigned short));
/* FF00h Halfwidth and Fullwidth Forms */
const unsigned short ucs_hff_cp936_table[] = {
0x0000,0xa3a1,0xa3a2,0xa3a3,0xa1e7,0xa3a5,0xa3a6,0xa3a7,
0xa3a8,0xa3a9,0xa3aa,0xa3ab,0xa3ac,0xa3ad,0xa3ae,0xa3af,
0xa3b0,0xa3b1,0xa3b2,0xa3b3,0xa3b4,0xa3b5,0xa3b6,0xa3b7,
0xa3b8,0xa3b9,0xa3ba,0xa3bb,0xa3bc,0xa3bd,0xa3be,0xa3bf,
0xa3c0,0xa3c1,0xa3c2,0xa3c3,0xa3c4,0xa3c5,0xa3c6,0xa3c7,
0xa3c8,0xa3c9,0xa3ca,0xa3cb,0xa3cc,0xa3cd,0xa3ce,0xa3cf,
0xa3d0,0xa3d1,0xa3d2,0xa3d3,0xa3d4,0xa3d5,0xa3d6,0xa3d7,
0xa3d8,0xa3d9,0xa3da,0xa3db,0xa3dc,0xa3dd,0xa3de,0xa3df,
0xa3e0,0xa3e1,0xa3e2,0xa3e3,0xa3e4,0xa3e5,0xa3e6,0xa3e7,
0xa3e8,0xa3e9,0xa3ea,0xa3eb,0xa3ec,0xa3ed,0xa3ee,0xa3ef,
0xa3f0,0xa3f1,0xa3f2,0xa3f3,0xa3f4,0xa3f5,0xa3f6,0xa3f7,
0xa3f8,0xa3f9,0xa3fa,0xa3fb,0xa3fc,0xa3fd,0xa1ab,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0xa1e9,0xa1ea,0xa956,0xa3fe,0xa957,0xa3a4,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000
/*
 * reduced map: U+FFE0 - U+FFE5
 *
 * Six entries; element i corresponds to code point U+FFE0 + i.
 */
const unsigned short ucs_hff_s_cp936_table[] = {
0xa1e9,0xa1ea,0xa956,0xa3fe,0xa957,0xa3a4,
};
const int ucs_hff_cp936_table_min = 0xff00;
const int ucs_hff_cp936_table_max = 0xff00 + (sizeof (ucs_hff_cp936_table) / sizeof (unsigned short));
const int ucs_hff_cp936_table_max = 0xffff;
/*
 * CP936 local: range table for Private Use Area code points.
 *
 * Every row carries three values. The first two form an inclusive range
 * of code points; the rows are sorted and contiguous, together covering
 * 0xe766 through 0xe864.
 * NOTE(review): the third value looks like the CP936 code assigned to the
 * first code point of the range, with the remaining code points of the
 * range following sequentially -- confirm against the conversion filter
 * that consumes this table.
 */
static const unsigned short mbfl_cp936_pua_tbl[][3] = {
	{ 0xe766, 0xe76b, 0xa2ab },
	{ 0xe76c, 0xe76d, 0xa2e3 },
	{ 0xe76e, 0xe76f, 0xa2ef },
	{ 0xe770, 0xe771, 0xa2fd },
	{ 0xe772, 0xe77c, 0xa4f4 },
	{ 0xe77d, 0xe784, 0xa5f7 },
	{ 0xe785, 0xe78c, 0xa6b9 },
	{ 0xe78d, 0xe793, 0xa6d9 },
	{ 0xe794, 0xe795, 0xa6ec },
	{ 0xe796, 0xe796, 0xa6f3 },
	{ 0xe797, 0xe79f, 0xa6f6 },
	{ 0xe7a0, 0xe7ae, 0xa7c2 },
	{ 0xe7af, 0xe7bb, 0xa7f2 },
	{ 0xe7bc, 0xe7c6, 0xa896 },
	{ 0xe7c7, 0xe7c7, 0xa8bc },
	{ 0xe7c8, 0xe7c8, 0xa8bf },
	{ 0xe7c9, 0xe7cc, 0xa8c1 },
	{ 0xe7cd, 0xe7e1, 0xa8ea },
	{ 0xe7e2, 0xe7e2, 0xa958 },
	{ 0xe7e3, 0xe7e3, 0xa95b },
	{ 0xe7e4, 0xe7e6, 0xa95d },
	{ 0xe7e7, 0xe7f3, 0xa989 },
	{ 0xe7f4, 0xe800, 0xa997 },
	{ 0xe801, 0xe80f, 0xa9f0 },
	{ 0xe810, 0xe814, 0xd7fa },
	{ 0xe815, 0xe843, 0xfe50 },
	{ 0xe844, 0xe864, 0xfe80 },
};

/* Number of rows in mbfl_cp936_pua_tbl. */
static const int mbfl_cp936_pua_tbl_max =
	sizeof(mbfl_cp936_pua_tbl) / sizeof(mbfl_cp936_pua_tbl[0]);
#else
@ -6506,10 +6523,11 @@ extern const unsigned short ucs_a1_cp936_table[];
extern const unsigned short ucs_a2_cp936_table[];
extern const unsigned short ucs_a3_cp936_table[];
extern const unsigned short ucs_i_cp936_table[];
extern const unsigned short ucs_ci_cp936_table[];
extern const unsigned short ucs_cf_cp936_table[];
extern const unsigned short ucs_sfv_cp936_table[];
extern const unsigned short ucs_hff_cp936_table[];
extern const unsigned short ucs_ci_s_cp936_table[];
extern const unsigned short ucs_hff_s_cp936_table[];
extern const int cp936_ucs_table_size;
extern const int ucs_a1_cp936_table_min;

2
ext/mbstring/libmbfl/filters/unicode_table_jis.h

@ -5820,7 +5820,7 @@ const unsigned short ucs_r_jis_table[] = {
0x2177,0x2341,0x2342,0x2343,0x2344,0x2345,0x2346,0x2347,
0x2348,0x2349,0x234A,0x234B,0x234C,0x234D,0x234E,0x234F,
0x2350,0x2351,0x2352,0x2353,0x2354,0x2355,0x2356,0x2357,
0x2358,0x2359,0x235A,0x214E,0x2140,0x214F,0x2130,0x2132,
0x2358,0x2359,0x235A,0x214E,0x0000,0x214F,0x2130,0x2132,
0x212E,0x2361,0x2362,0x2363,0x2364,0x2365,0x2366,0x2367,
0x2368,0x2369,0x236A,0x236B,0x236C,0x236D,0x236E,0x236F,
0x2370,0x2371,0x2372,0x2373,0x2374,0x2375,0x2376,0x2377,

5026
ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
File diff suppressed because it is too large
View File

2
ext/mbstring/libmbfl/mbfl/mbfilter.h

@ -103,7 +103,7 @@
*/
#define MBFL_VERSION_MAJOR 1
#define MBFL_VERSION_MINOR 3
#define MBFL_VERSION_TEENY 1
#define MBFL_VERSION_TEENY 2
/*
* convert filter

1
ext/mbstring/libmbfl/mbfl/mbfl_consts.h

@ -51,6 +51,7 @@
#define MBFL_WCSPLANE_UTF32MAX 0x00110000
#define MBFL_WCSPLANE_SUPMIN 0x00010000
#define MBFL_WCSPLANE_SUPMAX 0x00200000
#define MBFL_WCSPLANE_JIS0213 0x70e00000 /* JIS HEX : 2121h - 7E7Eh */
#define MBFL_WCSPLANE_JIS0208 0x70e10000 /* JIS HEX : 2121h - 7E7Eh */
#define MBFL_WCSPLANE_JIS0212 0x70e20000 /* JIS HEX : 2121h - 7E7Eh */
#define MBFL_WCSPLANE_WINCP932 0x70e30000 /* JIS HEX : 2121h - 9898h */

17
ext/mbstring/libmbfl/mbfl/mbfl_convert.c

@ -52,12 +52,15 @@
#include "filters/mbfilter_iso2022_kr.h"
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_sjis_open.h"
#include "filters/mbfilter_sjis_2004.h"
#include "filters/mbfilter_sjis_mobile.h"
#include "filters/mbfilter_sjis_mac.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_jis.h"
#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_iso2022jp_2004.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_2004.h"
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_gb18030.h"
#include "filters/mbfilter_ascii.h"
@ -115,6 +118,8 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_wchar_sjis,
&vtbl_sjis_open_wchar,
&vtbl_wchar_sjis_open,
&vtbl_sjis2004_wchar,
&vtbl_wchar_sjis2004,
&vtbl_cp51932_wchar,
&vtbl_wchar_cp51932,
&vtbl_jis_wchar,
@ -125,8 +130,12 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_wchar_2022jp,
&vtbl_2022jpms_wchar,
&vtbl_wchar_2022jpms,
&vtbl_2022jp_2004_wchar,
&vtbl_wchar_2022jp_2004,
&vtbl_eucjpwin_wchar,
&vtbl_wchar_eucjpwin,
&vtbl_eucjp2004_wchar,
&vtbl_wchar_eucjp2004,
&vtbl_cp932_wchar,
&vtbl_wchar_cp932,
&vtbl_sjis_docomo_wchar,
@ -165,6 +174,8 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_wchar_euctw,
&vtbl_big5_wchar,
&vtbl_wchar_big5,
&vtbl_cp950_wchar,
&vtbl_wchar_cp950,
&vtbl_euckr_wchar,
&vtbl_wchar_euckr,
&vtbl_uhc_wchar,
@ -484,9 +495,15 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
case MBFL_WCSPLANE_JIS0212:
ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"JIS2+");
break;
case MBFL_WCSPLANE_JIS0213:
ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"JIS3+");
break;
case MBFL_WCSPLANE_WINCP932:
ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"W932+");
break;
case MBFL_WCSPLANE_GB18030:
ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"GB+");
break;
case MBFL_WCSPLANE_8859_1:
ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"I8859_1+");
break;

7
ext/mbstring/libmbfl/mbfl/mbfl_encoding.c

@ -60,11 +60,14 @@
#include "filters/mbfilter_sjis_open.h"
#include "filters/mbfilter_sjis_mobile.h"
#include "filters/mbfilter_sjis_mac.h"
#include "filters/mbfilter_sjis_2004.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_jis.h"
#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_iso2022jp_2004.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_euc_jp_2004.h"
#include "filters/mbfilter_gb18030.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
@ -159,6 +162,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_euc_jp,
&mbfl_encoding_sjis,
&mbfl_encoding_eucjp_win,
&mbfl_encoding_eucjp2004,
&mbfl_encoding_sjis_open,
&mbfl_encoding_sjis_docomo,
&mbfl_encoding_sjis_kddi,
@ -168,6 +172,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_sjis_kddi_pua_b,
&mbfl_encoding_sjis_sb_pua,
&mbfl_encoding_sjis_mac,
&mbfl_encoding_sjis2004,
&mbfl_encoding_utf8_docomo,
&mbfl_encoding_utf8_kddi,
&mbfl_encoding_utf8_kddi_b,
@ -199,6 +204,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_hz,
&mbfl_encoding_euc_tw,
&mbfl_encoding_big5,
&mbfl_encoding_cp950,
&mbfl_encoding_euc_kr,
&mbfl_encoding_uhc,
&mbfl_encoding_2022kr,
@ -209,6 +215,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_armscii8,
&mbfl_encoding_cp850,
&mbfl_encoding_jis_ms,
&mbfl_encoding_2022jp_2004,
&mbfl_encoding_cp50220,
&mbfl_encoding_cp50220raw,
&mbfl_encoding_cp50221,

4
ext/mbstring/libmbfl/mbfl/mbfl_encoding.h

@ -70,6 +70,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_utf7imap,
mbfl_no_encoding_ascii,
mbfl_no_encoding_euc_jp,
mbfl_no_encoding_eucjp2004,
mbfl_no_encoding_sjis,
mbfl_no_encoding_eucjp_win,
mbfl_no_encoding_sjis_open,
@ -81,10 +82,12 @@ enum mbfl_no_encoding {
mbfl_no_encoding_sjis_kddi_pua_b,
mbfl_no_encoding_sjis_sb_pua,
mbfl_no_encoding_sjis_mac,
mbfl_no_encoding_sjis2004,
mbfl_no_encoding_cp932,
mbfl_no_encoding_cp51932,
mbfl_no_encoding_jis,
mbfl_no_encoding_2022jp,
mbfl_no_encoding_2022jp_2004,
mbfl_no_encoding_2022jpms,
mbfl_no_encoding_gb18030,
mbfl_no_encoding_cp1252,
@ -106,6 +109,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_cp936,
mbfl_no_encoding_euc_tw,
mbfl_no_encoding_big5,
mbfl_no_encoding_cp950,
mbfl_no_encoding_euc_kr,
mbfl_no_encoding_2022kr,
mbfl_no_encoding_uhc,

5
ext/mbstring/libmbfl/mbfl/mbfl_ident.c

@ -54,8 +54,10 @@
#include "filters/mbfilter_sjis_mobile.h"
#include "filters/mbfilter_jis.h"
#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_iso2022jp_2004.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_euc_jp_2004.h"
#include "filters/mbfilter_utf8_mobile.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
@ -114,10 +116,12 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
&vtbl_identify_sjis,
&vtbl_identify_sjis_open,
&vtbl_identify_eucjpwin,
&vtbl_identify_eucjp2004,
&vtbl_identify_cp932,
&vtbl_identify_jis,
&vtbl_identify_2022jp,
&vtbl_identify_2022jpms,
&vtbl_identify_2022jp_2004,
&vtbl_identify_cp51932,
&vtbl_identify_sjis_docomo,
&vtbl_identify_sjis_kddi,
@ -131,6 +135,7 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
&vtbl_identify_hz,
&vtbl_identify_euctw,
&vtbl_identify_big5,
&vtbl_identify_cp950,
&vtbl_identify_euckr,
&vtbl_identify_uhc,
&vtbl_identify_2022kr,

Loading…
Cancel
Save