Browse Source

- added koi8-u (Ukrainian) and CP1254 (Turkish).

PECL
Rui Hirokawa 18 years ago
parent
commit
dea0d31baf
  1. 3
      ext/mbstring/config.m4
  2. 3
      ext/mbstring/config.w32
  3. 15
      ext/mbstring/libmbfl/AUTHORS
  4. 6
      ext/mbstring/libmbfl/Makefile.am
  5. 15
      ext/mbstring/libmbfl/configure.in
  6. 131
      ext/mbstring/libmbfl/filters/Makefile.am
  7. 51
      ext/mbstring/libmbfl/filters/Makefile.bcc32
  8. 156
      ext/mbstring/libmbfl/filters/mbfilter_cp1254.c
  9. 43
      ext/mbstring/libmbfl/filters/mbfilter_cp1254.h
  10. 146
      ext/mbstring/libmbfl/filters/mbfilter_koi8u.c
  11. 47
      ext/mbstring/libmbfl/filters/mbfilter_koi8u.h
  12. BIN
      ext/mbstring/libmbfl/filters/unicode_table_cp1254.h
  13. 166
      ext/mbstring/libmbfl/filters/unicode_table_koi8u.h
  14. 12
      ext/mbstring/libmbfl/libmbfl.dsp
  15. 26
      ext/mbstring/libmbfl/libmbfl.sln
  16. 777
      ext/mbstring/libmbfl/libmbfl.vcproj
  17. 8
      ext/mbstring/libmbfl/mbfl.rc
  18. 29
      ext/mbstring/libmbfl/mbfl/Makefile.am
  19. 13
      ext/mbstring/libmbfl/mbfl/Makefile.bcc32
  20. 7
      ext/mbstring/libmbfl/mbfl/mbfilter.h
  21. 2
      ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h
  22. 4
      ext/mbstring/libmbfl/mbfl/mbfilter_pass.h
  23. 2
      ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h
  24. 4
      ext/mbstring/libmbfl/mbfl/mbfl_consts.h
  25. 6
      ext/mbstring/libmbfl/mbfl/mbfl_convert.c
  26. 24
      ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
  27. 2
      ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
  28. 4
      ext/mbstring/libmbfl/mbfl/mbfl_ident.c
  29. 2
      ext/mbstring/libmbfl/mbfl/mbfl_language.c
  30. 1
      ext/mbstring/libmbfl/mbfl/mbfl_language.h
  31. 23
      ext/mbstring/libmbfl/nls/Makefile.am
  32. 12
      ext/mbstring/libmbfl/nls/Makefile.bcc32
  33. 2
      ext/mbstring/libmbfl/nls/nls_ru.c
  34. 22
      ext/mbstring/libmbfl/nls/nls_ua.c
  35. 9
      ext/mbstring/libmbfl/nls/nls_ua.h
  36. 10
      ext/mbstring/libmbfl/tests/Makefile.am
  37. 104
      ext/mbstring/libmbfl/tests/conv_encoding.c
  38. 1
      ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am
  39. 33
      ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp
  40. 35
      ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp
  41. 35
      ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp
  42. 147
      ext/mbstring/libmbfl/tests/conv_kana.c
  43. 1
      ext/mbstring/libmbfl/tests/conv_kana.tests/Makefile.am
  44. 1098
      ext/mbstring/libmbfl/tests/conv_kana.tests/conv_kana.exp
  45. 113
      ext/mbstring/libmbfl/tests/strcut.c
  46. 1
      ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am
  47. 129
      ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp
  48. 91
      ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp
  49. 91
      ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp
  50. 79
      ext/mbstring/libmbfl/tests/strwidth.c
  51. 1
      ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am
  52. 47
      ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp
  53. 8
      ext/mbstring/mbstring.c

3
ext/mbstring/config.m4

@ -221,6 +221,8 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_utf7imap.c
libmbfl/filters/mbfilter_utf8.c
libmbfl/filters/mbfilter_uuencode.c
libmbfl/filters/mbfilter_koi8u.c
libmbfl/filters/mbfilter_cp1254.c
libmbfl/mbfl/mbfilter.c
libmbfl/mbfl/mbfilter_8bit.c
libmbfl/mbfl/mbfilter_pass.c
@ -243,6 +245,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/nls/nls_zh.c
libmbfl/nls/nls_hy.c
libmbfl/nls/nls_tr.c
libmbfl/nls/nls_ua.c
])
PHP_MBSTRING_ADD_CFLAG([-DHAVE_CONFIG_H])
else

3
ext/mbstring/config.w32

@ -33,6 +33,7 @@ if (PHP_MBSTRING == "yes") {
mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
mbfilter_koi8u.c mbfilter_cp1254.c \
mbfilter_uuencode.c mbfilter_armscii8.c", "mbstring");
ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
@ -42,7 +43,7 @@ if (PHP_MBSTRING == "yes") {
ADD_SOURCES("ext/mbstring/libmbfl/nls", "nls_de.c nls_en.c nls_ja.c \
nls_kr.c nls_neutral.c nls_ru.c nls_uni.c nls_zh.c nls_hy.c \
nls_tr.c", "mbstring");
nls_ua.c nls_tr.c", "mbstring");
AC_DEFINE('HAVE_MBSTRING', 1, 'Have mbstring support');
AC_DEFINE('HAVE_MBSTR_CN', 1, 'CN');

15
ext/mbstring/libmbfl/AUTHORS

@ -1,10 +1,13 @@
Den V. Tsopa <tdv@edisoft.ru>
Hironori Sato <satoh@jpnnet.com>
Marcus Boerger <helly@php.net>
Moriyoshi Koizumi <moriyoshi@php.net>
Hayk Chamyan <hamshen@gmail.com>
Wez Furlong <wez@thebrainroom.com>
Rui Hirokawa <hirokawa@php.net>
Shigeru Kanemoto <sgk@happysize.co.jp>
Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
Tateyama <tateyan@amy.hi-ho.ne.jp>
U. Kenkichi <kenkichi@axes.co.jp>
Wez Furlong <wez@thebrainroom.com>
Moriyoshi Koizumi <moriyoshi@php.net>
Hironori Sato <satoh@jpnnet.com>
Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
Tateyama <tateyan@amy.hi-ho.ne.jp>
Den V. Tsopa <tdv@edisoft.ru>
Maksym Veremeyenko <verem@m1stereo.tv>
Haluk AKIN <halukakin@gmail.com>

6
ext/mbstring/libmbfl/Makefile.am

@ -1,5 +1,9 @@
AUTOMAKE_OPTIONS=dejagnu
DEJATOOL=conv_encoding conv_kana strwidth strcut
RUNTESTDEFAULTFLAGS=--tool $$tool --srcdir "$$srcdir"/tests
LANG=C
EXTRA_DIST=AUTHORS DISCLAIMER LICENSE Makefile.bcc32 \
config.h.bcc32 config.h.vc6 \
libmbfl.dsp libmbfl.dsw libmbfl.sln libmbfl.vcproj mbfl.rc \
mksbcc32.bat rules.mak.bcc32
SUBDIRS = nls filters mbfl
SUBDIRS = nls filters mbfl tests

15
ext/mbstring/libmbfl/configure.in

@ -1,10 +1,10 @@
# Process this file with autoconf to produce a configure script.
AC_INIT(mbfl/mbfilter.c)
AM_INIT_AUTOMAKE(libmbfl, 1.0.0)
AM_INIT_AUTOMAKE(libmbfl, 1.0.2)
AC_CONFIG_SRCDIR(mbfl/mbfilter.c)
AM_CONFIG_HEADER(config.h)
SHLIB_VERSION="1:0:0"
SHLIB_VERSION="1:0:2"
AC_SUBST(SHLIB_VERSION)
# Checks for programs.
@ -34,5 +34,14 @@ if test "$FETCH_VIA_FTP" = "curl"; then
FETCH_VIA_FTP="curl -O"
fi
AC_CONFIG_FILES([Makefile mbfl/Makefile filters/Makefile nls/Makefile])
AC_CONFIG_FILES([
Makefile
mbfl/Makefile
filters/Makefile
nls/Makefile
tests/Makefile
tests/conv_encoding.tests/Makefile
tests/conv_kana.tests/Makefile
tests/strwidth.tests/Makefile
tests/strcut.tests/Makefile])
AC_OUTPUT

131
ext/mbstring/libmbfl/filters/Makefile.am

@ -2,7 +2,136 @@ EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk
noinst_LTLIBRARIES=libmbfl_filters.la
INCLUDES=-I../mbfl
libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION)
libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_cp51932.c mbfilter_iso2022_jp_ms.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp51932.h mbfilter_iso2022_jp_ms.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h 
unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h
libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
mbfilter_hz.c \
mbfilter_euc_tw.c \
mbfilter_big5.c \
mbfilter_euc_jp.c \
mbfilter_jis.c \
mbfilter_iso8859_1.c \
mbfilter_iso8859_2.c \
mbfilter_cp1254.c \
mbfilter_cp1252.c \
mbfilter_cp1251.c \
mbfilter_ascii.c \
mbfilter_iso8859_3.c \
mbfilter_iso8859_4.c \
mbfilter_iso8859_5.c \
mbfilter_iso8859_6.c \
mbfilter_iso8859_7.c \
mbfilter_iso8859_8.c \
mbfilter_iso8859_9.c \
mbfilter_iso8859_10.c \
mbfilter_iso8859_13.c \
mbfilter_iso8859_14.c \
mbfilter_iso8859_15.c \
mbfilter_iso8859_16.c \
mbfilter_htmlent.c \
mbfilter_byte2.c \
mbfilter_byte4.c \
mbfilter_uuencode.c \
mbfilter_base64.c \
mbfilter_sjis.c \
mbfilter_7bit.c \
mbfilter_qprint.c \
mbfilter_ucs4.c \
mbfilter_ucs2.c \
mbfilter_utf32.c \
mbfilter_utf16.c \
mbfilter_utf8.c \
mbfilter_utf7.c \
mbfilter_utf7imap.c \
mbfilter_euc_jp_win.c \
mbfilter_cp932.c \
mbfilter_cp51932.c \
mbfilter_euc_cn.c \
mbfilter_euc_kr.c \
mbfilter_uhc.c \
mbfilter_iso2022_kr.c \
mbfilter_cp866.c \
mbfilter_koi8r.c \
mbfilter_koi8u.c \
mbfilter_armscii8.c \
html_entities.c \
cp932_table.h \
html_entities.h \
mbfilter_7bit.h \
mbfilter_ascii.h \
mbfilter_base64.h \
mbfilter_big5.h \
mbfilter_byte2.h \
mbfilter_byte4.h \
mbfilter_cp1251.h \
mbfilter_cp1252.h \
mbfilter_cp1254.h \
mbfilter_cp866.h \
mbfilter_cp932.h \
mbfilter_cp936.h \
mbfilter_euc_cn.h \
mbfilter_euc_jp.h \
mbfilter_euc_jp_win.h \
mbfilter_euc_kr.h \
mbfilter_euc_tw.h \
mbfilter_htmlent.h \
mbfilter_hz.h \
mbfilter_iso2022_kr.h \
mbfilter_iso8859_1.h \
mbfilter_iso8859_10.h \
mbfilter_iso8859_13.h \
mbfilter_iso8859_14.h \
mbfilter_iso8859_15.h \
mbfilter_iso8859_16.h \
mbfilter_iso8859_2.h \
mbfilter_iso8859_3.h \
mbfilter_iso8859_4.h \
mbfilter_iso8859_5.h \
mbfilter_iso8859_6.h \
mbfilter_iso8859_7.h \
mbfilter_iso8859_8.h \
mbfilter_iso8859_9.h \
mbfilter_jis.h \
mbfilter_koi8r.h \
mbfilter_koi8u.h \
mbfilter_armscii8.h \
mbfilter_qprint.h \
mbfilter_sjis.h \
mbfilter_ucs2.h \
mbfilter_ucs4.h \
mbfilter_uhc.h \
mbfilter_utf16.h \
mbfilter_utf32.h \
mbfilter_utf7.h \
mbfilter_utf7imap.h \
mbfilter_utf8.h \
mbfilter_uuencode.h \
mbfilter_cp51932.h \
unicode_prop.h \
unicode_table_big5.h \
unicode_table_cns11643.h \
unicode_table_cp1251.h \
unicode_table_cp1252.h \
unicode_table_cp1254.h \
unicode_table_cp866.h \
unicode_table_cp932_ext.h \
unicode_table_cp936.h \
unicode_table_iso8859_10.h \
unicode_table_iso8859_13.h \
unicode_table_iso8859_14.h \
unicode_table_iso8859_15.h \
unicode_table_iso8859_16.h \
unicode_table_iso8859_2.h \
unicode_table_iso8859_3.h \
unicode_table_iso8859_4.h \
unicode_table_iso8859_5.h \
unicode_table_iso8859_6.h \
unicode_table_iso8859_7.h \
unicode_table_iso8859_8.h \
unicode_table_iso8859_9.h \
unicode_table_jis.h \
unicode_table_koi8r.h \
unicode_table_koi8u.h \
unicode_table_armscii8.h \
unicode_table_uhc.h
mbfilter_iso8859_2.c: unicode_table_iso8859_2.h

51
ext/mbstring/libmbfl/filters/Makefile.bcc32

@ -1,6 +1,55 @@
!include ..\rules.mak.bcc32
INCLUDES=$(INCLUDES) -I../mbfl
OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_iso8859_16.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj mbfilter_armscii8.obj
OBJS=mbfilter_cp936.obj \
mbfilter_hz.obj \
mbfilter_euc_tw.obj \
mbfilter_big5.obj \
mbfilter_euc_jp.obj \
mbfilter_jis.obj \
mbfilter_iso8859_1.obj \
mbfilter_iso8859_2.obj \
mbfilter_cp1252.obj \
mbfilter_cp1251.obj \
mbfilter_cp1254.obj \
mbfilter_ascii.obj \
mbfilter_iso8859_3.obj \
mbfilter_iso8859_4.obj \
mbfilter_iso8859_5.obj \
mbfilter_iso8859_6.obj \
mbfilter_iso8859_7.obj \
mbfilter_iso8859_8.obj \
mbfilter_iso8859_9.obj \
mbfilter_iso8859_10.obj \
mbfilter_iso8859_13.obj \
mbfilter_iso8859_14.obj \
mbfilter_iso8859_15.obj \
mbfilter_iso8859_16.obj \
mbfilter_htmlent.obj \
mbfilter_byte2.obj \
mbfilter_byte4.obj \
mbfilter_uuencode.obj \
mbfilter_base64.obj \
mbfilter_sjis.obj \
mbfilter_7bit.obj \
mbfilter_qprint.obj \
mbfilter_ucs4.obj \
mbfilter_ucs2.obj \
mbfilter_utf32.obj \
mbfilter_utf16.obj \
mbfilter_utf8.obj \
mbfilter_utf7.obj \
mbfilter_utf7imap.obj \
mbfilter_euc_jp_win.obj \
mbfilter_cp932.obj \
mbfilter_euc_cn.obj \
mbfilter_euc_kr.obj \
mbfilter_uhc.obj \
mbfilter_iso2022_kr.obj \
mbfilter_cp866.obj \
mbfilter_koi8r.obj \
mbfilter_koi8u.obj \
html_entities.obj \
mbfilter_armscii8.obj
all: $(OBJS)

156
ext/mbstring/libmbfl/filters/mbfilter_cp1254.c

@ -0,0 +1,156 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Haluk AKIN <halukakin@gmail.com>
*
*/
/*
* the source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_cp1254.h"
#include "unicode_table_cp1254.h"
/* Forward declaration; the identification callback is defined later in this file. */
static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter);
/* NULL-terminated list of alternative names for this encoding. */
static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL};
/*
 * Encoding descriptor for Windows-1254 (CP1254, Turkish).
 *
 * The initializer is positional: encoding id, two name strings, a pointer
 * to the alias list above, a NULL slot and the MBFL_ENCTYPE_SBCS flag.
 * NOTE(review): the two strings are presumably the canonical name and the
 * MIME name, and the NULL slot presumably a multibyte length table that a
 * single-byte charset does not need -- confirm against the definition of
 * mbfl_encoding.
 */
const mbfl_encoding mbfl_encoding_cp1254 = {
mbfl_no_encoding_cp1254,
"Windows-1254",
"Windows-1254",
(const char *(*)[])&mbfl_encoding_cp1254_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identification vtable for CP1254: encoding id, the shared ("common")
 * constructor and destructor, and this file's per-character
 * identification callback.
 * NOTE(review): slot meaning is inferred from the initializer names --
 * confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_cp1254 = {
mbfl_no_encoding_cp1254,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_cp1254
};
/*
 * Conversion vtable CP1254 -> wchar. The first two entries name the
 * source and destination encodings; the remaining entries are the shared
 * constructor/destructor, the per-character converter and the shared
 * flush routine.
 * NOTE(review): slot order inferred from the names -- confirm against
 * struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_cp1254_wchar = {
mbfl_no_encoding_cp1254,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_cp1254_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion vtable wchar -> CP1254; mirror image of vtbl_cp1254_wchar
 * with the source/destination ids swapped and the opposite converter.
 */
const struct mbfl_convert_vtbl vtbl_wchar_cp1254 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp1254,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_cp1254,
mbfl_filt_conv_common_flush
};
/*
 * CK(statement): evaluate `statement` once; if the result is negative,
 * return -1 from the enclosing function. Used to propagate failures of
 * downstream output callbacks.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
/*
 * wchar => cp1254
 *
 * Converts one wide-character code `c` into a CP1254 byte and passes it
 * to filter->output_function.
 *
 *  - Codes below 0x80 are passed through unchanged.
 *  - Other codes are searched for in cp1254_ucs_table, scanning from the
 *    last entry towards the first; 0xfffe is never accepted as a match.
 *  - If nothing matched and `c` carries the MBFL_WCSPLANE_CP1254 tag, the
 *    low bits (MBFL_WCSPLANE_MASK) are emitted as the byte.
 *  - Otherwise mbfl_filt_conv_illegal_output() is invoked unless the
 *    filter's illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns `c`, or -1 when a downstream call reports failure (via CK).
 */
int mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter)
{
	int byte = -1;
	int idx;

	if (c < 0x80) {
		byte = c;
	} else {
		/* 0xfffe can never match, so skip the scan for it entirely */
		if (c != 0xfffe) {
			for (idx = cp1254_ucs_table_len - 1; idx >= 0; idx--) {
				if (cp1254_ucs_table[idx] == c) {
					byte = cp1254_ucs_table_min + idx;
					break;
				}
			}
		}
		if (byte <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1254) {
			byte = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (byte < 0) {
		/* no representation in CP1254 */
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(byte, filter->data));
	return c;
}
/*
 * cp1254 => wchar
 *
 * Converts one CP1254 input value `c` into a wide-character code and
 * passes it to filter->output_function.
 *
 *  - 0 <= c < cp1254_ucs_table_min: passed through unchanged.
 *  - cp1254_ucs_table_min <= c < 0x100: looked up in cp1254_ucs_table.
 *    An entry that is <= 0 or equal to 0xfffe is treated as "no mapping";
 *    the byte is then emitted tagged with MBFL_WCSPLANE_CP1254 so that
 *    mbfl_filt_conv_wchar_cp1254() can recover it.
 *  - anything else: masked with MBFL_WCSGROUP_MASK and tagged with
 *    MBFL_WCSGROUP_THROUGH.
 *
 * Returns `c`, or -1 when the output function reports failure (via CK).
 */
int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter)
{
	int s;

	if (c >= 0 && c < cp1254_ucs_table_min) {
		s = c;
	} else if (c >= cp1254_ucs_table_min && c < 0x100) {
		s = cp1254_ucs_table[c - cp1254_ucs_table_min];
		/*
		 * The wchar => cp1254 direction refuses to match 0xfffe, i.e. it
		 * treats that table value as an "unassigned" marker. Honour the
		 * same marker here so an unassigned byte is not emitted as the
		 * noncharacter U+FFFE.
		 * NOTE(review): the table is not visible in this view; if it
		 * never contains 0xfffe this extra test is simply never true.
		 */
		if (s <= 0 || s == 0xfffe) {
			s = c;
			s &= MBFL_WCSPLANE_MASK;
			s |= MBFL_WCSPLANE_CP1254;
		}
	} else {
		s = c;
		s &= MBFL_WCSGROUP_MASK;
		s |= MBFL_WCSGROUP_THROUGH;
	}

	CK((*filter->output_function)(s, filter->data));

	return c;
}
/*
 * Encoding-identification callback for CP1254.
 *
 * Sets filter->flag to 0 when `c` is in the range 0x80..0xfe and to 1
 * ("not it") for every other value, then returns `c`.
 *
 * As the original author remarked, this is pretty much a NO-OP: the
 * identification system does not let a filter discriminate between a
 * positive match, a possible match and a definite non-match, and cp1254
 * looks like SJIS for certain chars.
 *
 * NOTE(review): values below 0x80 and the value 0xff are both flagged as
 * "not it" -- confirm that this is intended.
 */
static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter)
{
	filter->flag = (c >= 0x80 && c < 0xff) ? 0 : 1;
	return c;
}

43
ext/mbstring/libmbfl/filters/mbfilter_cp1254.h

@ -0,0 +1,43 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Haluk AKIN <halukakin@gmail.com>
*
*/
/*
* the source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifndef MBFL_MBFILTER_CP1254_H
#define MBFL_MBFILTER_CP1254_H
#include "mbfilter.h"
/* Encoding descriptor for Windows-1254 (CP1254). */
extern const mbfl_encoding mbfl_encoding_cp1254;
/* Identification vtable for CP1254. */
extern const struct mbfl_identify_vtbl vtbl_identify_cp1254;
/* Conversion vtables: CP1254 -> wchar and wchar -> CP1254. */
extern const struct mbfl_convert_vtbl vtbl_cp1254_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_cp1254;
/*
 * Per-character converters. Each takes one input value `c`, forwards the
 * converted value to filter->output_function and returns `c`, or -1 when
 * a downstream call fails.
 */
int mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_CP1254_H */

146
ext/mbstring/libmbfl/filters/mbfilter_koi8u.c

@ -0,0 +1,146 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Maksym Veremeyenko <verem@m1.tv>
*
* Based on mbfilter_koi8r.c code
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_koi8u.h"
#include "unicode_table_koi8u.h"
/* Forward declaration; the identification callback is defined later in this file. */
static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter);
/* NULL-terminated list of alternative names for this encoding. */
static const char *mbfl_encoding_koi8u_aliases[] = {"KOI8-U", "KOI8U", NULL};
/*
 * Encoding descriptor for KOI8-U (Ukrainian).
 *
 * The initializer is positional: encoding id, two name strings, a pointer
 * to the alias list above, a NULL slot and the MBFL_ENCTYPE_SBCS flag.
 * NOTE(review): the two strings are presumably the canonical name and the
 * MIME name, and the NULL slot presumably a multibyte length table that a
 * single-byte charset does not need -- confirm against the definition of
 * mbfl_encoding.
 */
const mbfl_encoding mbfl_encoding_koi8u = {
mbfl_no_encoding_koi8u,
"KOI8-U",
"KOI8-U",
(const char *(*)[])&mbfl_encoding_koi8u_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identification vtable for KOI8-U: encoding id, the shared ("common")
 * constructor and destructor, and this file's per-character
 * identification callback.
 * NOTE(review): slot meaning is inferred from the initializer names --
 * confirm against struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_koi8u = {
mbfl_no_encoding_koi8u,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_koi8u
};
/*
 * Conversion vtable wchar -> KOI8-U. The first two entries name the
 * source and destination encodings; the remaining entries are the shared
 * constructor/destructor, the per-character converter and the shared
 * flush routine.
 * NOTE(review): slot order inferred from the names -- confirm against
 * struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_wchar_koi8u = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_koi8u,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_koi8u,
mbfl_filt_conv_common_flush
};
/*
 * Conversion vtable KOI8-U -> wchar; mirror image of vtbl_wchar_koi8u
 * with the source/destination ids swapped and the opposite converter.
 */
const struct mbfl_convert_vtbl vtbl_koi8u_wchar = {
mbfl_no_encoding_koi8u,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_koi8u_wchar,
mbfl_filt_conv_common_flush
};
/*
 * CK(statement): evaluate `statement` once; if the result is negative,
 * return -1 from the enclosing function. Used to propagate failures of
 * downstream output callbacks.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
/*
 * koi8u => wchar
 *
 * Converts one KOI8-U input value `c` into a wide-character code and
 * passes it to filter->output_function.
 *
 *  - 0 <= c < koi8u_ucs_table_min: passed through unchanged.
 *  - koi8u_ucs_table_min <= c < 0x100: looked up in koi8u_ucs_table; a
 *    non-positive entry is replaced by the byte tagged with
 *    MBFL_WCSPLANE_KOI8U.
 *  - anything else: masked with MBFL_WCSGROUP_MASK and tagged with
 *    MBFL_WCSGROUP_THROUGH.
 *
 * Returns `c`, or -1 when the output function reports failure (via CK).
 */
int
mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c >= 0 && c < koi8u_ucs_table_min) {
		wc = c;
	} else if (c >= koi8u_ucs_table_min && c < 0x100) {
		wc = koi8u_ucs_table[c - koi8u_ucs_table_min];
		if (wc <= 0) {
			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_KOI8U;
		}
	} else {
		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
	}

	CK((*filter->output_function)(wc, filter->data));
	return c;
}
/*
 * wchar => koi8u
 *
 * Converts one wide-character code `c` into a KOI8-U byte and passes it
 * to filter->output_function.
 *
 *  - Codes below 0x80 are passed through unchanged.
 *  - Other codes are searched for in koi8u_ucs_table, scanning from the
 *    last entry towards the first.
 *  - If nothing matched and `c` carries the MBFL_WCSPLANE_KOI8U tag, the
 *    low bits (MBFL_WCSPLANE_MASK) are emitted as the byte.
 *  - Otherwise mbfl_filt_conv_illegal_output() is invoked unless the
 *    filter's illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 *
 * Returns `c`, or -1 when a downstream call reports failure (via CK).
 */
int
mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter)
{
	int byte;
	int idx;

	if (c < 0x80) {
		byte = c;
	} else {
		byte = -1;
		for (idx = koi8u_ucs_table_len - 1; idx >= 0; idx--) {
			if (koi8u_ucs_table[idx] == c) {
				byte = koi8u_ucs_table_min + idx;
				break;
			}
		}
		if (byte <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_KOI8U) {
			byte = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (byte < 0) {
		/* no representation in KOI8-U */
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter));
		}
		return c;
	}

	CK((*filter->output_function)(byte, filter->data));
	return c;
}
/*
 * Encoding-identification callback for KOI8-U.
 *
 * Sets filter->flag to 0 when `c` is in the range 0x80..0xfe and to 1
 * ("not it") for every other value, then returns `c`.
 *
 * NOTE(review): values below 0x80 and the value 0xff (which has an entry
 * in koi8u_ucs_table) are both flagged as "not it" -- confirm that this
 * is intended.
 */
static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter)
{
	filter->flag = (c >= 0x80 && c < 0xff) ? 0 : 1;
	return c;
}

47
ext/mbstring/libmbfl/filters/mbfilter_koi8u.h

@ -0,0 +1,47 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Maksym Veremeyenko <verem@m1.tv>
*
* Based on mbfilter_koi8r.h code
*
*/
#ifndef MBFL_MBFILTER_KOI8U_H
#define MBFL_MBFILTER_KOI8U_H
#include "mbfilter.h"
/* Encoding descriptor for KOI8-U. */
extern const mbfl_encoding mbfl_encoding_koi8u;
/* Identification vtable for KOI8-U. */
extern const struct mbfl_identify_vtbl vtbl_identify_koi8u;
/* Conversion vtables: wchar -> KOI8-U and KOI8-U -> wchar. */
extern const struct mbfl_convert_vtbl vtbl_wchar_koi8u;
extern const struct mbfl_convert_vtbl vtbl_koi8u_wchar;
/*
 * Per-character converters. Each takes one input value `c`, forwards the
 * converted value to filter->output_function and returns `c`, or -1 when
 * a downstream call fails.
 */
int mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_KOI8U_H */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/

BIN
ext/mbstring/libmbfl/filters/unicode_table_cp1254.h

166
ext/mbstring/libmbfl/filters/unicode_table_koi8u.h

@ -0,0 +1,166 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this part: Maksym Veremeyenko <verem@m1.tv>
*
*/
#ifndef UNICODE_TABLE_KOI8U_H
#define UNICODE_TABLE_KOI8U_H
/*
 * KOI8-U (RFC2319) to Unicode.
 *
 * Entry i holds the Unicode code point for byte value
 * koi8u_ucs_table_min + i (0x80 + i); the table has one entry for each
 * byte 0x80..0xff. The reverse conversion searches this same table.
 */
static const unsigned short koi8u_ucs_table[] = {
/* 0x80 - 0x8f */
0x2500, /* BOX DRAWINGS LIGHT HORIZONTAL */
0x2502, /* BOX DRAWINGS LIGHT VERTICAL */
0x250C, /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
0x2510, /* BOX DRAWINGS LIGHT DOWN AND LEFT */
0x2514, /* BOX DRAWINGS LIGHT UP AND RIGHT */
0x2518, /* BOX DRAWINGS LIGHT UP AND LEFT */
0x251C, /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */
0x2524, /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */
0x252C, /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */
0x2534, /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */
0x253C, /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */
0x2580, /* UPPER HALF BLOCK */
0x2584, /* LOWER HALF BLOCK */
0x2588, /* FULL BLOCK */
0x258C, /* LEFT HALF BLOCK */
0x2590, /* RIGHT HALF BLOCK */
/* 0x90 - 0x9f */
0x2591, /* LIGHT SHADE */
0x2592, /* MEDIUM SHADE */
0x2593, /* DARK SHADE */
0x2320, /* TOP HALF INTEGRAL */
0x25A0, /* BLACK SQUARE */
0x2219, /* BULLET OPERATOR */
0x221A, /* SQUARE ROOT */
0x2248, /* ALMOST EQUAL TO */
0x2264, /* LESS THAN OR EQUAL TO */
0x2265, /* GREATER THAN OR EQUAL TO */
0x00A0, /* NO-BREAK SPACE */
0x2321, /* BOTTOM HALF INTEGRAL */
0x00B0, /* DEGREE SIGN */
0x00B2, /* SUPERSCRIPT TWO */
0x00B7, /* MIDDLE DOT */
0x00F7, /* DIVISION SIGN */
/* 0xa0 - 0xaf */
0x2550, /* BOX DRAWINGS DOUBLE HORIZONTAL */
0x2551, /* BOX DRAWINGS DOUBLE VERTICAL */
0x2552, /* BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE */
0x0451, /* CYRILLIC SMALL LETTER IO */
0x0454, /* CYRILLIC SMALL LETTER UKRAINIAN IE */
0x2554, /* BOX DRAWINGS DOUBLE DOWN AND RIGHT */
0x0456, /* CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */
0x0457, /* CYRILLIC SMALL LETTER YI (UKRAINIAN) */
0x2557, /* BOX DRAWINGS DOUBLE DOWN AND LEFT */
0x2558, /* BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE */
0x2559, /* BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE */
0x255A, /* BOX DRAWINGS DOUBLE UP AND RIGHT */
0x255B, /* BOX DRAWINGS UP SINGLE AND LEFT DOUBLE */
0x0491, /* CYRILLIC SMALL LETTER GHE WITH UPTURN */
0x255D, /* BOX DRAWINGS DOUBLE UP AND LEFT */
0x255E, /* BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE */
/* 0xb0 - 0xbf */
0x255F, /* BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE */
0x2560, /* BOX DRAWINGS DOUBLE VERTICAL AND RIGHT */
0x2561, /* BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE */
0x0401, /* CYRILLIC CAPITAL LETTER IO */
0x0404, /* CYRILLIC CAPITAL LETTER UKRAINIAN IE */
0x2563, /* BOX DRAWINGS DOUBLE VERTICAL AND LEFT */
0x0406, /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */
0x0407, /* CYRILLIC CAPITAL LETTER YI (UKRAINIAN) */
0x2566, /* BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL */
0x2567, /* BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE */
0x2568, /* BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE */
0x2569, /* BOX DRAWINGS DOUBLE UP AND HORIZONTAL */
0x256A, /* BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE */
0x0490, /* CYRILLIC CAPITAL LETTER GHE WITH UPTURN */
0x256C, /* BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL */
0x00A9, /* COPYRIGHT SIGN */
/* 0xc0 - 0xcf */
0x044E, /* CYRILLIC SMALL LETTER YU */
0x0430, /* CYRILLIC SMALL LETTER A */
0x0431, /* CYRILLIC SMALL LETTER BE */
0x0446, /* CYRILLIC SMALL LETTER TSE */
0x0434, /* CYRILLIC SMALL LETTER DE */
0x0435, /* CYRILLIC SMALL LETTER IE */
0x0444, /* CYRILLIC SMALL LETTER EF */
0x0433, /* CYRILLIC SMALL LETTER GHE */
0x0445, /* CYRILLIC SMALL LETTER KHA */
0x0438, /* CYRILLIC SMALL LETTER I */
0x0439, /* CYRILLIC SMALL LETTER SHORT I */
0x043A, /* CYRILLIC SMALL LETTER KA */
0x043B, /* CYRILLIC SMALL LETTER EL */
0x043C, /* CYRILLIC SMALL LETTER EM */
0x043D, /* CYRILLIC SMALL LETTER EN */
0x043E, /* CYRILLIC SMALL LETTER O */
/* 0xd0 - 0xdf */
0x043F, /* CYRILLIC SMALL LETTER PE */
0x044F, /* CYRILLIC SMALL LETTER YA */
0x0440, /* CYRILLIC SMALL LETTER ER */
0x0441, /* CYRILLIC SMALL LETTER ES */
0x0442, /* CYRILLIC SMALL LETTER TE */
0x0443, /* CYRILLIC SMALL LETTER U */
0x0436, /* CYRILLIC SMALL LETTER ZHE */
0x0432, /* CYRILLIC SMALL LETTER VE */
0x044C, /* CYRILLIC SMALL LETTER SOFT SIGN */
0x044B, /* CYRILLIC SMALL LETTER YERU */
0x0437, /* CYRILLIC SMALL LETTER ZE */
0x0448, /* CYRILLIC SMALL LETTER SHA */
0x044D, /* CYRILLIC SMALL LETTER E */
0x0449, /* CYRILLIC SMALL LETTER SHCHA */
0x0447, /* CYRILLIC SMALL LETTER CHE */
0x044A, /* CYRILLIC SMALL LETTER HARD SIGN */
/* 0xe0 - 0xef */
0x042E, /* CYRILLIC CAPITAL LETTER YU */
0x0410, /* CYRILLIC CAPITAL LETTER A */
0x0411, /* CYRILLIC CAPITAL LETTER BE */
0x0426, /* CYRILLIC CAPITAL LETTER TSE */
0x0414, /* CYRILLIC CAPITAL LETTER DE */
0x0415, /* CYRILLIC CAPITAL LETTER IE */
0x0424, /* CYRILLIC CAPITAL LETTER EF */
0x0413, /* CYRILLIC CAPITAL LETTER GHE */
0x0425, /* CYRILLIC CAPITAL LETTER KHA */
0x0418, /* CYRILLIC CAPITAL LETTER I */
0x0419, /* CYRILLIC CAPITAL LETTER SHORT I */
0x041A, /* CYRILLIC CAPITAL LETTER KA */
0x041B, /* CYRILLIC CAPITAL LETTER EL */
0x041C, /* CYRILLIC CAPITAL LETTER EM */
0x041D, /* CYRILLIC CAPITAL LETTER EN */
0x041E, /* CYRILLIC CAPITAL LETTER O */
/* 0xf0 - 0xff */
0x041F, /* CYRILLIC CAPITAL LETTER PE */
0x042F, /* CYRILLIC CAPITAL LETTER YA */
0x0420, /* CYRILLIC CAPITAL LETTER ER */
0x0421, /* CYRILLIC CAPITAL LETTER ES */
0x0422, /* CYRILLIC CAPITAL LETTER TE */
0x0423, /* CYRILLIC CAPITAL LETTER U */
0x0416, /* CYRILLIC CAPITAL LETTER ZHE */
0x0412, /* CYRILLIC CAPITAL LETTER VE */
0x042C, /* CYRILLIC CAPITAL LETTER SOFT SIGN */
0x042B, /* CYRILLIC CAPITAL LETTER YERU */
0x0417, /* CYRILLIC CAPITAL LETTER ZE */
0x0428, /* CYRILLIC CAPITAL LETTER SHA */
0x042D, /* CYRILLIC CAPITAL LETTER E */
0x0429, /* CYRILLIC CAPITAL LETTER SHCHA */
0x0427, /* CYRILLIC CAPITAL LETTER CHE */
0x042A /* CYRILLIC CAPITAL LETTER HARD SIGN */
};
/* Byte value that entry 0 of koi8u_ucs_table corresponds to. */
static const int koi8u_ucs_table_min = 0x80;
/* Number of entries in koi8u_ucs_table. */
static const int koi8u_ucs_table_len = (sizeof (koi8u_ucs_table) / sizeof (unsigned short));
/* One past the highest byte value covered by the table (0x80 + entry count). */
static const int koi8u_ucs_table_max = 0x80 + (sizeof (koi8u_ucs_table) / sizeof (unsigned short));
#endif /* UNICODE_TABLE_KOI8U_H */

12
ext/mbstring/libmbfl/libmbfl.dsp

@ -243,6 +243,10 @@ SOURCE=.\filters\mbfilter_koi8r.c
# End Source File
# Begin Source File
SOURCE=.\filters\mbfilter_koi8u.c
# End Source File
# Begin Source File
SOURCE=.\filters\mbfilter_armscii8.c
# End Source File
# Begin Source File
@ -556,6 +560,10 @@ SOURCE=.\filters\mbfilter_koi8r.h
# End Source File
# Begin Source File
SOURCE=.\filters\mbfilter_koi8u.h
# End Source File
# Begin Source File
SOURCE=.\filters\mbfilter_armscii8.h
# End Source File
# Begin Source File
@ -776,6 +784,10 @@ SOURCE=.\filters\unicode_table_koi8r.h
# End Source File
# Begin Source File
SOURCE=.\filters\unicode_table_koi8u.h
# End Source File
# Begin Source File
SOURCE=.\filters\unicode_table_armscii8.h
# End Source File
# Begin Source File

26
ext/mbstring/libmbfl/libmbfl.sln

@ -1,21 +1,19 @@
Microsoft Visual Studio Solution File, Format Version 7.00
Microsoft Visual Studio Solution File, Format Version 10.00
# Visual C++ Express 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}"
EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
ConfigName.0 = Debug
ConfigName.1 = Release
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectDependencies) = postSolution
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.ActiveCfg = Debug|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.Build.0 = Debug|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.ActiveCfg = Release|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfiguration) = postSolution
{B3636594-A785-4270-A765-8EAE922B5207}.Debug.ActiveCfg = Debug|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Debug.Build.0 = Debug|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Release.ActiveCfg = Release|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Release.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
GlobalSection(ExtensibilityAddIns) = postSolution
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

777
ext/mbstring/libmbfl/libmbfl.vcproj
File diff suppressed because it is too large
View File

8
ext/mbstring/libmbfl/mbfl.rc

@ -1,7 +1,7 @@
/* $Id$ */
1 VERSIONINFO
FILEVERSION 1,1,0,0
PRODUCTVERSION 1,1,0,0
FILEVERSION 1,0,2,0
PRODUCTVERSION 1,0,2,0
FILEFLAGSMASK 0
FILEOS 0x40000
FILETYPE 1
@ -12,12 +12,12 @@ FILETYPE 1
{
VALUE "CompanyName", "-\0"
VALUE "FileDescription", "streamable kanji code filter\0"
VALUE "FileVersion", "1.1.0\0"
VALUE "FileVersion", "1.0.2\0"
VALUE "InternalName", "mbfl\0"
VALUE "LegalCopyright", "GNU Lesser Public License Version 2.0\0"
VALUE "OriginalFilename", "mbfl.dll\0"
VALUE "ProductName", "mbfl\0"
VALUE "ProductVersion", "1.1.0\0"
VALUE "ProductVersion", "1.0.2\0"
}
}
}

29
ext/mbstring/libmbfl/mbfl/Makefile.am

@ -1,12 +1,37 @@
EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk
lib_LTLIBRARIES=libmbfl.la
libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c eaw_table.h
libmbfl_la_SOURCES=mbfilter.c \
mbfl_string.c \
mbfl_language.c \
mbfl_encoding.c \
mbfl_convert.c \
mbfl_ident.c \
mbfl_memory_device.c \
mbfl_allocators.c \
mbfl_filter_output.c \
mbfilter_pass.c \
mbfilter_wchar.c \
mbfilter_8bit.c \
eaw_table.h
libmbfl_filters_la=../filters/libmbfl_filters.la
libmbfl_nls_la=../nls/libmbfl_nls.la
libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la)
libmbfl_la_LDFLAGS=-version-info $(SHLIB_VERSION)
libmbfl_includedir=$(includedir)/mbfl
libmbfl_include_HEADERS=mbfilter.h mbfl_consts.h mbfl_encoding.h mbfl_language.h mbfl_string.h mbfl_convert.h mbfl_ident.h mbfl_memory_device.h mbfl_allocators.h mbfl_defs.h mbfl_filter_output.h mbfilter_pass.h mbfilter_wchar.h mbfilter_8bit.h
libmbfl_include_HEADERS=mbfilter.h \
mbfl_consts.h \
mbfl_encoding.h \
mbfl_language.h \
mbfl_string.h \
mbfl_convert.h \
mbfl_ident.h \
mbfl_memory_device.h \
mbfl_allocators.h \
mbfl_defs.h \
mbfl_filter_output.h \
mbfilter_pass.h \
mbfilter_wchar.h \
mbfilter_8bit.h
mbfilter.c: eaw_table.h

13
ext/mbstring/libmbfl/mbfl/Makefile.bcc32

@ -1,5 +1,16 @@
!include ..\rules.mak.bcc32
OBJS=mbfilter.obj mbfilter_8bit.obj mbfilter_pass.obj mbfilter_wchar.obj mbfl_allocators.obj mbfl_convert.obj mbfl_encoding.obj mbfl_filter_output.obj mbfl_ident.obj mbfl_language.obj mbfl_memory_device.obj mbfl_string.obj
OBJS=mbfilter.obj \
mbfilter_8bit.obj \
mbfilter_pass.obj \
mbfilter_wchar.obj \
mbfl_allocators.obj \
mbfl_convert.obj \
mbfl_encoding.obj \
mbfl_filter_output.obj \
mbfl_ident.obj \
mbfl_language.obj \
mbfl_memory_device.obj \
mbfl_string.obj
all: $(OBJS)

7
ext/mbstring/libmbfl/mbfl/mbfilter.h

@ -98,6 +98,13 @@
#include "mbfl_convert.h"
#include "mbfl_ident.h"
/*
* version information
*/
#define MBFL_VERSION_MAJOR 1
#define MBFL_VERSION_MINOR 0
#define MBFL_VERSION_TEENY 2
/*
* convert filter
*/

2
ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h

@ -34,6 +34,6 @@
#include "mbfl_defs.h"
#include "mbfilter.h"
MBFLAPI extern const mbfl_encoding mbfl_encoding_8bit;
extern const mbfl_encoding mbfl_encoding_8bit;
#endif /* MBFL_MBFILTER_8BIT_H */

4
ext/mbstring/libmbfl/mbfl/mbfilter_pass.h

@ -33,8 +33,8 @@
#include "mbfl_defs.h"
#include "mbfilter.h"
MBFLAPI extern const mbfl_encoding mbfl_encoding_pass;
MBFLAPI extern const struct mbfl_convert_vtbl vtbl_pass;
extern const mbfl_encoding mbfl_encoding_pass;
extern const struct mbfl_convert_vtbl vtbl_pass;
MBFLAPI extern int mbfl_filt_conv_pass(int c, mbfl_convert_filter *filter);

2
ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h

@ -34,6 +34,6 @@
#include "mbfl_defs.h"
#include "mbfilter.h"
MBFLAPI extern const mbfl_encoding mbfl_encoding_wchar;
extern const mbfl_encoding mbfl_encoding_wchar;
#endif /* MBFL_MBFILTER_WCHAR_H */

4
ext/mbstring/libmbfl/mbfl/mbfl_consts.h

@ -72,10 +72,12 @@
#define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */
#define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */
#define MBFL_WCSPLANE_CP1251 0x70f70000
#define MBFL_WCSPLANE_CP866 0x70f80000
#define MBFL_WCSPLANE_CP866 0x70f80000
#define MBFL_WCSPLANE_KOI8R 0x70f90000
#define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */
#define MBFL_WCSPLANE_ARMSCII8 0x70fb0000
#define MBFL_WCSPLANE_KOI8U 0x70fc0000
#define MBFL_WCSPLANE_CP1254 0x70fd0000 /* 00h - FFh */
#define MBFL_WCSGROUP_MASK 0xffffff
#define MBFL_WCSGROUP_UCS4MAX 0x70000000
#define MBFL_WCSGROUP_WCHARMAX 0x78000000

6
ext/mbstring/libmbfl/mbfl/mbfl_convert.c

@ -58,11 +58,13 @@
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
#include "filters/mbfilter_koi8u.h"
#include "filters/mbfilter_cp866.h"
#include "filters/mbfilter_cp932.h"
#include "filters/mbfilter_cp936.h"
#include "filters/mbfilter_cp1251.h"
#include "filters/mbfilter_cp1252.h"
#include "filters/mbfilter_cp1254.h"
#include "filters/mbfilter_iso8859_1.h"
#include "filters/mbfilter_iso8859_2.h"
#include "filters/mbfilter_iso8859_3.h"
@ -140,8 +142,12 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_wchar_cp866,
&vtbl_koi8r_wchar,
&vtbl_wchar_koi8r,
&vtbl_koi8u_wchar,
&vtbl_wchar_koi8u,
&vtbl_cp1252_wchar,
&vtbl_wchar_cp1252,
&vtbl_cp1254_wchar,
&vtbl_wchar_cp1254,
&vtbl_ascii_wchar,
&vtbl_wchar_ascii,
&vtbl_8859_1_wchar,

24
ext/mbstring/libmbfl/mbfl/mbfl_encoding.c

@ -64,11 +64,13 @@
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
#include "filters/mbfilter_koi8u.h"
#include "filters/mbfilter_cp866.h"
#include "filters/mbfilter_cp932.h"
#include "filters/mbfilter_cp936.h"
#include "filters/mbfilter_cp1251.h"
#include "filters/mbfilter_cp1252.h"
#include "filters/mbfilter_cp1254.h"
#include "filters/mbfilter_iso8859_1.h"
#include "filters/mbfilter_iso8859_2.h"
#include "filters/mbfilter_iso8859_3.h"
@ -156,6 +158,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_2022jp,
&mbfl_encoding_2022jpms,
&mbfl_encoding_cp1252,
&mbfl_encoding_cp1254,
&mbfl_encoding_8859_1,
&mbfl_encoding_8859_2,
&mbfl_encoding_8859_3,
@ -181,6 +184,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_cp1251,
&mbfl_encoding_cp866,
&mbfl_encoding_koi8r,
&mbfl_encoding_koi8u,
&mbfl_encoding_armscii8,
NULL
};
@ -196,16 +200,16 @@ mbfl_name2encoding(const char *name)
return NULL;
}
i = 0;
while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){
i = 0;
while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){
if (strcasecmp(encoding->name, name) == 0) {
return encoding;
}
}
/* serch MIME charset name */
i = 0;
while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
/* search MIME charset name */
i = 0;
while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
if (encoding->mime_name != NULL) {
if (strcasecmp(encoding->mime_name, name) == 0) {
return encoding;
@ -213,12 +217,12 @@ mbfl_name2encoding(const char *name)
}
}
/* serch aliases */
i = 0;
while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
/* search aliases */
i = 0;
while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
if (encoding->aliases != NULL) {
j = 0;
while ((*encoding->aliases)[j] != NULL) {
j = 0;
while ((*encoding->aliases)[j] != NULL) {
if (strcasecmp((*encoding->aliases)[j], name) == 0) {
return encoding;
}

2
ext/mbstring/libmbfl/mbfl/mbfl_encoding.h

@ -75,6 +75,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_2022jp,
mbfl_no_encoding_2022jpms,
mbfl_no_encoding_cp1252,
mbfl_no_encoding_cp1254,
mbfl_no_encoding_8859_1,
mbfl_no_encoding_8859_2,
mbfl_no_encoding_8859_3,
@ -99,6 +100,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_cp1251,
mbfl_no_encoding_cp866,
mbfl_no_encoding_koi8r,
mbfl_no_encoding_koi8u,
mbfl_no_encoding_8859_16,
mbfl_no_encoding_armscii8,
mbfl_no_encoding_charset_max

4
ext/mbstring/libmbfl/mbfl/mbfl_ident.c

@ -56,11 +56,13 @@
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
#include "filters/mbfilter_koi8u.h"
#include "filters/mbfilter_cp866.h"
#include "filters/mbfilter_cp932.h"
#include "filters/mbfilter_cp936.h"
#include "filters/mbfilter_cp1251.h"
#include "filters/mbfilter_cp1252.h"
#include "filters/mbfilter_cp1254.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_iso8859_1.h"
#include "filters/mbfilter_iso8859_2.h"
@ -121,7 +123,9 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
&vtbl_identify_cp1251,
&vtbl_identify_cp866,
&vtbl_identify_koi8r,
&vtbl_identify_koi8u,
&vtbl_identify_cp1252,
&vtbl_identify_cp1254,
&vtbl_identify_8859_1,
&vtbl_identify_8859_2,
&vtbl_identify_8859_3,

2
ext/mbstring/libmbfl/mbfl/mbfl_language.c

@ -57,6 +57,7 @@
#include "nls/nls_uni.h"
#include "nls/nls_de.h"
#include "nls/nls_ru.h"
#include "nls/nls_ua.h"
#include "nls/nls_en.h"
#include "nls/nls_hy.h"
#include "nls/nls_tr.h"
@ -77,6 +78,7 @@ static const mbfl_language *mbfl_language_ptr_table[] = {
&mbfl_language_english,
&mbfl_language_german,
&mbfl_language_russian,
&mbfl_language_ukrainian,
&mbfl_language_armenian,
&mbfl_language_turkish,
&mbfl_language_neutral,

1
ext/mbstring/libmbfl/mbfl/mbfl_language.h

@ -57,6 +57,7 @@ enum mbfl_no_language {
mbfl_no_language_simplified_chinese, /* zh-cn */
mbfl_no_language_traditional_chinese, /* zh-tw */
mbfl_no_language_russian, /* ru */
mbfl_no_language_ukrainian, /* ua */
mbfl_no_language_armenian, /* hy */
mbfl_no_language_turkish, /* tr */
mbfl_no_language_max

23
ext/mbstring/libmbfl/nls/Makefile.am

@ -2,4 +2,25 @@ EXTRA_DIST=Makefile.bcc32
noinst_LTLIBRARIES=libmbfl_nls.la
INCLUDES=-I../mbfl
libmbfl_nls_la_LDFLAGS=-version-info $(SHLIB_VERSION)
libmbfl_nls_la_SOURCES=nls_ja.c nls_de.c nls_en.c nls_hy.c nls_kr.c nls_ru.c nls_zh.c nls_uni.c nls_neutral.c nls_ja.h nls_de.h nls_en.h nls_hy.h nls_kr.h nls_ru.h nls_zh.h nls_uni.h nls_neutral.h
libmbfl_nls_la_SOURCES=nls_ja.c \
nls_de.c \
nls_en.c \
nls_hy.c \
nls_tr.c \
nls_kr.c \
nls_ru.c \
nls_ua.c \
nls_zh.c \
nls_uni.c \
nls_neutral.c \
nls_ja.h \
nls_de.h \
nls_en.h \
nls_hy.h \
nls_tr.h \
nls_kr.h \
nls_ru.h \
nls_ua.h \
nls_zh.h \
nls_uni.h \
nls_neutral.h

12
ext/mbstring/libmbfl/nls/Makefile.bcc32

@ -1,6 +1,16 @@
!include ..\rules.mak.bcc32
INCLUDES=$(INCLUDES) -I..\mbfl
OBJS=nls_ja.obj nls_de.obj nls_en.obj nls_hy.obj nls_kr.obj nls_ru.obj nls_zh.obj nls_uni.obj nls_neutral.obj
OBJS=nls_ja.obj \
nls_de.obj \
nls_en.obj \
nls_hy.obj \
nls_tr.obj \
nls_kr.obj \
nls_ru.obj \
nls_ua.obj \
nls_zh.obj \
nls_uni.obj \
nls_neutral.obj
all: $(OBJS)

2
ext/mbstring/libmbfl/nls/nls_ru.c

@ -2,12 +2,10 @@
#include "config.h"
#endif
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
#include "mbfilter.h"
#include "nls_ru.h"

22
ext/mbstring/libmbfl/nls/nls_ua.c

@ -0,0 +1,22 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
#include "mbfilter.h"
#include "nls_ua.h"
/*
 * Language descriptor for Ukrainian.
 *
 * NOTE(review): the mbfl_language struct definition is not visible from here;
 * the per-field notes below are inferred from the values and must be
 * confirmed against mbfl_language.h.
 */
const mbfl_language mbfl_language_ukrainian = {
mbfl_no_language_ukrainian, /* language identifier */
"Ukrainian", /* presumably the full language name */
/* presumably the short name; NOTE(review): the ISO 639-1 code for Ukrainian is "uk" ("ua" is the country code) -- confirm this is intended */
"ua",
NULL, /* presumably the alias list (none) */
mbfl_no_encoding_koi8u, /* presumably the default mail charset */
mbfl_no_encoding_qprint, /* presumably the mail header transfer encoding */
mbfl_no_encoding_8bit /* presumably the mail body transfer encoding */
};

9
ext/mbstring/libmbfl/nls/nls_ua.h

@ -0,0 +1,9 @@
#ifndef MBFL_NLS_UA_H
#define MBFL_NLS_UA_H
#include "mbfilter.h"

/* Language descriptor for Ukrainian; the definition lives in nls_ua.c. */
extern const mbfl_language mbfl_language_ukrainian;
#endif /* MBFL_NLS_UA_H */

10
ext/mbstring/libmbfl/tests/Makefile.am

@ -0,0 +1,10 @@
SUBDIRS=conv_encoding.tests conv_kana.tests strwidth.tests strcut.tests
noinst_PROGRAMS=conv_encoding conv_kana strwidth strcut
conv_encoding_SOURCES=conv_encoding.c
conv_encoding_LDADD=../mbfl/libmbfl.la
conv_kana_SOURCES=conv_kana.c
conv_kana_LDADD=../mbfl/libmbfl.la
strwidth_SOURCES=strwidth.c
strwidth_LDADD=../mbfl/libmbfl.la
strcut_SOURCES=strcut.c
strcut_LDADD=../mbfl/libmbfl.la

104
ext/mbstring/libmbfl/tests/conv_encoding.c

@ -0,0 +1,104 @@
/**
* this is a small sample script to use libmbfl.
* Rui Hirokawa <hirokawa@php.net>
*
* this file is encoded in EUC-JP.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mbfl/mbfilter.h"
/* Prints every byte of ptr as "%xx", then " (<length>)" and a newline. */
static void hexdump(const mbfl_string *ptr)
{
	unsigned int idx = 0;

	while (idx < ptr->len) {
		printf("%%%02x", ptr->val[idx]);
		idx++;
	}
	printf(" (%u)\n", ptr->len);
}
/**
 * Converts stdin line by line and prints each converted line as a hex dump.
 *
 * Usage: conv_encoding lang to_encoding from_encoding
 *
 * Input lines may end in LF, CR or CRLF. Returns EXIT_FAILURE on bad
 * arguments, when no converter can be created, or when the input buffer
 * cannot grow; EXIT_SUCCESS once stdin is exhausted.
 */
int main(int argc, char **argv)
{
	enum mbfl_no_encoding from_encoding, to_encoding;
	enum mbfl_no_language no_language;
	mbfl_buffer_converter *convd = NULL;
	mbfl_memory_device dev;
	mbfl_string string, result;
	int final = 0;
	int state = 0; /* 1 while the byte read just before was a CR */

	if (argc < 4) {
		fprintf(stderr, "Usage: %s lang to_encoding from_encoding\n", argv[0]);
		return EXIT_FAILURE;
	}
	if ((no_language = mbfl_name2no_language(argv[1])) ==
			mbfl_no_language_invalid) {
		printf("Unsupported NLS: %s\n", argv[1]);
		return EXIT_FAILURE;
	}
	if ((to_encoding = mbfl_name2no_encoding(argv[2])) ==
			mbfl_no_encoding_invalid) {
		printf("Unsupported encoding: %s\n", argv[2]);
		return EXIT_FAILURE;
	}
	if ((from_encoding = mbfl_name2no_encoding(argv[3])) ==
			mbfl_no_encoding_invalid) {
		printf("Unsupported encoding: %s\n", argv[3]);
		return EXIT_FAILURE;
	}

	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
	if (convd == NULL) {
		/* without a converter every line would silently dump as empty */
		printf("Unable to create converter: %s -> %s\n", argv[3], argv[2]);
		return EXIT_FAILURE;
	}

	do {
		mbfl_memory_device_init(&dev, 0, 4096);
		mbfl_string_init_set(&string, no_language, from_encoding);
		for (;;) {
			const int c = fgetc(stdin);
			if (c == EOF) {
				final = 1;
				break;
			} else if (c == 10) {
				if (state == 1) {
					/* LF completing a CRLF pair: the line was already emitted at the CR */
					state = 0;
					continue;
				}
				break;
			} else if (c == 13) {
				state = 1;
				break;
			}
			/*
			 * An ordinary byte ends any pending CR; otherwise the LF that
			 * terminates this line would be mistaken for the tail of CRLF.
			 */
			state = 0;
			if (dev.pos >= dev.length) {
				if (dev.length + dev.allocsz < dev.length) {
					printf("Unable to allocate memory\n");
					return EXIT_FAILURE;
				}
				mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
						dev.allocsz);
				if (dev.pos >= dev.length) {
					/* the call reports no status here, so verify the buffer really grew */
					printf("Unable to allocate memory\n");
					return EXIT_FAILURE;
				}
			}
			dev.buffer[dev.pos++] = (unsigned char)c;
		}
		mbfl_memory_device_result(&dev, &string);
		mbfl_string_init_set(&result, no_language, to_encoding);
		mbfl_buffer_converter_feed_result(convd, &string, &result);
		hexdump(&result);
		mbfl_string_clear(&result);
		mbfl_string_clear(&string);
	} while (!final);

	mbfl_buffer_converter_delete(convd);
	return EXIT_SUCCESS;
}

1
ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am

@ -0,0 +1 @@
EXTRA_DIST=*.exp

33
ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp

@ -0,0 +1,33 @@
#!/usr/bin/expect -f
spawn tests/conv_encoding Japanese CP50220raw eucJP-win
set timeout 1
expect_after {
"\[^\r\n\]*\r\n" { fail $test }
}
set test "81 - 87ku"
send "\xf5\xba\xf6\xec\xf7\xc9\xf8\xb3\xf9\xa1\xfa\xa1\xfb\xa1\r"
expect {
"%1b%24%42%75%3a%76%6c%77%49%78%33%79%21%7a%21%7b%21%1b%28%42 (20)\r\n" { pass $test }
}
set test "kanji + kana"
send "日本語テスト\r"
expect {
"%1b%24%42%46%7c%4b%5c%38%6c%25%46%25%39%25%48%1b%28%42 (18)\r\n" { pass $test }
}
set test "full-width numerics"
send "0123456789\r"
expect {
"%1b%24%42%23%30%23%31%23%32%23%33%23%34%23%35%23%36%23%37%23%38%23%39%1b%28%42 (26)\r\n" { pass $test }
}
set test "full-width numerics"
send "㍊"
expect {
"%1b%24%42%2d%42%1b%28%42 (8)\r\n" { pass $test }
}

35
ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp

@ -0,0 +1,35 @@
#!/usr/bin/expect -f
spawn tests/conv_encoding Japanese Shift_JIS EUC-JP
set timeout 1
expect_after {
"\[^\r\n\]*\r\n" { fail $test }
}
set test "basic test"
send "testtest\r"
expect {
"%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
}
set test "kanji + kana"
send "日本語テスト\r"
expect {
"%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
}
set test "full-width numerics"
send "0123456789\r"
expect {
"%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
}
set test "full-width numerics"
send "㍊"
expect {
"%3f (1)\r\n" { pass $test }
}
close
# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP

35
ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp

@ -0,0 +1,35 @@
#!/usr/bin/expect -f
spawn tests/conv_encoding Japanese Shift_JIS UTF-8
set timeout 1
expect_after {
"\[^\r\n\]*\r\n" { fail $test }
}
set test "basic test"
send "testtest\r"
expect {
"%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
}
set test "kanji + kana"
send "日本語テスト\r"
expect {
"%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
}
set test "full-width numerics"
send "0123456789\r"
expect {
"%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
}
set test "full-width numerics"
send "㍊"
expect {
"%3f (1)\r\n" { pass $test }
}
close
# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP

147
ext/mbstring/libmbfl/tests/conv_kana.c

@ -0,0 +1,147 @@
/**
* this is a small sample script to use libmbfl.
* Rui Hirokawa <hirokawa@php.net>
*
* this file is encoded in EUC-JP.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mbfl/mbfilter.h"
/* Prints every byte of ptr as "%xx", then " (<length>)" and a newline. */
static void hexdump(const mbfl_string *ptr)
{
	unsigned int idx = 0;

	while (idx < ptr->len) {
		printf("%%%02x", ptr->val[idx]);
		idx++;
	}
	printf(" (%u)\n", ptr->len);
}
/**
 * Applies mbfl_ja_jp_hantozen() to each line of stdin and prints the result
 * as a hex dump.
 *
 * Usage: conv_kana encoding flags
 *
 * Each recognised character of `flags` sets one bit of the conversion mode;
 * unrecognised characters are ignored. Input lines may end in LF, CR or
 * CRLF. A line whose conversion fails is reported as "***ERROR***".
 */
int main(int argc, char **argv)
{
	enum mbfl_no_encoding no_enc;
	const enum mbfl_no_language no_lang = mbfl_no_language_japanese;
	mbfl_memory_device dev;
	mbfl_string string, result;
	int final = 0;
	int state = 0; /* 1 while the byte read just before was a CR */
	int mode = 0;

	if (argc < 3) {
		fprintf(stderr, "Usage: %s encoding flags\n", argv[0]);
		return EXIT_FAILURE;
	}
	if ((no_enc = mbfl_name2no_encoding(argv[1])) ==
			mbfl_no_encoding_invalid) {
		printf("Unsupported encoding: %s\n", argv[1]);
		return EXIT_FAILURE;
	}

	{
		const char *p;
		/* walk the flag string back to front; the bits are OR-ed so order is irrelevant */
		for (p = argv[2] + strlen(argv[2]); p > argv[2]; ) {
			switch (*(--p)) {
			case 'A':
				mode |= 0x1;
				break;
			case 'a':
				mode |= 0x10;
				break;
			case 'R':
				mode |= 0x2;
				break;
			case 'r':
				mode |= 0x20;
				break;
			case 'N':
				mode |= 0x4;
				break;
			case 'n':
				mode |= 0x40;
				break;
			case 'S':
				mode |= 0x8;
				break;
			case 's':
				mode |= 0x80;
				break;
			case 'K':
				mode |= 0x100;
				break;
			case 'k':
				mode |= 0x1000;
				break;
			case 'H':
				mode |= 0x200;
				break;
			case 'h':
				mode |= 0x2000;
				break;
			case 'V':
				mode |= 0x800;
				break;
			case 'C':
				mode |= 0x10000;
				break;
			case 'c':
				mode |= 0x20000;
				break;
			case 'M':
				mode |= 0x100000;
				break;
			case 'm':
				mode |= 0x200000;
				break;
			default:
				/* unknown flag characters are ignored */
				break;
			}
		}
	}

	do {
		mbfl_memory_device_init(&dev, 0, 4096);
		mbfl_string_init_set(&string, no_lang, no_enc);
		for (;;) {
			const int c = fgetc(stdin);
			if (c == EOF) {
				final = 1;
				break;
			} else if (c == 10) {
				if (state == 1) {
					/* LF completing a CRLF pair: the line was already emitted at the CR */
					state = 0;
					continue;
				}
				break;
			} else if (c == 13) {
				state = 1;
				break;
			}
			/*
			 * An ordinary byte ends any pending CR; otherwise the LF that
			 * terminates this line would be mistaken for the tail of CRLF.
			 */
			state = 0;
			if (dev.pos >= dev.length) {
				if (dev.length + dev.allocsz < dev.length) {
					printf("Unable to allocate memory\n");
					return EXIT_FAILURE;
				}
				mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
						dev.allocsz);
				if (dev.pos >= dev.length) {
					/* the call reports no status here, so verify the buffer really grew */
					printf("Unable to allocate memory\n");
					return EXIT_FAILURE;
				}
			}
			dev.buffer[dev.pos++] = (unsigned char)c;
		}
		mbfl_memory_device_result(&dev, &string);
		/*
		 * `result` is not initialised by this program, so only dump and
		 * release it when the conversion reports success.
		 */
		if (mbfl_ja_jp_hantozen(&string, &result, mode)) {
			hexdump(&result);
			mbfl_string_clear(&result);
		} else {
			printf("***ERROR***\n");
		}
		mbfl_string_clear(&string);
	} while (!final);

	return EXIT_SUCCESS;
}

1
ext/mbstring/libmbfl/tests/conv_kana.tests/Makefile.am

@ -0,0 +1 @@
EXTRA_DIST=*.exp

1098
ext/mbstring/libmbfl/tests/conv_kana.tests/conv_kana.exp
File diff suppressed because it is too large
View File

113
ext/mbstring/libmbfl/tests/strcut.c

@ -0,0 +1,113 @@
/**
* this is a small sample script to use libmbfl.
* Rui Hirokawa <hirokawa@php.net>
*
* this file is encoded in EUC-JP.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "mbfl/mbfilter.h"
/* Prints every byte of ptr as "%xx", then " (<length>)" and a newline. */
static void hexdump(const mbfl_string *ptr)
{
	unsigned int idx = 0;

	while (idx < ptr->len) {
		printf("%%%02x", ptr->val[idx]);
		idx++;
	}
	printf(" (%u)\n", ptr->len);
}
/*
 * Parses `text` as a complete base-10 integer.
 * Returns 0 and stores the value in *out on success; returns -1 when the
 * text is empty, has trailing characters, or does not fit in an int.
 */
static int parse_int_arg(const char *text, int *out)
{
	char *end = NULL;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (errno != 0 || end == text || *end != '\0') {
		return -1;
	}
	/* reject values that would change when narrowed from long to int */
	if (value != (long)(int)value) {
		return -1;
	}
	*out = (int)value;
	return 0;
}

/**
 * Applies mbfl_strcut() to each line of stdin and prints the cut string as a
 * hex dump, or "***ERROR***" when the cut fails.
 *
 * Usage: strcut lang encoding offset length
 *
 * Input lines may end in LF, CR or CRLF. Returns EXIT_FAILURE on bad
 * arguments or when the input buffer cannot grow.
 */
int main(int argc, char **argv)
{
	enum mbfl_no_encoding no_encoding;
	enum mbfl_no_language no_language;
	mbfl_memory_device dev;
	mbfl_string string;
	int offset, length;
	int final = 0;
	int state = 0; /* 1 while the byte read just before was a CR */

	if (argc < 5) {
		fprintf(stderr, "Usage: %s lang encoding offset length\n", argv[0]);
		return EXIT_FAILURE;
	}
	if ((no_language = mbfl_name2no_language(argv[1])) ==
			mbfl_no_language_invalid) {
		printf("Unsupported NLS: %s\n", argv[1]);
		return EXIT_FAILURE;
	}
	if ((no_encoding = mbfl_name2no_encoding(argv[2])) ==
			mbfl_no_encoding_invalid) {
		printf("Unsupported encoding: %s\n", argv[2]);
		return EXIT_FAILURE;
	}
	if (parse_int_arg(argv[3], &offset) != 0) {
		printf("Invalid offset: %s\n", argv[3]);
		return EXIT_FAILURE;
	}
	if (parse_int_arg(argv[4], &length) != 0) {
		printf("Invalid length: %s\n", argv[4]);
		return EXIT_FAILURE;
	}

	do {
		mbfl_string result;

		mbfl_memory_device_init(&dev, 0, 4096);
		mbfl_string_init_set(&string, no_language, no_encoding);
		for (;;) {
			const int c = fgetc(stdin);
			if (c == EOF) {
				final = 1;
				break;
			} else if (c == 10) {
				if (state == 1) {
					/* LF completing a CRLF pair: the line was already emitted at the CR */
					state = 0;
					continue;
				}
				break;
			} else if (c == 13) {
				state = 1;
				break;
			}
			/*
			 * An ordinary byte ends any pending CR; otherwise the LF that
			 * terminates this line would be mistaken for the tail of CRLF.
			 */
			state = 0;
			if (dev.pos >= dev.length) {
				if (dev.length + dev.allocsz < dev.length) {
					printf("Unable to allocate memory\n");
					return EXIT_FAILURE;
				}
				mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
						dev.allocsz);
				if (dev.pos >= dev.length) {
					/* the call reports no status here, so verify the buffer really grew */
					printf("Unable to allocate memory\n");
					return EXIT_FAILURE;
				}
			}
			dev.buffer[dev.pos++] = (unsigned char)c;
		}
		mbfl_memory_device_result(&dev, &string);
		if (mbfl_strcut(&string, &result, offset, length)) {
			hexdump(&result);
			mbfl_string_clear(&result);
		} else {
			printf("***ERROR***\n");
		}
		mbfl_string_clear(&string);
	} while (!final);

	return EXIT_SUCCESS;
}

1
ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am

@ -0,0 +1 @@
EXTRA_DIST=*.exp

129
ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp

@ -0,0 +1,129 @@
#!/usr/bin/expect -f
# Starts tests/strcut for ISO-2022-JP with the given offset and length, and
# stores both in the globals `from` and `length` so that test names can
# include them.
proc begin_strcut_test {_from _length} {
global spawn_id from length
set from $_from
set length $_length
spawn tests/strcut Japanese "ISO-2022-JP" $_from $_length
# NOTE(review): `timeout` is not in the `global` list above, so this may only
# set a proc-local variable -- confirm against Expect's timeout scoping rules.
set timeout 10
# Fallback for every later expect: any other complete output line fails the
# test currently named by $test.
expect_after {
"\[^\r\n\]*\r\n" { fail $test }
}
}
begin_strcut_test -1 2
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
sleep 1
expect {
-ex "***ERROR***\r\n" { pass $test }
}
close
begin_strcut_test 2 -1
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
sleep 1
expect {
-ex "***ERROR***\r\n" { pass $test }
}
close
begin_strcut_test 3 2
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "%74%74 (2)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
sleep 1
expect {
-ex " (0)\r\n" { pass $test }
}
close
begin_strcut_test 5 8
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "%65%73%74 (3)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
sleep 1
send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
sleep 1
expect {
-ex "%1b%24%42%25%39%1b%28%42 (8)\r\n" { pass $test }
}
close
begin_strcut_test 1 15
set test "asciish characters ($from, $length)"
send "testestestestestes\r"
expect {
"%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
sleep 1
expect {
-ex "%1b%24%42%25%46%25%39%25%48%25%46%1b%28%42 (14)\r\n" { pass $test }
}
close
begin_strcut_test 8 20
set test "non-asciish characters (2) ($from, $length)"
send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
sleep 1
expect {
-ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" {
pass $test
}
}
begin_strcut_test 8 21
set test "non-asciish characters (2) ($from, $length)"
sleep 1
send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
expect {
-ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" {
pass $test
}
}
begin_strcut_test 11 17
set test "non-asciish characters (2) ($from, $length)"
sleep 1
send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
expect {
-ex "%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%1b%28%42 (17)\r\n" {
pass $test
}
}
# vim: sts=4 sw=4 ts=4 et

91
ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp

@ -0,0 +1,91 @@
#!/usr/bin/expect -f
# Starts tests/strcut for EUC-JP with the given offset and length, and stores
# both in the globals `from` and `length` so that test names can include them.
proc begin_strcut_test {_from _length} {
global spawn_id from length
set from $_from
set length $_length
spawn tests/strcut Japanese EUC-JP $_from $_length
# NOTE(review): `timeout` is not in the `global` list above, so this may only
# set a proc-local variable -- confirm against Expect's timeout scoping rules.
set timeout 1
# Fallback for every later expect: any other complete output line fails the
# test currently named by $test.
expect_after {
"\[^\r\n\]*\r\n" { fail $test }
}
}
begin_strcut_test -1 2
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
close
begin_strcut_test 2 -1
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
close
begin_strcut_test 3 2
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "%74%74 (2)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "%a5%b9 (2)\r\n" { pass $test }
}
close
begin_strcut_test 5 8
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "%65%73%74 (3)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "%a5%c8%a5%c6%a5%b9%a5%c8 (8)\r\n" { pass $test }
}
close
begin_strcut_test 1 15
set test "asciish characters ($from, $length)"
send "testestestestestes\r"
expect {
"%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "%a5%c6%a5%b9%a5%c8%a5%c6%a5%b9%a5%c8 (12)\r\n" { pass $test }
}
close
# vim: sts=4 sw=4 ts=4 et encoding=EUC-JP

91
ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp

@ -0,0 +1,91 @@
#!/usr/bin/expect -f
# Starts tests/strcut for UTF-8 with the given offset and length, and stores
# both in the globals `from` and `length` so that test names can include them.
proc begin_strcut_test {_from _length} {
global spawn_id from length
set from $_from
set length $_length
spawn tests/strcut Japanese UTF-8 $_from $_length
# NOTE(review): `timeout` is not in the `global` list above, so this may only
# set a proc-local variable -- confirm against Expect's timeout scoping rules.
set timeout 1
# Fallback for every later expect: any other complete output line fails the
# test currently named by $test.
expect_after {
"\[^\r\n\]*\r\n" { fail $test }
}
}
begin_strcut_test -1 2
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
close
begin_strcut_test 2 -1
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "***ERROR***\r\n" { pass $test }
}
close
begin_strcut_test 3 2
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "%74%74 (2)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "(0)\r\n" { pass $test }
}
close
begin_strcut_test 5 8
set test "asciish characters ($from, $length)"
send "testtest\r"
expect {
-ex "%65%73%74 (3)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "%e3%82%b9%e3%83%88 (6)\r\n" { pass $test }
}
close
begin_strcut_test 1 15
set test "asciish characters ($from, $length)"
send "testestestestestes\r"
expect {
"%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
}
set test "non-asciish characters ($from, $length)"
send "テストテスト\r"
expect {
-ex "%e3%83%86%e3%82%b9%e3%83%88%e3%83%86%e3%82%b9 (15)\r\n" { pass $test }
}
close
# vim: sts=4 sw=4 ts=4 et encoding=UTF-8

79
ext/mbstring/libmbfl/tests/strwidth.c

@ -0,0 +1,79 @@
/**
* this is a small sample script to use libmbfl.
* Rui Hirokawa <hirokawa@php.net>
*
* this file is encoded in EUC-JP.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mbfl/mbfilter.h"
/**
 * Prints mbfl_strwidth() for each line read from stdin.
 *
 * Usage: strwidth lang encoding
 *
 * Input lines may end in LF, CR or CRLF. Returns EXIT_FAILURE on bad
 * arguments or when the input buffer cannot grow.
 */
int main(int argc, char **argv)
{
	enum mbfl_no_encoding no_encoding;
	enum mbfl_no_language no_language;
	mbfl_memory_device dev;
	mbfl_string string;
	int final = 0;
	int state = 0; /* 1 while the byte read just before was a CR */

	if (argc < 3) {
		fprintf(stderr, "Usage: %s lang encoding\n", argv[0]);
		return EXIT_FAILURE;
	}
	if ((no_language = mbfl_name2no_language(argv[1])) ==
			mbfl_no_language_invalid) {
		printf("Unsupported NLS: %s\n", argv[1]);
		return EXIT_FAILURE;
	}
	if ((no_encoding = mbfl_name2no_encoding(argv[2])) ==
			mbfl_no_encoding_invalid) {
		printf("Unsupported encoding: %s\n", argv[2]);
		return EXIT_FAILURE;
	}

	do {
		mbfl_memory_device_init(&dev, 0, 4096);
		mbfl_string_init_set(&string, no_language, no_encoding);
		for (;;) {
			const int c = fgetc(stdin);
			if (c == EOF) {
				final = 1;
				break;
			} else if (c == 10) {
				if (state == 1) {
					/* LF completing a CRLF pair: the line was already emitted at the CR */
					state = 0;
					continue;
				}
				break;
			} else if (c == 13) {
				state = 1;
				break;
			}
			/*
			 * An ordinary byte ends any pending CR; otherwise the LF that
			 * terminates this line would be mistaken for the tail of CRLF.
			 */
			state = 0;
			if (dev.pos >= dev.length) {
				if (dev.length + dev.allocsz < dev.length) {
					printf("Unable to allocate memory\n");
					return EXIT_FAILURE;
				}
				mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
						dev.allocsz);
				if (dev.pos >= dev.length) {
					/* the call reports no status here, so verify the buffer really grew */
					printf("Unable to allocate memory\n");
					return EXIT_FAILURE;
				}
			}
			dev.buffer[dev.pos++] = (unsigned char)c;
		}
		mbfl_memory_device_result(&dev, &string);
		printf("%d\n", mbfl_strwidth(&string));
		mbfl_string_clear(&string);
	} while (!final);

	return EXIT_SUCCESS;
}

1
ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am

@ -0,0 +1 @@
EXTRA_DIST=*.exp

47
ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp

@ -0,0 +1,47 @@
#!/usr/bin/expect -f
# Drives the tests/strwidth program with the arguments "Japanese" and "UTF-8":
# each test sends one line of input and expects the printed width in reply.
spawn tests/strwidth Japanese UTF-8
# Timeout applied to each expect below (Expect measures it in seconds).
set timeout 1
# Fallback for every expect below: any other complete output line (text ending
# in CR LF) fails the test currently named by $test.
expect_after {
"\[^\r\n\]*\r\n" { fail $test }
}
# 8 ASCII characters, expected width 8.
set test "basic test"
send "testtest\r"
expect {
"8\r\n" { pass $test }
}
# 2 kanji, expected width 4.
set test "CJK kanji"
send "漢字\r"
expect {
"4\r\n" { pass $test }
}
# 4 hiragana, expected width 8.
set test "CJK hiragana"
send "ひらがな\r"
expect {
"8\r\n" { pass $test }
}
# 4 katakana, expected width 8.
set test "CJK katakana"
send "カタカナ\r"
expect {
"8\r\n" { pass $test }
}
# 11 characters, expected width 20.
# NOTE(review): presumably the two curly quotation marks are counted as one
# column each and the other nine characters as two - confirm against the
# width table used by mbfl_strwidth.
set test "Fullwidth symbols (1)"
send "〜!”#$%&’())\r"
expect {
"20\r\n" { pass $test }
}
# 3 circle symbols, expected width 3 (one column each).
set test "Halfwidth symbols assumed to be fullwidth in JISX0208 (2)"
send "○●◎\r"
expect {
"3\r\n" { pass $test }
}
# End the session with the spawned process.
close
# vim: sts=4 sw=4 ts=4 et encoding=UTF-8

8
ext/mbstring/mbstring.c

@ -139,9 +139,16 @@ static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
/*
 * Encoding list that php_mb_default_identify_list pairs with
 * mbfl_no_language_turkish.
 * NOTE(review): presumably the candidates for encoding detection, tried in
 * this order - confirm against the code that consumes the table.
 */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_cp1254,
mbfl_no_encoding_8859_9
};
/*
 * Encoding list that php_mb_default_identify_list pairs with
 * mbfl_no_language_ukrainian.
 * NOTE(review): presumably the candidates for encoding detection, tried in
 * this order - confirm against the code that consumes the table.
 */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
mbfl_no_encoding_koi8u
};
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8
@ -156,6 +163,7 @@ static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
};

Loading…
Cancel
Save