You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4899 lines
137 KiB

24 years ago
25 years ago
21 years ago
20 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
20 years ago
20 years ago
20 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
18 years ago
24 years ago
24 years ago
24 years ago
24 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
19 years ago
19 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
21 years ago
24 years ago
24 years ago
24 years ago
20 years ago
20 years ago
24 years ago
24 years ago
24 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
24 years ago
19 years ago
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2009 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. | Rui Hirokawa <hirokawa@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* $Id$ */
  20. /*
  21. * PHP 4 Multibyte String module "mbstring"
  22. *
  23. * History:
  24. * 2000.5.19 Release php-4.0RC2_jstring-1.0
  25. * 2001.4.1 Release php4_jstring-1.0.91
  26. * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
  27. * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
  28. */
  29. /*
  30. * PHP3 Internationalization support program.
  31. *
  32. * Copyright (c) 1999,2000 by the PHP3 internationalization team.
  33. * All rights reserved.
  34. *
  35. * See README_PHP3-i18n-ja for more detail.
  36. *
  37. * Authors:
  38. * Hironori Sato <satoh@jpnnet.com>
  39. * Shigeru Kanemoto <sgk@happysize.co.jp>
  40. * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
  41. * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
  42. */
  43. /* {{{ includes */
  44. #ifdef HAVE_CONFIG_H
  45. #include "config.h"
  46. #endif
  47. #include "php.h"
  48. #include "php_ini.h"
  49. #include "php_variables.h"
  50. #include "mbstring.h"
  51. #include "ext/standard/php_string.h"
  52. #include "ext/standard/php_mail.h"
  53. #include "ext/standard/exec.h"
  54. #include "ext/standard/php_smart_str.h"
  55. #include "ext/standard/url.h"
  56. #include "main/php_output.h"
  57. #include "ext/standard/info.h"
  58. #include "libmbfl/mbfl/mbfl_allocators.h"
  59. #include "php_variables.h"
  60. #include "php_globals.h"
  61. #include "rfc1867.h"
  62. #include "php_content_types.h"
  63. #include "SAPI.h"
  64. #include "php_unicode.h"
  65. #include "TSRM.h"
  66. #include "mb_gpc.h"
  67. #if HAVE_MBREGEX
  68. #include "php_mbregex.h"
  69. #endif
  70. #ifdef ZEND_MULTIBYTE
  71. #include "zend_multibyte.h"
  72. #endif /* ZEND_MULTIBYTE */
  73. #if HAVE_ONIG
  74. #include "php_onig_compat.h"
  75. #include <oniguruma.h>
  76. #undef UChar
  77. #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
  78. #include "ext/pcre/php_pcre.h"
  79. #endif
  80. /* }}} */
  81. #if HAVE_MBSTRING
  82. /* {{{ prototypes */
  83. ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  84. static PHP_GINIT_FUNCTION(mbstring);
  85. static PHP_GSHUTDOWN_FUNCTION(mbstring);
  86. #ifdef ZEND_MULTIBYTE
  87. static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
  88. static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
  89. static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
  90. static int php_mb_set_zend_encoding(TSRMLS_D);
  91. #endif
  92. /* }}} */
  93. /* {{{ php_mb_default_identify_list */
  /* One row of the per-language default detect-order table: a language id plus
   * the array of encodings registered for it.  Rows are searched by language in
   * php_mb_nls_get_default_detect_order_list(). */
  94. typedef struct _php_mb_nls_ident_list {
  95. enum mbfl_no_language lang;
  96. const enum mbfl_no_encoding* list;
  97. int list_size; /* number of elements in `list` */
  98. } php_mb_nls_ident_list;
  /* Encoding candidates registered for mbfl_no_language_japanese. */
  99. static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  100. mbfl_no_encoding_ascii,
  101. mbfl_no_encoding_jis,
  102. mbfl_no_encoding_utf8,
  103. mbfl_no_encoding_euc_jp,
  104. mbfl_no_encoding_sjis
  105. };
  /* Encoding candidates registered for mbfl_no_language_simplified_chinese. */
  106. static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  107. mbfl_no_encoding_ascii,
  108. mbfl_no_encoding_utf8,
  109. mbfl_no_encoding_euc_cn,
  110. mbfl_no_encoding_cp936
  111. };
  /* Encoding candidates registered for mbfl_no_language_traditional_chinese. */
  112. static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  113. mbfl_no_encoding_ascii,
  114. mbfl_no_encoding_utf8,
  115. mbfl_no_encoding_euc_tw,
  116. mbfl_no_encoding_big5
  117. };
  /* Encoding candidates registered for mbfl_no_language_korean. */
  118. static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  119. mbfl_no_encoding_ascii,
  120. mbfl_no_encoding_utf8,
  121. mbfl_no_encoding_euc_kr,
  122. mbfl_no_encoding_uhc
  123. };
  /* Encoding candidates registered for mbfl_no_language_russian. */
  124. static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  125. mbfl_no_encoding_ascii,
  126. mbfl_no_encoding_utf8,
  127. mbfl_no_encoding_koi8r,
  128. mbfl_no_encoding_cp1251,
  129. mbfl_no_encoding_cp866
  130. };
  /* Encoding candidates registered for mbfl_no_language_armenian. */
  131. static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  132. mbfl_no_encoding_ascii,
  133. mbfl_no_encoding_utf8,
  134. mbfl_no_encoding_armscii8
  135. };
  /* Encoding candidates registered for mbfl_no_language_turkish. */
  136. static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  137. mbfl_no_encoding_ascii,
  138. mbfl_no_encoding_utf8,
  139. mbfl_no_encoding_cp1254,
  140. mbfl_no_encoding_8859_9
  141. };
  /* Encoding candidates registered for mbfl_no_language_ukrainian. */
  142. static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
  143. mbfl_no_encoding_ascii,
  144. mbfl_no_encoding_utf8,
  145. mbfl_no_encoding_koi8u
  146. };
  /* Candidates for mbfl_no_language_neutral; also the fallback handed out by
   * php_mb_nls_get_default_detect_order_list() when no language matches. */
  147. static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  148. mbfl_no_encoding_ascii,
  149. mbfl_no_encoding_utf8
  150. };
  151. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  152. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  153. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  154. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  155. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  156. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  157. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  158. { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
  159. { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
  160. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  161. };
  162. /* }}} */
  163. /* {{{ mb_overload_def mb_ovld[] */
  164. static const struct mb_overload_def mb_ovld[] = {
  165. {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
  166. {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
  167. {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
  168. {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
  169. {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
  170. {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_stripos"},
  171. {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
  172. {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
  173. {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
  174. {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
  175. {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
  176. {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
  177. {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
  178. #if HAVE_MBREGEX
  179. {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
  180. {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
  181. {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
  182. {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
  183. {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
  184. #endif
  185. {0, NULL, NULL, NULL}
  186. };
  187. /* }}} */
  188. /* {{{ arginfo */
  /* Argument descriptions for the functions exported by this extension.  Each
   * arginfo_<function> table is attached to its function through a PHP_FE()
   * row in mbstring_functions[].
   *
   * NOTE(review): the last numeric argument of ZEND_BEGIN_ARG_INFO_EX appears
   * to be the number of required parameters (e.g. 2 for mb_strpos, whose first
   * two entries are haystack and needle), and the first argument of
   * ZEND_ARG_INFO appears to be a pass-by-reference flag (it is 1 only for
   * `result`, `registers` and the variadic tail of mb_convert_variables).
   * Confirm both against zend_API.h. */
  189. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
  190. ZEND_ARG_INFO(0, language)
  191. ZEND_END_ARG_INFO()
  192. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
  193. ZEND_ARG_INFO(0, encoding)
  194. ZEND_END_ARG_INFO()
  195. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
  196. ZEND_ARG_INFO(0, type)
  197. ZEND_END_ARG_INFO()
  198. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
  199. ZEND_ARG_INFO(0, encoding)
  200. ZEND_END_ARG_INFO()
  201. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
  202. ZEND_ARG_INFO(0, encoding)
  203. ZEND_END_ARG_INFO()
  204. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
  205. ZEND_ARG_INFO(0, substchar)
  206. ZEND_END_ARG_INFO()
  207. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
  208. ZEND_ARG_INFO(0, encoding)
  209. ZEND_END_ARG_INFO()
  210. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
  211. ZEND_ARG_INFO(0, encoded_string)
  212. ZEND_ARG_INFO(1, result)
  213. ZEND_END_ARG_INFO()
  214. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
  215. ZEND_ARG_INFO(0, contents)
  216. ZEND_ARG_INFO(0, status)
  217. ZEND_END_ARG_INFO()
  218. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
  219. ZEND_ARG_INFO(0, str)
  220. ZEND_ARG_INFO(0, encoding)
  221. ZEND_END_ARG_INFO()
  222. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
  223. ZEND_ARG_INFO(0, haystack)
  224. ZEND_ARG_INFO(0, needle)
  225. ZEND_ARG_INFO(0, offset)
  226. ZEND_ARG_INFO(0, encoding)
  227. ZEND_END_ARG_INFO()
  228. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
  229. ZEND_ARG_INFO(0, haystack)
  230. ZEND_ARG_INFO(0, needle)
  231. ZEND_ARG_INFO(0, offset)
  232. ZEND_ARG_INFO(0, encoding)
  233. ZEND_END_ARG_INFO()
  234. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
  235. ZEND_ARG_INFO(0, haystack)
  236. ZEND_ARG_INFO(0, needle)
  237. ZEND_ARG_INFO(0, offset)
  238. ZEND_ARG_INFO(0, encoding)
  239. ZEND_END_ARG_INFO()
  240. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
  241. ZEND_ARG_INFO(0, haystack)
  242. ZEND_ARG_INFO(0, needle)
  243. ZEND_ARG_INFO(0, offset)
  244. ZEND_ARG_INFO(0, encoding)
  245. ZEND_END_ARG_INFO()
  246. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
  247. ZEND_ARG_INFO(0, haystack)
  248. ZEND_ARG_INFO(0, needle)
  249. ZEND_ARG_INFO(0, part)
  250. ZEND_ARG_INFO(0, encoding)
  251. ZEND_END_ARG_INFO()
  252. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
  253. ZEND_ARG_INFO(0, haystack)
  254. ZEND_ARG_INFO(0, needle)
  255. ZEND_ARG_INFO(0, part)
  256. ZEND_ARG_INFO(0, encoding)
  257. ZEND_END_ARG_INFO()
  258. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
  259. ZEND_ARG_INFO(0, haystack)
  260. ZEND_ARG_INFO(0, needle)
  261. ZEND_ARG_INFO(0, part)
  262. ZEND_ARG_INFO(0, encoding)
  263. ZEND_END_ARG_INFO()
  264. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
  265. ZEND_ARG_INFO(0, haystack)
  266. ZEND_ARG_INFO(0, needle)
  267. ZEND_ARG_INFO(0, part)
  268. ZEND_ARG_INFO(0, encoding)
  269. ZEND_END_ARG_INFO()
  270. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
  271. ZEND_ARG_INFO(0, haystack)
  272. ZEND_ARG_INFO(0, needle)
  273. ZEND_ARG_INFO(0, encoding)
  274. ZEND_END_ARG_INFO()
  275. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
  276. ZEND_ARG_INFO(0, str)
  277. ZEND_ARG_INFO(0, start)
  278. ZEND_ARG_INFO(0, length)
  279. ZEND_ARG_INFO(0, encoding)
  280. ZEND_END_ARG_INFO()
  281. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
  282. ZEND_ARG_INFO(0, str)
  283. ZEND_ARG_INFO(0, start)
  284. ZEND_ARG_INFO(0, length)
  285. ZEND_ARG_INFO(0, encoding)
  286. ZEND_END_ARG_INFO()
  287. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
  288. ZEND_ARG_INFO(0, str)
  289. ZEND_ARG_INFO(0, encoding)
  290. ZEND_END_ARG_INFO()
  291. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
  292. ZEND_ARG_INFO(0, str)
  293. ZEND_ARG_INFO(0, start)
  294. ZEND_ARG_INFO(0, width)
  295. ZEND_ARG_INFO(0, trimmarker)
  296. ZEND_ARG_INFO(0, encoding)
  297. ZEND_END_ARG_INFO()
  298. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
  299. ZEND_ARG_INFO(0, str)
  300. ZEND_ARG_INFO(0, to)
  301. ZEND_ARG_INFO(0, from)
  302. ZEND_END_ARG_INFO()
  303. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
  304. ZEND_ARG_INFO(0, sourcestring)
  305. ZEND_ARG_INFO(0, mode)
  306. ZEND_ARG_INFO(0, encoding)
  307. ZEND_END_ARG_INFO()
  308. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
  309. ZEND_ARG_INFO(0, sourcestring)
  310. ZEND_ARG_INFO(0, encoding)
  311. ZEND_END_ARG_INFO()
  312. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
  313. ZEND_ARG_INFO(0, sourcestring)
  314. ZEND_ARG_INFO(0, encoding)
  315. ZEND_END_ARG_INFO()
  316. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
  317. ZEND_ARG_INFO(0, str)
  318. ZEND_ARG_INFO(0, encoding_list)
  319. ZEND_ARG_INFO(0, strict)
  320. ZEND_END_ARG_INFO()
  321. ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
  322. ZEND_END_ARG_INFO()
  323. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
  324. ZEND_ARG_INFO(0, encoding)
  325. ZEND_END_ARG_INFO()
  326. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
  327. ZEND_ARG_INFO(0, str)
  328. ZEND_ARG_INFO(0, charset)
  329. ZEND_ARG_INFO(0, transfer)
  330. ZEND_ARG_INFO(0, linefeed)
  331. ZEND_ARG_INFO(0, indent)
  332. ZEND_END_ARG_INFO()
  333. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
  334. ZEND_ARG_INFO(0, string)
  335. ZEND_END_ARG_INFO()
  336. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
  337. ZEND_ARG_INFO(0, str)
  338. ZEND_ARG_INFO(0, option)
  339. ZEND_ARG_INFO(0, encoding)
  340. ZEND_END_ARG_INFO()
  341. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
  342. ZEND_ARG_INFO(0, to)
  343. ZEND_ARG_INFO(0, from)
  344. ZEND_ARG_INFO(1, ...)
  345. ZEND_END_ARG_INFO()
  346. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
  347. ZEND_ARG_INFO(0, string)
  348. ZEND_ARG_INFO(0, convmap)
  349. ZEND_ARG_INFO(0, encoding)
  350. ZEND_END_ARG_INFO()
  351. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
  352. ZEND_ARG_INFO(0, string)
  353. ZEND_ARG_INFO(0, convmap)
  354. ZEND_ARG_INFO(0, encoding)
  355. ZEND_END_ARG_INFO()
  356. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
  357. ZEND_ARG_INFO(0, to)
  358. ZEND_ARG_INFO(0, subject)
  359. ZEND_ARG_INFO(0, message)
  360. ZEND_ARG_INFO(0, additional_headers)
  361. ZEND_ARG_INFO(0, additional_parameters)
  362. ZEND_END_ARG_INFO()
  363. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
  364. ZEND_ARG_INFO(0, type)
  365. ZEND_END_ARG_INFO()
  366. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
  367. ZEND_ARG_INFO(0, var)
  368. ZEND_ARG_INFO(0, encoding)
  369. ZEND_END_ARG_INFO()
  /* Argument descriptions for the mb_ereg* / mb_regex* / mb_split functions.
   * None of these is referenced by a PHP_FE() row in mbstring_functions[];
   * presumably they are consumed by PHP_MBREGEX_FUNCTION_ENTRIES - verify in
   * php_mbregex.h. */
  370. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
  371. ZEND_ARG_INFO(0, encoding)
  372. ZEND_END_ARG_INFO()
  373. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
  374. ZEND_ARG_INFO(0, pattern)
  375. ZEND_ARG_INFO(0, string)
  376. ZEND_ARG_INFO(1, registers)
  377. ZEND_END_ARG_INFO()
  378. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
  379. ZEND_ARG_INFO(0, pattern)
  380. ZEND_ARG_INFO(0, string)
  381. ZEND_ARG_INFO(1, registers)
  382. ZEND_END_ARG_INFO()
  383. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
  384. ZEND_ARG_INFO(0, pattern)
  385. ZEND_ARG_INFO(0, replacement)
  386. ZEND_ARG_INFO(0, string)
  387. ZEND_ARG_INFO(0, option)
  388. ZEND_END_ARG_INFO()
  389. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
  390. ZEND_ARG_INFO(0, pattern)
  391. ZEND_ARG_INFO(0, replacement)
  392. ZEND_ARG_INFO(0, string)
  393. ZEND_END_ARG_INFO()
  394. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
  395. ZEND_ARG_INFO(0, pattern)
  396. ZEND_ARG_INFO(0, string)
  397. ZEND_ARG_INFO(0, limit)
  398. ZEND_END_ARG_INFO()
  399. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
  400. ZEND_ARG_INFO(0, pattern)
  401. ZEND_ARG_INFO(0, string)
  402. ZEND_ARG_INFO(0, option)
  403. ZEND_END_ARG_INFO()
  404. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
  405. ZEND_ARG_INFO(0, pattern)
  406. ZEND_ARG_INFO(0, option)
  407. ZEND_END_ARG_INFO()
  408. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
  409. ZEND_ARG_INFO(0, pattern)
  410. ZEND_ARG_INFO(0, option)
  411. ZEND_END_ARG_INFO()
  412. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
  413. ZEND_ARG_INFO(0, pattern)
  414. ZEND_ARG_INFO(0, option)
  415. ZEND_END_ARG_INFO()
  416. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
  417. ZEND_ARG_INFO(0, string)
  418. ZEND_ARG_INFO(0, pattern)
  419. ZEND_ARG_INFO(0, option)
  420. ZEND_END_ARG_INFO()
  421. ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
  422. ZEND_END_ARG_INFO()
  423. ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
  424. ZEND_END_ARG_INFO()
  425. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
  426. ZEND_ARG_INFO(0, position)
  427. ZEND_END_ARG_INFO()
  428. ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
  429. ZEND_ARG_INFO(0, options)
  430. ZEND_END_ARG_INFO()
  431. /* }}} */
  432. /* {{{ zend_function_entry mbstring_functions[] */
  /* Function table of the extension; handed to the engine through
   * mbstring_module_entry.  Each PHP_FE() row names a function together with
   * its arginfo table.  The regex rows are appended only when HAVE_MBREGEX is
   * set, and the table ends with an all-NULL terminator row. */
  433. const zend_function_entry mbstring_functions[] = {
  434. PHP_FE(mb_convert_case, arginfo_mb_convert_case)
  435. PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
  436. PHP_FE(mb_strtolower, arginfo_mb_strtolower)
  437. PHP_FE(mb_language, arginfo_mb_language)
  438. PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
  439. PHP_FE(mb_http_input, arginfo_mb_http_input)
  440. PHP_FE(mb_http_output, arginfo_mb_http_output)
  441. PHP_FE(mb_detect_order, arginfo_mb_detect_order)
  442. PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
  443. PHP_FE(mb_parse_str, arginfo_mb_parse_str)
  444. PHP_FE(mb_output_handler, arginfo_mb_output_handler)
  445. PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
  446. PHP_FE(mb_strlen, arginfo_mb_strlen)
  447. PHP_FE(mb_strpos, arginfo_mb_strpos)
  448. PHP_FE(mb_strrpos, arginfo_mb_strrpos)
  449. PHP_FE(mb_stripos, arginfo_mb_stripos)
  450. PHP_FE(mb_strripos, arginfo_mb_strripos)
  451. PHP_FE(mb_strstr, arginfo_mb_strstr)
  452. PHP_FE(mb_strrchr, arginfo_mb_strrchr)
  453. PHP_FE(mb_stristr, arginfo_mb_stristr)
  454. PHP_FE(mb_strrichr, arginfo_mb_strrichr)
  455. PHP_FE(mb_substr_count, arginfo_mb_substr_count)
  456. PHP_FE(mb_substr, arginfo_mb_substr)
  457. PHP_FE(mb_strcut, arginfo_mb_strcut)
  458. PHP_FE(mb_strwidth, arginfo_mb_strwidth)
  459. PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
  460. PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
  461. PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
  462. PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
  463. PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
  464. PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
  465. PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
  466. PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
  467. PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
  468. PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
  469. PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
  470. PHP_FE(mb_send_mail, arginfo_mb_send_mail)
  471. PHP_FE(mb_get_info, arginfo_mb_get_info)
  472. PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
  473. #if HAVE_MBREGEX
  474. PHP_MBREGEX_FUNCTION_ENTRIES
  475. #endif
  476. { NULL, NULL, NULL }
  477. };
  478. /* }}} */
  479. /* {{{ zend_module_entry mbstring_module_entry */
  /* Module descriptor for the "mbstring" extension: ties together the function
   * table, the MINIT/MSHUTDOWN/RINIT/RSHUTDOWN/MINFO hooks and the module
   * globals with their GINIT/GSHUTDOWN hooks. */
  480. zend_module_entry mbstring_module_entry = {
  481. STANDARD_MODULE_HEADER,
  482. "mbstring",
  483. mbstring_functions,
  484. PHP_MINIT(mbstring),
  485. PHP_MSHUTDOWN(mbstring),
  486. PHP_RINIT(mbstring),
  487. PHP_RSHUTDOWN(mbstring),
  488. PHP_MINFO(mbstring),
  489. NO_VERSION_YET,
  490. PHP_MODULE_GLOBALS(mbstring),
  491. PHP_GINIT(mbstring),
  492. PHP_GSHUTDOWN(mbstring),
  493. NULL, /* NOTE(review): presumably the post-deactivate hook slot - confirm against zend_modules.h */
  494. STANDARD_MODULE_PROPERTIES_EX
  495. };
  496. /* }}} */
  497. /* {{{ static sapi_post_entry php_post_entries[] */
  /* POST content-type handlers using php_std_post_handler for form-encoded
   * bodies.  Same layout as mbstr_post_entries[], which differs only in using
   * php_mb_post_handler for DEFAULT_POST_CONTENT_TYPE.  The table ends with an
   * all-NULL terminator row. */
  498. static sapi_post_entry php_post_entries[] = {
  499. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  500. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  501. { NULL, 0, NULL, NULL }
  502. };
  503. /* }}} */
  504. #ifdef COMPILE_DL_MBSTRING
  505. ZEND_GET_MODULE(mbstring)
  506. #endif
  507. /* {{{ allocators */
  508. static void *_php_mb_allocators_malloc(unsigned int sz)
  509. {
  510. return emalloc(sz);
  511. }
  512. static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
  513. {
  514. return erealloc(ptr, sz);
  515. }
  516. static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
  517. {
  518. return ecalloc(nelems, szelem);
  519. }
  520. static void _php_mb_allocators_free(void *ptr)
  521. {
  522. efree(ptr);
  523. }
  524. static void *_php_mb_allocators_pmalloc(unsigned int sz)
  525. {
  526. return pemalloc(sz, 1);
  527. }
  528. static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
  529. {
  530. return perealloc(ptr, sz, 1);
  531. }
  532. static void _php_mb_allocators_pfree(void *ptr)
  533. {
  534. pefree(ptr, 1);
  535. }
  536. static mbfl_allocators _php_mb_allocators = {
  537. _php_mb_allocators_malloc,
  538. _php_mb_allocators_realloc,
  539. _php_mb_allocators_calloc,
  540. _php_mb_allocators_free,
  541. _php_mb_allocators_pmalloc,
  542. _php_mb_allocators_prealloc,
  543. _php_mb_allocators_pfree
  544. };
  545. /* }}} */
  546. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  /* POST content-type handlers using php_mb_post_handler for form-encoded
   * bodies.  Same layout as php_post_entries[], which differs only in using
   * php_std_post_handler for DEFAULT_POST_CONTENT_TYPE.  The table ends with
   * an all-NULL terminator row. */
  547. static sapi_post_entry mbstr_post_entries[] = {
  548. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  549. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  550. { NULL, 0, NULL, NULL }
  551. };
  552. /* }}} */
  553. /* {{{ static int php_mb_parse_encoding_list()
  554. * Return 0 if input contains any illegal encoding, otherwise 1.
  555. * Even if any illegal encoding is detected the result may contain a list
  556. * of parsed encodings.
  557. */
  558. static int
  559. php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  560. {
  561. int n, l, size, bauto, ret = 1;
  562. char *p, *p1, *p2, *endp, *tmpstr;
  563. enum mbfl_no_encoding no_encoding;
  564. enum mbfl_no_encoding *src, *entry, *list;
  565. list = NULL;
  566. if (value == NULL || value_length <= 0) {
  567. if (return_list) {
  568. *return_list = NULL;
  569. }
  570. if (return_size) {
  571. *return_size = 0;
  572. }
  573. return 0;
  574. } else {
  575. enum mbfl_no_encoding *identify_list;
  576. int identify_list_size;
  577. identify_list = MBSTRG(default_detect_order_list);
  578. identify_list_size = MBSTRG(default_detect_order_list_size);
  579. /* copy the value string for work */
  580. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  581. tmpstr = (char *)estrndup(value+1, value_length-2);
  582. value_length -= 2;
  583. }
  584. else
  585. tmpstr = (char *)estrndup(value, value_length);
  586. if (tmpstr == NULL) {
  587. return 0;
  588. }
  589. /* count the number of listed encoding names */
  590. endp = tmpstr + value_length;
  591. n = 1;
  592. p1 = tmpstr;
  593. while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
  594. p1 = p2 + 1;
  595. n++;
  596. }
  597. size = n + identify_list_size;
  598. /* make list */
  599. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  600. if (list != NULL) {
  601. entry = list;
  602. n = 0;
  603. bauto = 0;
  604. p1 = tmpstr;
  605. do {
  606. p2 = p = php_memnstr(p1, ",", 1, endp);
  607. if (p == NULL) {
  608. p = endp;
  609. }
  610. *p = '\0';
  611. /* trim spaces */
  612. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  613. p1++;
  614. }
  615. p--;
  616. while (p > p1 && (*p == ' ' || *p == '\t')) {
  617. *p = '\0';
  618. p--;
  619. }
  620. /* convert to the encoding number and check encoding */
  621. if (strcasecmp(p1, "auto") == 0) {
  622. if (!bauto) {
  623. bauto = 1;
  624. l = identify_list_size;
  625. src = identify_list;
  626. while (l > 0) {
  627. *entry++ = *src++;
  628. l--;
  629. n++;
  630. }
  631. }
  632. } else {
  633. no_encoding = mbfl_name2no_encoding(p1);
  634. if (no_encoding != mbfl_no_encoding_invalid) {
  635. *entry++ = no_encoding;
  636. n++;
  637. } else {
  638. ret = 0;
  639. }
  640. }
  641. p1 = p2 + 1;
  642. } while (n < size && p2 != NULL);
  643. if (n > 0) {
  644. if (return_list) {
  645. *return_list = list;
  646. } else {
  647. pefree(list, persistent);
  648. }
  649. } else {
  650. pefree(list, persistent);
  651. if (return_list) {
  652. *return_list = NULL;
  653. }
  654. ret = 0;
  655. }
  656. if (return_size) {
  657. *return_size = n;
  658. }
  659. } else {
  660. if (return_list) {
  661. *return_list = NULL;
  662. }
  663. if (return_size) {
  664. *return_size = 0;
  665. }
  666. ret = 0;
  667. }
  668. efree(tmpstr);
  669. }
  670. return ret;
  671. }
  672. /* }}} */
  673. /* {{{ MBSTRING_API php_mb_check_encoding_list */
  674. MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
  675. return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
  676. }
  677. /* }}} */
  678. /* {{{ static int php_mb_parse_encoding_array()
  679. * Return 0 if input contains any illegal encoding, otherwise 1.
  680. * Even if any illegal encoding is detected the result may contain a list
  681. * of parsed encodings.
  682. */
  683. static int
  684. php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  685. {
  686. zval **hash_entry;
  687. HashTable *target_hash;
  688. int i, n, l, size, bauto,ret = 1;
  689. enum mbfl_no_encoding no_encoding;
  690. enum mbfl_no_encoding *src, *list, *entry;
  691. list = NULL;
  692. if (Z_TYPE_P(array) == IS_ARRAY) {
  693. enum mbfl_no_encoding *identify_list;
  694. int identify_list_size;
  695. identify_list = MBSTRG(default_detect_order_list);
  696. identify_list_size = MBSTRG(default_detect_order_list_size);
  697. target_hash = Z_ARRVAL_P(array);
  698. zend_hash_internal_pointer_reset(target_hash);
  699. i = zend_hash_num_elements(target_hash);
  700. size = i + identify_list_size;
  701. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  702. if (list != NULL) {
  703. entry = list;
  704. bauto = 0;
  705. n = 0;
  706. while (i > 0) {
  707. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  708. break;
  709. }
  710. convert_to_string_ex(hash_entry);
  711. if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
  712. if (!bauto) {
  713. bauto = 1;
  714. l = identify_list_size;
  715. src = identify_list;
  716. while (l > 0) {
  717. *entry++ = *src++;
  718. l--;
  719. n++;
  720. }
  721. }
  722. } else {
  723. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
  724. if (no_encoding != mbfl_no_encoding_invalid) {
  725. *entry++ = no_encoding;
  726. n++;
  727. } else {
  728. ret = 0;
  729. }
  730. }
  731. zend_hash_move_forward(target_hash);
  732. i--;
  733. }
  734. if (n > 0) {
  735. if (return_list) {
  736. *return_list = list;
  737. } else {
  738. pefree(list, persistent);
  739. }
  740. } else {
  741. pefree(list, persistent);
  742. if (return_list) {
  743. *return_list = NULL;
  744. }
  745. ret = 0;
  746. }
  747. if (return_size) {
  748. *return_size = n;
  749. }
  750. } else {
  751. if (return_list) {
  752. *return_list = NULL;
  753. }
  754. if (return_size) {
  755. *return_size = 0;
  756. }
  757. ret = 0;
  758. }
  759. }
  760. return ret;
  761. }
  762. /* }}} */
  763. static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
  764. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
  765. static void _php_mb_free_regex(void *opaque);
  766. #if HAVE_ONIG
  767. /* {{{ _php_mb_compile_regex */
  768. static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
  769. {
  770. php_mb_regex_t *retval;
  771. OnigErrorInfo err_info;
  772. int err_code;
  773. if ((err_code = onig_new(&retval,
  774. (const OnigUChar *)pattern,
  775. (const OnigUChar *)pattern + strlen(pattern),
  776. ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
  777. ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
  778. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  779. onig_error_code_to_str(err_str, err_code, err_info);
  780. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
  781. retval = NULL;
  782. }
  783. return retval;
  784. }
  785. /* }}} */
  786. /* {{{ _php_mb_match_regex */
  787. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  788. {
  789. return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
  790. (const OnigUChar*)str + str_len, (const OnigUChar *)str,
  791. (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
  792. }
  793. /* }}} */
  794. /* {{{ _php_mb_free_regex */
  795. static void _php_mb_free_regex(void *opaque)
  796. {
  797. onig_free((php_mb_regex_t *)opaque);
  798. }
  799. /* }}} */
  800. #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
  801. /* {{{ _php_mb_compile_regex */
  802. static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
  803. {
  804. pcre *retval;
  805. const char *err_str;
  806. int err_offset;
  807. if (!(retval = pcre_compile(pattern,
  808. PCRE_CASELESS, &err_str, &err_offset, NULL))) {
  809. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
  810. }
  811. return retval;
  812. }
  813. /* }}} */
  814. /* {{{ _php_mb_match_regex */
  815. static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
  816. {
  817. return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
  818. 0, NULL, 0) >= 0;
  819. }
  820. /* }}} */
  821. /* {{{ _php_mb_free_regex */
  /* Release a pattern previously produced by _php_mb_compile_regex() (PCRE build). */
  static void _php_mb_free_regex(void *opaque)
  {
      void *compiled = opaque;

      pcre_free(compiled);
  }
  826. /* }}} */
  827. #endif
  828. /* {{{ php_mb_nls_get_default_detect_order_list */
  829. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
  830. {
  831. size_t i;
  832. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  833. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  834. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  835. if (php_mb_default_identify_list[i].lang == lang) {
  836. *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
  837. *plist_size = php_mb_default_identify_list[i].list_size;
  838. return 1;
  839. }
  840. }
  841. return 0;
  842. }
  843. /* }}} */
  844. /* {{{ php.ini directive handler */
  845. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
  846. static PHP_INI_MH(OnUpdate_mbstring_language)
  847. {
  848. enum mbfl_no_language no_language;
  849. no_language = mbfl_name2no_language(new_value);
  850. if (no_language == mbfl_no_language_invalid) {
  851. MBSTRG(language) = mbfl_no_language_neutral;
  852. return FAILURE;
  853. }
  854. MBSTRG(language) = no_language;
  855. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  856. return SUCCESS;
  857. }
  858. /* }}} */
  859. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  860. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  861. {
  862. enum mbfl_no_encoding *list;
  863. int size;
  864. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  865. if (MBSTRG(detect_order_list)) {
  866. free(MBSTRG(detect_order_list));
  867. }
  868. MBSTRG(detect_order_list) = list;
  869. MBSTRG(detect_order_list_size) = size;
  870. } else {
  871. if (MBSTRG(detect_order_list)) {
  872. free(MBSTRG(detect_order_list));
  873. MBSTRG(detect_order_list) = NULL;
  874. }
  875. return FAILURE;
  876. }
  877. return SUCCESS;
  878. }
  879. /* }}} */
  880. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  881. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  882. {
  883. enum mbfl_no_encoding *list;
  884. int size;
  885. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  886. if (MBSTRG(http_input_list)) {
  887. free(MBSTRG(http_input_list));
  888. }
  889. MBSTRG(http_input_list) = list;
  890. MBSTRG(http_input_list_size) = size;
  891. } else {
  892. if (MBSTRG(http_input_list)) {
  893. free(MBSTRG(http_input_list));
  894. MBSTRG(http_input_list) = NULL;
  895. }
  896. MBSTRG(http_input_list_size) = 0;
  897. return FAILURE;
  898. }
  899. return SUCCESS;
  900. }
  901. /* }}} */
  902. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  903. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  904. {
  905. enum mbfl_no_encoding no_encoding;
  906. no_encoding = mbfl_name2no_encoding(new_value);
  907. if (no_encoding != mbfl_no_encoding_invalid) {
  908. MBSTRG(http_output_encoding) = no_encoding;
  909. MBSTRG(current_http_output_encoding) = no_encoding;
  910. } else {
  911. MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
  912. MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
  913. if (new_value != NULL && new_value_length > 0) {
  914. return FAILURE;
  915. }
  916. }
  917. return SUCCESS;
  918. }
  919. /* }}} */
  920. /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
  921. int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
  922. {
  923. enum mbfl_no_encoding no_encoding;
  924. const char *enc_name = NULL;
  925. uint enc_name_len = 0;
  926. no_encoding = new_value ? mbfl_name2no_encoding(new_value):
  927. mbfl_no_encoding_invalid;
  928. if (no_encoding != mbfl_no_encoding_invalid) {
  929. enc_name = new_value;
  930. enc_name_len = new_value_length;
  931. } else {
  932. switch (MBSTRG(language)) {
  933. case mbfl_no_language_uni:
  934. enc_name = "UTF-8";
  935. enc_name_len = sizeof("UTF-8") - 1;
  936. break;
  937. case mbfl_no_language_japanese:
  938. enc_name = "EUC-JP";
  939. enc_name_len = sizeof("EUC-JP") - 1;
  940. break;
  941. case mbfl_no_language_korean:
  942. enc_name = "EUC-KR";
  943. enc_name_len = sizeof("EUC-KR") - 1;
  944. break;
  945. case mbfl_no_language_simplified_chinese:
  946. enc_name = "EUC-CN";
  947. enc_name_len = sizeof("EUC-CN") - 1;
  948. break;
  949. case mbfl_no_language_traditional_chinese:
  950. enc_name = "EUC-TW";
  951. enc_name_len = sizeof("EUC-TW") - 1;
  952. break;
  953. case mbfl_no_language_russian:
  954. enc_name = "KOI8-R";
  955. enc_name_len = sizeof("KOI8-R") - 1;
  956. break;
  957. case mbfl_no_language_german:
  958. enc_name = "ISO-8859-15";
  959. enc_name_len = sizeof("ISO-8859-15") - 1;
  960. break;
  961. case mbfl_no_language_armenian:
  962. enc_name = "ArmSCII-8";
  963. enc_name_len = sizeof("ArmSCII-8") - 1;
  964. break;
  965. case mbfl_no_language_turkish:
  966. enc_name = "ISO-8859-9";
  967. enc_name_len = sizeof("ISO-8859-9") - 1;
  968. break;
  969. default:
  970. enc_name = "ISO-8859-1";
  971. enc_name_len = sizeof("ISO-8859-1") - 1;
  972. break;
  973. }
  974. no_encoding = mbfl_name2no_encoding(enc_name);
  975. }
  976. MBSTRG(internal_encoding) = no_encoding;
  977. MBSTRG(current_internal_encoding) = no_encoding;
  978. #if HAVE_MBREGEX
  979. {
  980. const char *enc_name = new_value;
  981. if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
  982. /* falls back to EUC-JP if an unknown encoding name is given */
  983. enc_name = "EUC-JP";
  984. php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
  985. }
  986. php_mb_regex_set_mbctype(new_value TSRMLS_CC);
  987. }
  988. #endif
  989. return SUCCESS;
  990. }
  991. /* }}} */
  992. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  993. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  994. {
  995. if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
  996. || stage == PHP_INI_STAGE_RUNTIME) {
  997. return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
  998. } else {
  999. /* the corresponding mbstring globals needs to be set according to the
  1000. * ini value in the later stage because it never falls back to the
  1001. * default value if 1. no value for mbstring.internal_encoding is given,
  1002. * 2. mbstring.language directive is processed in per-dir or runtime
  1003. * context and 3. call to the handler for mbstring.language is done
  1004. * after mbstring.internal_encoding is handled. */
  1005. return SUCCESS;
  1006. }
  1007. }
  1008. /* }}} */
  1009. #ifdef ZEND_MULTIBYTE
  1010. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
  1011. static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
  1012. {
  1013. int *list, size;
  1014. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  1015. if (MBSTRG(script_encoding_list) != NULL) {
  1016. free(MBSTRG(script_encoding_list));
  1017. }
  1018. MBSTRG(script_encoding_list) = list;
  1019. MBSTRG(script_encoding_list_size) = size;
  1020. } else {
  1021. if (MBSTRG(script_encoding_list) != NULL) {
  1022. free(MBSTRG(script_encoding_list));
  1023. }
  1024. MBSTRG(script_encoding_list) = NULL;
  1025. MBSTRG(script_encoding_list_size) = 0;
  1026. return FAILURE;
  1027. }
  1028. return SUCCESS;
  1029. }
  1030. /* }}} */
  1031. #endif /* ZEND_MULTIBYTE */
  1032. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  1033. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  1034. {
  1035. int c;
  1036. char *endptr = NULL;
  1037. if (new_value != NULL) {
  1038. if (strcasecmp("none", new_value) == 0) {
  1039. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1040. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1041. } else if (strcasecmp("long", new_value) == 0) {
  1042. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1043. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1044. } else if (strcasecmp("entity", new_value) == 0) {
  1045. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1046. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1047. } else {
  1048. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1049. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1050. if (new_value_length >0) {
  1051. c = strtol(new_value, &endptr, 0);
  1052. if (*endptr == '\0') {
  1053. MBSTRG(filter_illegal_substchar) = c;
  1054. MBSTRG(current_filter_illegal_substchar) = c;
  1055. }
  1056. }
  1057. }
  1058. } else {
  1059. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1060. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1061. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  1062. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  1063. }
  1064. return SUCCESS;
  1065. }
  1066. /* }}} */
  1067. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  1068. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  1069. {
  1070. if (new_value == NULL) {
  1071. return FAILURE;
  1072. }
  1073. OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
  1074. if (MBSTRG(encoding_translation)) {
  1075. sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
  1076. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  1077. } else {
  1078. sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
  1079. sapi_register_post_entries(php_post_entries TSRMLS_CC);
  1080. }
  1081. return SUCCESS;
  1082. }
  1083. /* }}} */
  1084. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
  1085. static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
  1086. {
  1087. zval tmp;
  1088. void *re = NULL;
  1089. if (!new_value) {
  1090. new_value = entry->orig_value;
  1091. new_value_length = entry->orig_value_length;
  1092. }
  1093. php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
  1094. if (Z_STRLEN(tmp) > 0) {
  1095. if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
  1096. zval_dtor(&tmp);
  1097. return FAILURE;
  1098. }
  1099. }
  1100. if (MBSTRG(http_output_conv_mimetypes)) {
  1101. _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
  1102. }
  1103. MBSTRG(http_output_conv_mimetypes) = re;
  1104. zval_dtor(&tmp);
  1105. return SUCCESS;
  1106. }
  1107. /* }}} */
  1108. /* }}} */
  1109. /* {{{ php.ini directive registration */
  1110. PHP_INI_BEGIN()
  1111. PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
  1112. PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
  1113. PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
  1114. PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
  1115. PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
  1116. #ifdef ZEND_MULTIBYTE
  1117. PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
  1118. #endif /* ZEND_MULTIBYTE */
  1119. PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
  1120. STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
  1121. PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
  1122. STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
  1123. PHP_INI_SYSTEM | PHP_INI_PERDIR,
  1124. OnUpdate_mbstring_encoding_translation,
  1125. encoding_translation, zend_mbstring_globals, mbstring_globals)
  1126. PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
  1127. "^(text/|application/xhtml\\+xml)",
  1128. PHP_INI_ALL,
  1129. OnUpdate_mbstring_http_output_conv_mimetypes)
  1130. STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
  1131. PHP_INI_ALL,
  1132. OnUpdateLong,
  1133. strict_detection, zend_mbstring_globals, mbstring_globals)
  1134. PHP_INI_END()
  1135. /* }}} */
  1136. /* {{{ module global initialize handler */
  1137. static PHP_GINIT_FUNCTION(mbstring)
  1138. {
  1139. mbstring_globals->language = mbfl_no_language_uni;
  1140. mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
  1141. mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
  1142. #ifdef ZEND_MULTIBYTE
  1143. mbstring_globals->script_encoding_list = NULL;
  1144. mbstring_globals->script_encoding_list_size = 0;
  1145. #endif /* ZEND_MULTIBYTE */
  1146. mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
  1147. mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
  1148. mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
  1149. mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
  1150. mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
  1151. mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
  1152. mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
  1153. mbstring_globals->http_input_list = NULL;
  1154. mbstring_globals->http_input_list_size = 0;
  1155. mbstring_globals->detect_order_list = NULL;
  1156. mbstring_globals->detect_order_list_size = 0;
  1157. mbstring_globals->current_detect_order_list = NULL;
  1158. mbstring_globals->current_detect_order_list_size = 0;
  1159. mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  1160. mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  1161. mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1162. mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
  1163. mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1164. mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
  1165. mbstring_globals->illegalchars = 0;
  1166. mbstring_globals->func_overload = 0;
  1167. mbstring_globals->encoding_translation = 0;
  1168. mbstring_globals->strict_detection = 0;
  1169. mbstring_globals->outconv = NULL;
  1170. mbstring_globals->http_output_conv_mimetypes = NULL;
  1171. #if HAVE_MBREGEX
  1172. mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
  1173. #endif
  1174. }
  1175. /* }}} */
  1176. /* {{{ PHP_GSHUTDOWN_FUNCTION */
  1177. static PHP_GSHUTDOWN_FUNCTION(mbstring)
  1178. {
  1179. if (mbstring_globals->http_input_list) {
  1180. free(mbstring_globals->http_input_list);
  1181. }
  1182. #ifdef ZEND_MULTIBYTE
  1183. if (mbstring_globals->script_encoding_list) {
  1184. free(mbstring_globals->script_encoding_list);
  1185. }
  1186. #endif /* ZEND_MULTIBYTE */
  1187. if (mbstring_globals->detect_order_list) {
  1188. free(mbstring_globals->detect_order_list);
  1189. }
  1190. if (mbstring_globals->http_output_conv_mimetypes) {
  1191. _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
  1192. }
  1193. #if HAVE_MBREGEX
  1194. php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
  1195. #endif
  1196. }
  1197. /* }}} */
  1198. /* {{{ PHP_MINIT_FUNCTION(mbstring) */
  1199. PHP_MINIT_FUNCTION(mbstring)
  1200. {
  1201. __mbfl_allocators = &_php_mb_allocators;
  1202. REGISTER_INI_ENTRIES();
  1203. /* This is a global handler. Should not be set in a per-request handler. */
  1204. sapi_register_treat_data(mbstr_treat_data);
  1205. /* Post handlers are stored in the thread-local context. */
  1206. if (MBSTRG(encoding_translation)) {
  1207. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  1208. }
  1209. REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
  1210. REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
  1211. REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
  1212. REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
  1213. REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
  1214. REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
  1215. #if HAVE_MBREGEX
  1216. PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1217. #endif
  1218. return SUCCESS;
  1219. }
  1220. /* }}} */
  1221. /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
  1222. PHP_MSHUTDOWN_FUNCTION(mbstring)
  1223. {
  1224. UNREGISTER_INI_ENTRIES();
  1225. #if HAVE_MBREGEX
  1226. PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1227. #endif
  1228. return SUCCESS;
  1229. }
  1230. /* }}} */
  1231. /* {{{ PHP_RINIT_FUNCTION(mbstring) */
  1232. PHP_RINIT_FUNCTION(mbstring)
  1233. {
  1234. int n;
  1235. enum mbfl_no_encoding *list=NULL, *entry;
  1236. zend_function *func, *orig;
  1237. const struct mb_overload_def *p;
  1238. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  1239. MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
  1240. MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
  1241. MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
  1242. MBSTRG(illegalchars) = 0;
  1243. n = 0;
  1244. if (MBSTRG(detect_order_list)) {
  1245. list = MBSTRG(detect_order_list);
  1246. n = MBSTRG(detect_order_list_size);
  1247. }
  1248. if (n <= 0) {
  1249. list = MBSTRG(default_detect_order_list);
  1250. n = MBSTRG(default_detect_order_list_size);
  1251. }
  1252. entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
  1253. MBSTRG(current_detect_order_list) = entry;
  1254. MBSTRG(current_detect_order_list_size) = n;
  1255. while (n > 0) {
  1256. *entry++ = *list++;
  1257. n--;
  1258. }
  1259. /* override original function. */
  1260. if (MBSTRG(func_overload)){
  1261. p = &(mb_ovld[0]);
  1262. while (p->type > 0) {
  1263. if ((MBSTRG(func_overload) & p->type) == p->type &&
  1264. zend_hash_find(EG(function_table), p->save_func,
  1265. strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
  1266. zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
  1267. if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
  1268. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
  1269. return FAILURE;
  1270. } else {
  1271. zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
  1272. if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
  1273. NULL) == FAILURE) {
  1274. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
  1275. return FAILURE;
  1276. }
  1277. }
  1278. }
  1279. p++;
  1280. }
  1281. }
  1282. #if HAVE_MBREGEX
  1283. PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1284. #endif
  1285. #ifdef ZEND_MULTIBYTE
  1286. zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
  1287. php_mb_set_zend_encoding(TSRMLS_C);
  1288. #endif /* ZEND_MULTIBYTE */
  1289. return SUCCESS;
  1290. }
  1291. /* }}} */
  1292. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  1293. PHP_RSHUTDOWN_FUNCTION(mbstring)
  1294. {
  1295. const struct mb_overload_def *p;
  1296. zend_function *orig;
  1297. if (MBSTRG(current_detect_order_list) != NULL) {
  1298. efree(MBSTRG(current_detect_order_list));
  1299. MBSTRG(current_detect_order_list) = NULL;
  1300. MBSTRG(current_detect_order_list_size) = 0;
  1301. }
  1302. if (MBSTRG(outconv) != NULL) {
  1303. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1304. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1305. MBSTRG(outconv) = NULL;
  1306. }
  1307. /* clear http input identification. */
  1308. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  1309. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  1310. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  1311. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  1312. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  1313. /* clear overloaded function. */
  1314. if (MBSTRG(func_overload)){
  1315. p = &(mb_ovld[0]);
  1316. while (p->type > 0) {
  1317. if ((MBSTRG(func_overload) & p->type) == p->type &&
  1318. zend_hash_find(EG(function_table), p->save_func,
  1319. strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
  1320. zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
  1321. zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
  1322. }
  1323. p++;
  1324. }
  1325. }
  1326. #if HAVE_MBREGEX
  1327. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  1328. #endif
  1329. return SUCCESS;
  1330. }
  1331. /* }}} */
  1332. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  1333. PHP_MINFO_FUNCTION(mbstring)
  1334. {
  1335. php_info_print_table_start();
  1336. php_info_print_table_row(2, "Multibyte Support", "enabled");
  1337. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  1338. php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
  1339. php_info_print_table_end();
  1340. php_info_print_table_start();
  1341. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  1342. php_info_print_table_end();
  1343. #if HAVE_MBREGEX
  1344. PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
  1345. #endif
  1346. DISPLAY_INI_ENTRIES();
  1347. }
  1348. /* }}} */
  1349. /* {{{ proto string mb_language([string language])
  1350. Sets the current language or Returns the current language as a string */
  1351. PHP_FUNCTION(mb_language)
  1352. {
  1353. char *name = NULL;
  1354. int name_len = 0;
  1355. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  1356. return;
  1357. }
  1358. if (name == NULL) {
  1359. RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
  1360. } else {
  1361. if (FAILURE == zend_alter_ini_entry(
  1362. "mbstring.language", sizeof("mbstring.language"),
  1363. name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
  1364. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
  1365. RETVAL_FALSE;
  1366. } else {
  1367. RETVAL_TRUE;
  1368. }
  1369. }
  1370. }
  1371. /* }}} */
  1372. /* {{{ proto string mb_internal_encoding([string encoding])
  1373. Sets the current internal encoding or Returns the current internal encoding as a string */
  1374. PHP_FUNCTION(mb_internal_encoding)
  1375. {
  1376. char *name = NULL;
  1377. int name_len;
  1378. enum mbfl_no_encoding no_encoding;
  1379. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  1380. RETURN_FALSE;
  1381. }
  1382. if (name == NULL) {
  1383. name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  1384. if (name != NULL) {
  1385. RETURN_STRING(name, 1);
  1386. } else {
  1387. RETURN_FALSE;
  1388. }
  1389. } else {
  1390. no_encoding = mbfl_name2no_encoding(name);
  1391. if (no_encoding == mbfl_no_encoding_invalid) {
  1392. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1393. RETURN_FALSE;
  1394. } else {
  1395. MBSTRG(current_internal_encoding) = no_encoding;
  1396. #ifdef ZEND_MULTIBYTE
  1397. /* TODO: make independent from mbstring.encoding_translation? */
  1398. if (MBSTRG(encoding_translation)) {
  1399. zend_multibyte_set_internal_encoding(name TSRMLS_CC);
  1400. }
  1401. #endif /* ZEND_MULTIBYTE */
  1402. RETURN_TRUE;
  1403. }
  1404. }
  1405. }
  1406. /* }}} */
  1407. /* {{{ proto mixed mb_http_input([string type])
  1408. Returns the input encoding */
  1409. PHP_FUNCTION(mb_http_input)
  1410. {
  1411. char *typ = NULL;
  1412. int typ_len;
  1413. int retname, n;
  1414. char *name, *list, *temp;
  1415. enum mbfl_no_encoding *entry;
  1416. enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
  1417. retname = 1;
  1418. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  1419. RETURN_FALSE;
  1420. }
  1421. if (typ == NULL) {
  1422. result = MBSTRG(http_input_identify);
  1423. } else {
  1424. switch (*typ) {
  1425. case 'G':
  1426. case 'g':
  1427. result = MBSTRG(http_input_identify_get);
  1428. break;
  1429. case 'P':
  1430. case 'p':
  1431. result = MBSTRG(http_input_identify_post);
  1432. break;
  1433. case 'C':
  1434. case 'c':
  1435. result = MBSTRG(http_input_identify_cookie);
  1436. break;
  1437. case 'S':
  1438. case 's':
  1439. result = MBSTRG(http_input_identify_string);
  1440. break;
  1441. case 'I':
  1442. case 'i':
  1443. array_init(return_value);
  1444. entry = MBSTRG(http_input_list);
  1445. n = MBSTRG(http_input_list_size);
  1446. while (n > 0) {
  1447. name = (char *)mbfl_no_encoding2name(*entry);
  1448. if (name) {
  1449. add_next_index_string(return_value, name, 1);
  1450. }
  1451. entry++;
  1452. n--;
  1453. }
  1454. retname = 0;
  1455. break;
  1456. case 'L':
  1457. case 'l':
  1458. entry = MBSTRG(http_input_list);
  1459. n = MBSTRG(http_input_list_size);
  1460. list = NULL;
  1461. while (n > 0) {
  1462. name = (char *)mbfl_no_encoding2name(*entry);
  1463. if (name) {
  1464. if (list) {
  1465. temp = list;
  1466. spprintf(&list, 0, "%s,%s", temp, name);
  1467. efree(temp);
  1468. if (!list) {
  1469. break;
  1470. }
  1471. } else {
  1472. list = estrdup(name);
  1473. }
  1474. }
  1475. entry++;
  1476. n--;
  1477. }
  1478. if (!list) {
  1479. RETURN_FALSE;
  1480. }
  1481. RETVAL_STRING(list, 0);
  1482. retname = 0;
  1483. break;
  1484. default:
  1485. result = MBSTRG(http_input_identify);
  1486. break;
  1487. }
  1488. }
  1489. if (retname) {
  1490. if (result != mbfl_no_encoding_invalid &&
  1491. (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
  1492. RETVAL_STRING(name, 1);
  1493. } else {
  1494. RETVAL_FALSE;
  1495. }
  1496. }
  1497. }
  1498. /* }}} */
  1499. /* {{{ proto string mb_http_output([string encoding])
  1500. Sets the current output_encoding or returns the current output_encoding as a string */
  1501. PHP_FUNCTION(mb_http_output)
  1502. {
  1503. char *name = NULL;
  1504. int name_len;
  1505. enum mbfl_no_encoding no_encoding;
  1506. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
  1507. RETURN_FALSE;
  1508. }
  1509. if (name == NULL) {
  1510. name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
  1511. if (name != NULL) {
  1512. RETURN_STRING(name, 1);
  1513. } else {
  1514. RETURN_FALSE;
  1515. }
  1516. } else {
  1517. no_encoding = mbfl_name2no_encoding(name);
  1518. if (no_encoding == mbfl_no_encoding_invalid) {
  1519. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1520. RETURN_FALSE;
  1521. } else {
  1522. MBSTRG(current_http_output_encoding) = no_encoding;
  1523. RETURN_TRUE;
  1524. }
  1525. }
  1526. }
  1527. /* }}} */
  1528. /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
  1529. Sets the current detect_order or Return the current detect_order as an array */
  1530. PHP_FUNCTION(mb_detect_order)
  1531. {
  1532. zval **arg1 = NULL;
  1533. int n, size;
  1534. enum mbfl_no_encoding *list, *entry;
  1535. char *name;
  1536. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
  1537. return;
  1538. }
  1539. if (!arg1) {
  1540. array_init(return_value);
  1541. entry = MBSTRG(current_detect_order_list);
  1542. n = MBSTRG(current_detect_order_list_size);
  1543. while (n > 0) {
  1544. name = (char *)mbfl_no_encoding2name(*entry);
  1545. if (name) {
  1546. add_next_index_string(return_value, name, 1);
  1547. }
  1548. entry++;
  1549. n--;
  1550. }
  1551. } else {
  1552. list = NULL;
  1553. size = 0;
  1554. switch (Z_TYPE_PP(arg1)) {
  1555. case IS_ARRAY:
  1556. if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
  1557. if (list) {
  1558. efree(list);
  1559. }
  1560. RETURN_FALSE;
  1561. }
  1562. break;
  1563. default:
  1564. convert_to_string_ex(arg1);
  1565. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
  1566. if (list) {
  1567. efree(list);
  1568. }
  1569. RETURN_FALSE;
  1570. }
  1571. break;
  1572. }
  1573. if (list == NULL) {
  1574. RETURN_FALSE;
  1575. }
  1576. if (MBSTRG(current_detect_order_list)) {
  1577. efree(MBSTRG(current_detect_order_list));
  1578. }
  1579. MBSTRG(current_detect_order_list) = list;
  1580. MBSTRG(current_detect_order_list_size) = size;
  1581. RETURN_TRUE;
  1582. }
  1583. }
  1584. /* }}} */
  1585. /* {{{ proto mixed mb_substitute_character([mixed substchar])
  1586. Sets the current substitute_character or returns the current substitute_character */
  1587. PHP_FUNCTION(mb_substitute_character)
  1588. {
  1589. zval **arg1 = NULL;
  1590. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
  1591. return;
  1592. }
  1593. if (!arg1) {
  1594. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1595. RETURN_STRING("none", 1);
  1596. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1597. RETURN_STRING("long", 1);
  1598. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  1599. RETURN_STRING("entity", 1);
  1600. } else {
  1601. RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
  1602. }
  1603. } else {
  1604. RETVAL_TRUE;
  1605. switch (Z_TYPE_PP(arg1)) {
  1606. case IS_STRING:
  1607. if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1608. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1609. } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1610. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1611. } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
  1612. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
  1613. } else {
  1614. convert_to_long_ex(arg1);
  1615. if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
  1616. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1617. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1618. } else {
  1619. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1620. RETURN_FALSE;
  1621. }
  1622. }
  1623. break;
  1624. default:
  1625. convert_to_long_ex(arg1);
  1626. if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
  1627. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1628. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1629. } else {
  1630. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1631. RETURN_FALSE;
  1632. }
  1633. break;
  1634. }
  1635. }
  1636. }
  1637. /* }}} */
  1638. /* {{{ proto string mb_preferred_mime_name(string encoding)
  1639. Return the preferred MIME name (charset) as a string */
  1640. PHP_FUNCTION(mb_preferred_mime_name)
  1641. {
  1642. enum mbfl_no_encoding no_encoding;
  1643. char *name = NULL;
  1644. int name_len;
  1645. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  1646. return;
  1647. } else {
  1648. no_encoding = mbfl_name2no_encoding(name);
  1649. if (no_encoding == mbfl_no_encoding_invalid) {
  1650. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1651. RETVAL_FALSE;
  1652. } else {
  1653. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1654. if (preferred_name == NULL || *preferred_name == '\0') {
  1655. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
  1656. RETVAL_FALSE;
  1657. } else {
  1658. RETVAL_STRING((char *)preferred_name, 1);
  1659. }
  1660. }
  1661. }
  1662. }
  1663. /* }}} */
  1664. #define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0)
  1665. #define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
  1666. /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
  1667. Parses GET/POST/COOKIE data and sets global variables */
  1668. PHP_FUNCTION(mb_parse_str)
  1669. {
  1670. zval *track_vars_array = NULL;
  1671. char *encstr = NULL;
  1672. int encstr_len;
  1673. php_mb_encoding_handler_info_t info;
  1674. enum mbfl_no_encoding detected;
  1675. track_vars_array = NULL;
  1676. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
  1677. return;
  1678. }
  1679. /* Clear out the array */
  1680. if (track_vars_array != NULL) {
  1681. zval_dtor(track_vars_array);
  1682. array_init(track_vars_array);
  1683. }
  1684. encstr = estrndup(encstr, encstr_len);
  1685. info.data_type = PARSE_STRING;
  1686. info.separator = PG(arg_separator).input;
  1687. info.force_register_globals = (track_vars_array == NULL);
  1688. info.report_errors = 1;
  1689. info.to_encoding = MBSTRG(current_internal_encoding);
  1690. info.to_language = MBSTRG(language);
  1691. info.from_encodings = MBSTRG(http_input_list);
  1692. info.num_from_encodings = MBSTRG(http_input_list_size);
  1693. info.from_language = MBSTRG(language);
  1694. detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
  1695. MBSTRG(http_input_identify) = detected;
  1696. RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
  1697. if (encstr != NULL) efree(encstr);
  1698. }
  1699. /* }}} */
  1700. /* {{{ proto string mb_output_handler(string contents, int status)
  1701. Returns string in output buffer converted to the http_output encoding */
  1702. PHP_FUNCTION(mb_output_handler)
  1703. {
  1704. char *arg_string;
  1705. int arg_string_len;
  1706. long arg_status;
  1707. mbfl_string string, result;
  1708. const char *charset;
  1709. char *p;
  1710. enum mbfl_no_encoding encoding;
  1711. int last_feed, len;
  1712. unsigned char send_text_mimetype = 0;
  1713. char *s, *mimetype = NULL;
  1714. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
  1715. return;
  1716. }
  1717. encoding = MBSTRG(current_http_output_encoding);
  1718. /* start phase only */
  1719. if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
  1720. /* delete the converter just in case. */
  1721. if (MBSTRG(outconv)) {
  1722. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1723. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1724. MBSTRG(outconv) = NULL;
  1725. }
  1726. if (encoding == mbfl_no_encoding_pass) {
  1727. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1728. }
  1729. /* analyze mime type */
  1730. if (SG(sapi_headers).mimetype &&
  1731. _php_mb_match_regex(
  1732. MBSTRG(http_output_conv_mimetypes),
  1733. SG(sapi_headers).mimetype,
  1734. strlen(SG(sapi_headers).mimetype))) {
  1735. if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
  1736. mimetype = estrdup(SG(sapi_headers).mimetype);
  1737. } else {
  1738. mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
  1739. }
  1740. send_text_mimetype = 1;
  1741. } else if (SG(sapi_headers).send_default_content_type) {
  1742. mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
  1743. }
  1744. /* if content-type is not yet set, set it and activate the converter */
  1745. if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
  1746. charset = mbfl_no2preferred_mime_name(encoding);
  1747. if (charset) {
  1748. len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
  1749. if (sapi_add_header(p, len, 0) != FAILURE) {
  1750. SG(sapi_headers).send_default_content_type = 0;
  1751. }
  1752. }
  1753. /* activate the converter */
  1754. MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
  1755. if (send_text_mimetype){
  1756. efree(mimetype);
  1757. }
  1758. }
  1759. }
  1760. /* just return if the converter is not activated. */
  1761. if (MBSTRG(outconv) == NULL) {
  1762. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1763. }
  1764. /* flag */
  1765. last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
  1766. /* mode */
  1767. mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1768. mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1769. /* feed the string */
  1770. mbfl_string_init(&string);
  1771. string.no_language = MBSTRG(language);
  1772. string.no_encoding = MBSTRG(current_internal_encoding);
  1773. string.val = (unsigned char *)arg_string;
  1774. string.len = arg_string_len;
  1775. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1776. if (last_feed) {
  1777. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1778. }
  1779. /* get the converter output, and return it */
  1780. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1781. RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
  1782. /* delete the converter if it is the last feed. */
  1783. if (last_feed) {
  1784. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
  1785. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1786. MBSTRG(outconv) = NULL;
  1787. }
  1788. }
  1789. /* }}} */
  1790. /* {{{ proto int mb_strlen(string str [, string encoding])
  1791. Get character numbers of a string */
  1792. PHP_FUNCTION(mb_strlen)
  1793. {
  1794. int n;
  1795. mbfl_string string;
  1796. char *enc_name = NULL;
  1797. int enc_name_len;
  1798. mbfl_string_init(&string);
  1799. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1800. RETURN_FALSE;
  1801. }
  1802. string.no_language = MBSTRG(language);
  1803. if (enc_name == NULL) {
  1804. string.no_encoding = MBSTRG(current_internal_encoding);
  1805. } else {
  1806. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1807. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1808. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1809. RETURN_FALSE;
  1810. }
  1811. }
  1812. n = mbfl_strlen(&string);
  1813. if (n >= 0) {
  1814. RETVAL_LONG(n);
  1815. } else {
  1816. RETVAL_FALSE;
  1817. }
  1818. }
  1819. /* }}} */
  1820. /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
  1821. Find position of first occurrence of a string within another */
  1822. PHP_FUNCTION(mb_strpos)
  1823. {
  1824. int n, reverse = 0;
  1825. long offset;
  1826. mbfl_string haystack, needle;
  1827. char *enc_name = NULL;
  1828. int enc_name_len;
  1829. mbfl_string_init(&haystack);
  1830. mbfl_string_init(&needle);
  1831. haystack.no_language = MBSTRG(language);
  1832. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1833. needle.no_language = MBSTRG(language);
  1834. needle.no_encoding = MBSTRG(current_internal_encoding);
  1835. offset = 0;
  1836. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
  1837. RETURN_FALSE;
  1838. }
  1839. if (enc_name != NULL) {
  1840. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1841. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1842. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1843. RETURN_FALSE;
  1844. }
  1845. }
  1846. if (offset < 0 || offset > mbfl_strlen(&haystack)) {
  1847. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
  1848. RETURN_FALSE;
  1849. }
  1850. if (needle.len == 0) {
  1851. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  1852. RETURN_FALSE;
  1853. }
  1854. n = mbfl_strpos(&haystack, &needle, offset, reverse);
  1855. if (n >= 0) {
  1856. RETVAL_LONG(n);
  1857. } else {
  1858. switch (-n) {
  1859. case 1:
  1860. break;
  1861. case 2:
  1862. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
  1863. break;
  1864. case 4:
  1865. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
  1866. break;
  1867. case 8:
  1868. php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
  1869. break;
  1870. default:
  1871. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
  1872. break;
  1873. }
  1874. RETVAL_FALSE;
  1875. }
  1876. }
  1877. /* }}} */
  1878. /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
  1879. Find position of last occurrence of a string within another */
  1880. PHP_FUNCTION(mb_strrpos)
  1881. {
  1882. int n;
  1883. mbfl_string haystack, needle;
  1884. char *enc_name = NULL;
  1885. int enc_name_len;
  1886. zval **zoffset = NULL;
  1887. long offset = 0, str_flg;
  1888. char *enc_name2 = NULL;
  1889. int enc_name_len2;
  1890. mbfl_string_init(&haystack);
  1891. mbfl_string_init(&needle);
  1892. haystack.no_language = MBSTRG(language);
  1893. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1894. needle.no_language = MBSTRG(language);
  1895. needle.no_encoding = MBSTRG(current_internal_encoding);
  1896. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
  1897. RETURN_FALSE;
  1898. }
  1899. if (zoffset) {
  1900. if (Z_TYPE_PP(zoffset) == IS_STRING) {
  1901. enc_name2 = Z_STRVAL_PP(zoffset);
  1902. enc_name_len2 = Z_STRLEN_PP(zoffset);
  1903. str_flg = 1;
  1904. if (enc_name2 != NULL) {
  1905. switch (*enc_name2) {
  1906. case '0':
  1907. case '1':
  1908. case '2':
  1909. case '3':
  1910. case '4':
  1911. case '5':
  1912. case '6':
  1913. case '7':
  1914. case '8':
  1915. case '9':
  1916. case ' ':
  1917. case '-':
  1918. case '.':
  1919. break;
  1920. default :
  1921. str_flg = 0;
  1922. break;
  1923. }
  1924. }
  1925. if (str_flg) {
  1926. convert_to_long_ex(zoffset);
  1927. offset = Z_LVAL_PP(zoffset);
  1928. } else {
  1929. enc_name = enc_name2;
  1930. enc_name_len = enc_name_len2;
  1931. }
  1932. } else {
  1933. convert_to_long_ex(zoffset);
  1934. offset = Z_LVAL_PP(zoffset);
  1935. }
  1936. }
  1937. if (enc_name != NULL) {
  1938. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1939. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1940. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1941. RETURN_FALSE;
  1942. }
  1943. }
  1944. if (haystack.len <= 0) {
  1945. RETURN_FALSE;
  1946. }
  1947. if (needle.len <= 0) {
  1948. RETURN_FALSE;
  1949. }
  1950. {
  1951. int haystack_char_len = mbfl_strlen(&haystack);
  1952. if ((offset > 0 && offset > haystack_char_len) ||
  1953. (offset < 0 && -offset > haystack_char_len)) {
  1954. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
  1955. RETURN_FALSE;
  1956. }
  1957. }
  1958. n = mbfl_strpos(&haystack, &needle, offset, 1);
  1959. if (n >= 0) {
  1960. RETVAL_LONG(n);
  1961. } else {
  1962. RETVAL_FALSE;
  1963. }
  1964. }
  1965. /* }}} */
  1966. /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
  1967. Finds position of first occurrence of a string within another, case insensitive */
  1968. PHP_FUNCTION(mb_stripos)
  1969. {
  1970. int n;
  1971. long offset;
  1972. mbfl_string haystack, needle;
  1973. char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1974. int from_encoding_len;
  1975. n = -1;
  1976. offset = 0;
  1977. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
  1978. RETURN_FALSE;
  1979. }
  1980. if (needle.len == 0) {
  1981. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  1982. RETURN_FALSE;
  1983. }
  1984. n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
  1985. if (n >= 0) {
  1986. RETVAL_LONG(n);
  1987. } else {
  1988. RETVAL_FALSE;
  1989. }
  1990. }
  1991. /* }}} */
  1992. /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
  1993. Finds position of last occurrence of a string within another, case insensitive */
  1994. PHP_FUNCTION(mb_strripos)
  1995. {
  1996. int n;
  1997. long offset;
  1998. mbfl_string haystack, needle;
  1999. const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2000. int from_encoding_len;
  2001. n = -1;
  2002. offset = 0;
  2003. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
  2004. RETURN_FALSE;
  2005. }
  2006. n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
  2007. if (n >= 0) {
  2008. RETVAL_LONG(n);
  2009. } else {
  2010. RETVAL_FALSE;
  2011. }
  2012. }
  2013. /* }}} */
  2014. /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
  2015. Finds first occurrence of a string within another */
  2016. PHP_FUNCTION(mb_strstr)
  2017. {
  2018. int n, len, mblen;
  2019. mbfl_string haystack, needle, result, *ret = NULL;
  2020. char *enc_name = NULL;
  2021. int enc_name_len;
  2022. zend_bool part = 0;
  2023. mbfl_string_init(&haystack);
  2024. mbfl_string_init(&needle);
  2025. haystack.no_language = MBSTRG(language);
  2026. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2027. needle.no_language = MBSTRG(language);
  2028. needle.no_encoding = MBSTRG(current_internal_encoding);
  2029. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
  2030. RETURN_FALSE;
  2031. }
  2032. if (enc_name != NULL) {
  2033. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2034. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2035. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2036. RETURN_FALSE;
  2037. }
  2038. }
  2039. if (needle.len <= 0) {
  2040. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  2041. RETURN_FALSE;
  2042. }
  2043. n = mbfl_strpos(&haystack, &needle, 0, 0);
  2044. if (n >= 0) {
  2045. mblen = mbfl_strlen(&haystack);
  2046. if (part) {
  2047. ret = mbfl_substr(&haystack, &result, 0, n);
  2048. if (ret != NULL) {
  2049. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2050. } else {
  2051. RETVAL_FALSE;
  2052. }
  2053. } else {
  2054. len = (mblen - n);
  2055. ret = mbfl_substr(&haystack, &result, n, len);
  2056. if (ret != NULL) {
  2057. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2058. } else {
  2059. RETVAL_FALSE;
  2060. }
  2061. }
  2062. } else {
  2063. RETVAL_FALSE;
  2064. }
  2065. }
  2066. /* }}} */
  2067. /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
  2068. Finds the last occurrence of a character in a string within another */
  2069. PHP_FUNCTION(mb_strrchr)
  2070. {
  2071. int n, len, mblen;
  2072. mbfl_string haystack, needle, result, *ret = NULL;
  2073. char *enc_name = NULL;
  2074. int enc_name_len;
  2075. zend_bool part = 0;
  2076. mbfl_string_init(&haystack);
  2077. mbfl_string_init(&needle);
  2078. haystack.no_language = MBSTRG(language);
  2079. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2080. needle.no_language = MBSTRG(language);
  2081. needle.no_encoding = MBSTRG(current_internal_encoding);
  2082. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
  2083. RETURN_FALSE;
  2084. }
  2085. if (enc_name != NULL) {
  2086. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2087. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2088. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2089. RETURN_FALSE;
  2090. }
  2091. }
  2092. if (haystack.len <= 0) {
  2093. RETURN_FALSE;
  2094. }
  2095. if (needle.len <= 0) {
  2096. RETURN_FALSE;
  2097. }
  2098. n = mbfl_strpos(&haystack, &needle, 0, 1);
  2099. if (n >= 0) {
  2100. mblen = mbfl_strlen(&haystack);
  2101. if (part) {
  2102. ret = mbfl_substr(&haystack, &result, 0, n);
  2103. if (ret != NULL) {
  2104. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2105. } else {
  2106. RETVAL_FALSE;
  2107. }
  2108. } else {
  2109. len = (mblen - n);
  2110. ret = mbfl_substr(&haystack, &result, n, len);
  2111. if (ret != NULL) {
  2112. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2113. } else {
  2114. RETVAL_FALSE;
  2115. }
  2116. }
  2117. } else {
  2118. RETVAL_FALSE;
  2119. }
  2120. }
  2121. /* }}} */
  2122. /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
  2123. Finds first occurrence of a string within another, case insensitive */
  2124. PHP_FUNCTION(mb_stristr)
  2125. {
  2126. zend_bool part = 0;
  2127. unsigned int from_encoding_len, len, mblen;
  2128. int n;
  2129. mbfl_string haystack, needle, result, *ret = NULL;
  2130. const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2131. mbfl_string_init(&haystack);
  2132. mbfl_string_init(&needle);
  2133. haystack.no_language = MBSTRG(language);
  2134. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2135. needle.no_language = MBSTRG(language);
  2136. needle.no_encoding = MBSTRG(current_internal_encoding);
  2137. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
  2138. RETURN_FALSE;
  2139. }
  2140. if (!needle.len) {
  2141. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
  2142. RETURN_FALSE;
  2143. }
  2144. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  2145. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2146. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  2147. RETURN_FALSE;
  2148. }
  2149. n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
  2150. if (n <0) {
  2151. RETURN_FALSE;
  2152. }
  2153. mblen = mbfl_strlen(&haystack);
  2154. if (part) {
  2155. ret = mbfl_substr(&haystack, &result, 0, n);
  2156. if (ret != NULL) {
  2157. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2158. } else {
  2159. RETVAL_FALSE;
  2160. }
  2161. } else {
  2162. len = (mblen - n);
  2163. ret = mbfl_substr(&haystack, &result, n, len);
  2164. if (ret != NULL) {
  2165. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2166. } else {
  2167. RETVAL_FALSE;
  2168. }
  2169. }
  2170. }
  2171. /* }}} */
  2172. /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
  2173. Finds the last occurrence of a character in a string within another, case insensitive */
  2174. PHP_FUNCTION(mb_strrichr)
  2175. {
  2176. zend_bool part = 0;
  2177. int n, from_encoding_len, len, mblen;
  2178. mbfl_string haystack, needle, result, *ret = NULL;
  2179. char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2180. mbfl_string_init(&haystack);
  2181. mbfl_string_init(&needle);
  2182. haystack.no_language = MBSTRG(language);
  2183. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2184. needle.no_language = MBSTRG(language);
  2185. needle.no_encoding = MBSTRG(current_internal_encoding);
  2186. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
  2187. RETURN_FALSE;
  2188. }
  2189. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  2190. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2191. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  2192. RETURN_FALSE;
  2193. }
  2194. n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
  2195. if (n <0) {
  2196. RETURN_FALSE;
  2197. }
  2198. mblen = mbfl_strlen(&haystack);
  2199. if (part) {
  2200. ret = mbfl_substr(&haystack, &result, 0, n);
  2201. if (ret != NULL) {
  2202. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2203. } else {
  2204. RETVAL_FALSE;
  2205. }
  2206. } else {
  2207. len = (mblen - n);
  2208. ret = mbfl_substr(&haystack, &result, n, len);
  2209. if (ret != NULL) {
  2210. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2211. } else {
  2212. RETVAL_FALSE;
  2213. }
  2214. }
  2215. }
  2216. /* }}} */
  2217. /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
  2218. Count the number of substring occurrences */
  2219. PHP_FUNCTION(mb_substr_count)
  2220. {
  2221. int n;
  2222. mbfl_string haystack, needle;
  2223. char *enc_name = NULL;
  2224. int enc_name_len;
  2225. mbfl_string_init(&haystack);
  2226. mbfl_string_init(&needle);
  2227. haystack.no_language = MBSTRG(language);
  2228. haystack.no_encoding = MBSTRG(current_internal_encoding);
  2229. needle.no_language = MBSTRG(language);
  2230. needle.no_encoding = MBSTRG(current_internal_encoding);
  2231. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  2232. return;
  2233. }
  2234. if (enc_name != NULL) {
  2235. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  2236. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  2237. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2238. RETURN_FALSE;
  2239. }
  2240. }
  2241. if (needle.len <= 0) {
  2242. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
  2243. RETURN_FALSE;
  2244. }
  2245. n = mbfl_substr_count(&haystack, &needle);
  2246. if (n >= 0) {
  2247. RETVAL_LONG(n);
  2248. } else {
  2249. RETVAL_FALSE;
  2250. }
  2251. }
  2252. /* }}} */
  2253. /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
  2254. Returns part of a string */
  2255. PHP_FUNCTION(mb_substr)
  2256. {
  2257. size_t argc = ZEND_NUM_ARGS();
  2258. char *str, *encoding;
  2259. long from, len;
  2260. int mblen, str_len, encoding_len;
  2261. mbfl_string string, result, *ret;
  2262. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
  2263. return;
  2264. }
  2265. mbfl_string_init(&string);
  2266. string.no_language = MBSTRG(language);
  2267. string.no_encoding = MBSTRG(current_internal_encoding);
  2268. if (argc == 4) {
  2269. string.no_encoding = mbfl_name2no_encoding(encoding);
  2270. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2271. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2272. RETURN_FALSE;
  2273. }
  2274. }
  2275. string.val = (unsigned char *)str;
  2276. string.len = str_len;
  2277. if (argc < 3) {
  2278. len = str_len;
  2279. }
  2280. /* measures length */
  2281. mblen = 0;
  2282. if (from < 0 || len < 0) {
  2283. mblen = mbfl_strlen(&string);
  2284. }
  2285. /* if "from" position is negative, count start position from the end
  2286. * of the string
  2287. */
  2288. if (from < 0) {
  2289. from = mblen + from;
  2290. if (from < 0) {
  2291. from = 0;
  2292. }
  2293. }
  2294. /* if "length" position is negative, set it to the length
  2295. * needed to stop that many chars from the end of the string
  2296. */
  2297. if (len < 0) {
  2298. len = (mblen - from) + len;
  2299. if (len < 0) {
  2300. len = 0;
  2301. }
  2302. }
  2303. if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
  2304. && (from >= mbfl_strlen(&string))) {
  2305. RETURN_FALSE;
  2306. }
  2307. ret = mbfl_substr(&string, &result, from, len);
  2308. if (NULL == ret) {
  2309. RETURN_FALSE;
  2310. }
  2311. RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2312. }
  2313. /* }}} */
  2314. /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
  2315. Returns part of a string */
  2316. PHP_FUNCTION(mb_strcut)
  2317. {
  2318. size_t argc = ZEND_NUM_ARGS();
  2319. char *encoding;
  2320. long from, len;
  2321. int encoding_len;
  2322. mbfl_string string, result, *ret;
  2323. mbfl_string_init(&string);
  2324. string.no_language = MBSTRG(language);
  2325. string.no_encoding = MBSTRG(current_internal_encoding);
  2326. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
  2327. return;
  2328. }
  2329. if (argc == 4) {
  2330. string.no_encoding = mbfl_name2no_encoding(encoding);
  2331. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2332. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2333. RETURN_FALSE;
  2334. }
  2335. }
  2336. if (argc < 3) {
  2337. len = string.len;
  2338. }
  2339. /* if "from" position is negative, count start position from the end
  2340. * of the string
  2341. */
  2342. if (from < 0) {
  2343. from = string.len + from;
  2344. if (from < 0) {
  2345. from = 0;
  2346. }
  2347. }
  2348. /* if "length" position is negative, set it to the length
  2349. * needed to stop that many chars from the end of the string
  2350. */
  2351. if (len < 0) {
  2352. len = (string.len - from) + len;
  2353. if (len < 0) {
  2354. len = 0;
  2355. }
  2356. }
  2357. if ((unsigned int)from > string.len) {
  2358. RETURN_FALSE;
  2359. }
  2360. if (((unsigned int)from + (unsigned int)len) > string.len) {
  2361. len = string.len - from;
  2362. }
  2363. ret = mbfl_strcut(&string, &result, from, len);
  2364. if (ret == NULL) {
  2365. RETURN_FALSE;
  2366. }
  2367. RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2368. }
  2369. /* }}} */
  2370. /* {{{ proto int mb_strwidth(string str [, string encoding])
  2371. Gets terminal width of a string */
  2372. PHP_FUNCTION(mb_strwidth)
  2373. {
  2374. int n;
  2375. mbfl_string string;
  2376. char *enc_name = NULL;
  2377. int enc_name_len;
  2378. mbfl_string_init(&string);
  2379. string.no_language = MBSTRG(language);
  2380. string.no_encoding = MBSTRG(current_internal_encoding);
  2381. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  2382. return;
  2383. }
  2384. if (enc_name != NULL) {
  2385. string.no_encoding = mbfl_name2no_encoding(enc_name);
  2386. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2387. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  2388. RETURN_FALSE;
  2389. }
  2390. }
  2391. n = mbfl_strwidth(&string);
  2392. if (n >= 0) {
  2393. RETVAL_LONG(n);
  2394. } else {
  2395. RETVAL_FALSE;
  2396. }
  2397. }
  2398. /* }}} */
  2399. /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
  2400. Trim the string in terminal width */
  2401. PHP_FUNCTION(mb_strimwidth)
  2402. {
  2403. char *str, *trimmarker, *encoding;
  2404. long from, width;
  2405. int str_len, trimmarker_len, encoding_len;
  2406. mbfl_string string, result, marker, *ret;
  2407. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
  2408. return;
  2409. }
  2410. mbfl_string_init(&string);
  2411. mbfl_string_init(&marker);
  2412. string.no_language = MBSTRG(language);
  2413. string.no_encoding = MBSTRG(current_internal_encoding);
  2414. marker.no_language = MBSTRG(language);
  2415. marker.no_encoding = MBSTRG(current_internal_encoding);
  2416. marker.val = NULL;
  2417. marker.len = 0;
  2418. if (ZEND_NUM_ARGS() == 5) {
  2419. string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
  2420. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2421. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  2422. RETURN_FALSE;
  2423. }
  2424. }
  2425. string.val = (unsigned char *)str;
  2426. string.len = str_len;
  2427. if (from < 0 || from > str_len) {
  2428. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
  2429. RETURN_FALSE;
  2430. }
  2431. if (width < 0) {
  2432. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
  2433. RETURN_FALSE;
  2434. }
  2435. if (ZEND_NUM_ARGS() >= 4) {
  2436. marker.val = (unsigned char *)trimmarker;
  2437. marker.len = trimmarker_len;
  2438. }
  2439. ret = mbfl_strimwidth(&string, &marker, &result, from, width);
  2440. if (ret == NULL) {
  2441. RETURN_FALSE;
  2442. }
  2443. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2444. }
  2445. /* }}} */
  2446. /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
  2447. MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
  2448. {
  2449. mbfl_string string, result, *ret;
  2450. enum mbfl_no_encoding from_encoding, to_encoding;
  2451. mbfl_buffer_converter *convd;
  2452. int size, *list;
  2453. char *output=NULL;
  2454. if (output_len) {
  2455. *output_len = 0;
  2456. }
  2457. if (!input) {
  2458. return NULL;
  2459. }
  2460. /* new encoding */
  2461. if (_to_encoding && strlen(_to_encoding)) {
  2462. to_encoding = mbfl_name2no_encoding(_to_encoding);
  2463. if (to_encoding == mbfl_no_encoding_invalid) {
  2464. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
  2465. return NULL;
  2466. }
  2467. } else {
  2468. to_encoding = MBSTRG(current_internal_encoding);
  2469. }
  2470. /* initialize string */
  2471. mbfl_string_init(&string);
  2472. mbfl_string_init(&result);
  2473. from_encoding = MBSTRG(current_internal_encoding);
  2474. string.no_encoding = from_encoding;
  2475. string.no_language = MBSTRG(language);
  2476. string.val = (unsigned char *)input;
  2477. string.len = length;
  2478. /* pre-conversion encoding */
  2479. if (_from_encodings) {
  2480. list = NULL;
  2481. size = 0;
  2482. php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
  2483. if (size == 1) {
  2484. from_encoding = *list;
  2485. string.no_encoding = from_encoding;
  2486. } else if (size > 1) {
  2487. /* auto detect */
  2488. from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
  2489. if (from_encoding != mbfl_no_encoding_invalid) {
  2490. string.no_encoding = from_encoding;
  2491. } else {
  2492. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
  2493. from_encoding = mbfl_no_encoding_pass;
  2494. to_encoding = from_encoding;
  2495. string.no_encoding = from_encoding;
  2496. }
  2497. } else {
  2498. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
  2499. }
  2500. if (list != NULL) {
  2501. efree((void *)list);
  2502. }
  2503. }
  2504. /* initialize converter */
  2505. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  2506. if (convd == NULL) {
  2507. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
  2508. return NULL;
  2509. }
  2510. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2511. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2512. /* do it */
  2513. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2514. if (ret) {
  2515. if (output_len) {
  2516. *output_len = ret->len;
  2517. }
  2518. output = (char *)ret->val;
  2519. }
  2520. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  2521. mbfl_buffer_converter_delete(convd);
  2522. return output;
  2523. }
  2524. /* }}} */
  2525. /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
  2526. Returns converted string in desired encoding */
  2527. PHP_FUNCTION(mb_convert_encoding)
  2528. {
  2529. char *arg_str, *arg_new;
  2530. int str_len, new_len;
  2531. zval *arg_old;
  2532. int i;
  2533. size_t size, l, n;
  2534. char *_from_encodings = NULL, *ret, *s_free = NULL;
  2535. zval **hash_entry;
  2536. HashTable *target_hash;
  2537. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
  2538. return;
  2539. }
  2540. if (ZEND_NUM_ARGS() == 3) {
  2541. switch (Z_TYPE_P(arg_old)) {
  2542. case IS_ARRAY:
  2543. target_hash = Z_ARRVAL_P(arg_old);
  2544. zend_hash_internal_pointer_reset(target_hash);
  2545. i = zend_hash_num_elements(target_hash);
  2546. _from_encodings = NULL;
  2547. while (i > 0) {
  2548. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  2549. break;
  2550. }
  2551. convert_to_string_ex(hash_entry);
  2552. if ( _from_encodings) {
  2553. l = strlen(_from_encodings);
  2554. n = strlen(Z_STRVAL_PP(hash_entry));
  2555. _from_encodings = erealloc(_from_encodings, l+n+2);
  2556. strcpy(_from_encodings+l, ",");
  2557. strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
  2558. } else {
  2559. _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
  2560. }
  2561. zend_hash_move_forward(target_hash);
  2562. i--;
  2563. }
  2564. if (_from_encodings != NULL && !strlen(_from_encodings)) {
  2565. efree(_from_encodings);
  2566. _from_encodings = NULL;
  2567. }
  2568. s_free = _from_encodings;
  2569. break;
  2570. default:
  2571. convert_to_string(arg_old);
  2572. _from_encodings = Z_STRVAL_P(arg_old);
  2573. break;
  2574. }
  2575. }
  2576. /* new encoding */
  2577. ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
  2578. if (ret != NULL) {
  2579. RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
  2580. } else {
  2581. RETVAL_FALSE;
  2582. }
  2583. if ( s_free) {
  2584. efree(s_free);
  2585. }
  2586. }
  2587. /* }}} */
  2588. /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
  2589. Returns a case-folded version of sourcestring */
  2590. PHP_FUNCTION(mb_convert_case)
  2591. {
  2592. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2593. int str_len, from_encoding_len;
  2594. long case_mode = 0;
  2595. char *newstr;
  2596. size_t ret_len;
  2597. RETVAL_FALSE;
  2598. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
  2599. &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
  2600. RETURN_FALSE;
  2601. newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2602. if (newstr) {
  2603. RETVAL_STRINGL(newstr, ret_len, 0);
  2604. }
  2605. }
  2606. /* }}} */
  2607. /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
  2608. * Returns a uppercased version of sourcestring
  2609. */
  2610. PHP_FUNCTION(mb_strtoupper)
  2611. {
  2612. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2613. int str_len, from_encoding_len;
  2614. char *newstr;
  2615. size_t ret_len;
  2616. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  2617. &from_encoding, &from_encoding_len) == FAILURE) {
  2618. return;
  2619. }
  2620. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2621. if (newstr) {
  2622. RETURN_STRINGL(newstr, ret_len, 0);
  2623. }
  2624. RETURN_FALSE;
  2625. }
  2626. /* }}} */
  2627. /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
  2628. * Returns a lowercased version of sourcestring
  2629. */
  2630. PHP_FUNCTION(mb_strtolower)
  2631. {
  2632. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  2633. int str_len, from_encoding_len;
  2634. char *newstr;
  2635. size_t ret_len;
  2636. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  2637. &from_encoding, &from_encoding_len) == FAILURE) {
  2638. return;
  2639. }
  2640. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  2641. if (newstr) {
  2642. RETURN_STRINGL(newstr, ret_len, 0);
  2643. }
  2644. RETURN_FALSE;
  2645. }
  2646. /* }}} */
  2647. /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
  2648. Encodings of the given string is returned (as a string) */
  2649. PHP_FUNCTION(mb_detect_encoding)
  2650. {
  2651. char *str;
  2652. int str_len;
  2653. zend_bool strict=0;
  2654. zval *encoding_list;
  2655. mbfl_string string;
  2656. const char *ret;
  2657. enum mbfl_no_encoding *elist;
  2658. int size, *list;
  2659. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
  2660. return;
  2661. }
  2662. /* make encoding list */
  2663. list = NULL;
  2664. size = 0;
  2665. if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_P(encoding_list)) {
  2666. switch (Z_TYPE_P(encoding_list)) {
  2667. case IS_ARRAY:
  2668. if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
  2669. if (list) {
  2670. efree(list);
  2671. list = NULL;
  2672. size = 0;
  2673. }
  2674. }
  2675. break;
  2676. default:
  2677. convert_to_string(encoding_list);
  2678. if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
  2679. if (list) {
  2680. efree(list);
  2681. list = NULL;
  2682. size = 0;
  2683. }
  2684. }
  2685. break;
  2686. }
  2687. if (size <= 0) {
  2688. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
  2689. }
  2690. }
  2691. if (ZEND_NUM_ARGS() < 3) {
  2692. strict = (zend_bool)MBSTRG(strict_detection);
  2693. }
  2694. if (size > 0 && list != NULL) {
  2695. elist = list;
  2696. } else {
  2697. elist = MBSTRG(current_detect_order_list);
  2698. size = MBSTRG(current_detect_order_list_size);
  2699. }
  2700. mbfl_string_init(&string);
  2701. string.no_language = MBSTRG(language);
  2702. string.val = (unsigned char *)str;
  2703. string.len = str_len;
  2704. ret = mbfl_identify_encoding_name(&string, elist, size, strict);
  2705. if (list != NULL) {
  2706. efree((void *)list);
  2707. }
  2708. if (ret == NULL) {
  2709. RETURN_FALSE;
  2710. }
  2711. RETVAL_STRING((char *)ret, 1);
  2712. }
  2713. /* }}} */
  2714. /* {{{ proto mixed mb_list_encodings()
  2715. Returns an array of all supported entity encodings */
  2716. PHP_FUNCTION(mb_list_encodings)
  2717. {
  2718. const mbfl_encoding **encodings;
  2719. const mbfl_encoding *encoding;
  2720. int i;
  2721. array_init(return_value);
  2722. i = 0;
  2723. encodings = mbfl_get_supported_encodings();
  2724. while ((encoding = encodings[i++]) != NULL) {
  2725. add_next_index_string(return_value, (char *) encoding->name, 1);
  2726. }
  2727. }
  2728. /* }}} */
  2729. /* {{{ proto array mb_encoding_aliases(string encoding)
  2730. Returns an array of the aliases of a given encoding name */
  2731. PHP_FUNCTION(mb_encoding_aliases)
  2732. {
  2733. const mbfl_encoding *encoding;
  2734. char *name = NULL;
  2735. int name_len;
  2736. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  2737. RETURN_FALSE;
  2738. }
  2739. encoding = mbfl_name2encoding(name);
  2740. if (!encoding) {
  2741. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  2742. RETURN_FALSE;
  2743. }
  2744. array_init(return_value);
  2745. if (encoding->aliases != NULL) {
  2746. const char **alias;
  2747. for (alias = *encoding->aliases; *alias; ++alias) {
  2748. add_next_index_string(return_value, (char *)*alias, 1);
  2749. }
  2750. }
  2751. }
  2752. /* }}} */
  2753. /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
  2754. Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
  2755. PHP_FUNCTION(mb_encode_mimeheader)
  2756. {
  2757. enum mbfl_no_encoding charset, transenc;
  2758. mbfl_string string, result, *ret;
  2759. char *charset_name = NULL;
  2760. int charset_name_len;
  2761. char *trans_enc_name = NULL;
  2762. int trans_enc_name_len;
  2763. char *linefeed = "\r\n";
  2764. int linefeed_len;
  2765. long indent = 0;
  2766. mbfl_string_init(&string);
  2767. string.no_language = MBSTRG(language);
  2768. string.no_encoding = MBSTRG(current_internal_encoding);
  2769. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
  2770. return;
  2771. }
  2772. charset = mbfl_no_encoding_pass;
  2773. transenc = mbfl_no_encoding_base64;
  2774. if (charset_name != NULL) {
  2775. charset = mbfl_name2no_encoding(charset_name);
  2776. if (charset == mbfl_no_encoding_invalid) {
  2777. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
  2778. RETURN_FALSE;
  2779. }
  2780. } else {
  2781. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  2782. if (lang != NULL) {
  2783. charset = lang->mail_charset;
  2784. transenc = lang->mail_header_encoding;
  2785. }
  2786. }
  2787. if (trans_enc_name != NULL) {
  2788. if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
  2789. transenc = mbfl_no_encoding_base64;
  2790. } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
  2791. transenc = mbfl_no_encoding_qprint;
  2792. }
  2793. }
  2794. mbfl_string_init(&result);
  2795. ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
  2796. if (ret != NULL) {
  2797. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2798. } else {
  2799. RETVAL_FALSE;
  2800. }
  2801. }
  2802. /* }}} */
  2803. /* {{{ proto string mb_decode_mimeheader(string string)
  2804. Decodes the MIME "encoded-word" in the string */
  2805. PHP_FUNCTION(mb_decode_mimeheader)
  2806. {
  2807. mbfl_string string, result, *ret;
  2808. mbfl_string_init(&string);
  2809. string.no_language = MBSTRG(language);
  2810. string.no_encoding = MBSTRG(current_internal_encoding);
  2811. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
  2812. return;
  2813. }
  2814. mbfl_string_init(&result);
  2815. ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
  2816. if (ret != NULL) {
  2817. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2818. } else {
  2819. RETVAL_FALSE;
  2820. }
  2821. }
  2822. /* }}} */
  2823. /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
  2824. Conversion between full-width character and half-width character (Japanese) */
  2825. PHP_FUNCTION(mb_convert_kana)
  2826. {
  2827. int opt, i;
  2828. mbfl_string string, result, *ret;
  2829. char *optstr = NULL;
  2830. int optstr_len;
  2831. char *encname = NULL;
  2832. int encname_len;
  2833. mbfl_string_init(&string);
  2834. string.no_language = MBSTRG(language);
  2835. string.no_encoding = MBSTRG(current_internal_encoding);
  2836. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
  2837. return;
  2838. }
  2839. /* option */
  2840. if (optstr != NULL) {
  2841. char *p = optstr;
  2842. int n = optstr_len;
  2843. i = 0;
  2844. opt = 0;
  2845. while (i < n) {
  2846. i++;
  2847. switch (*p++) {
  2848. case 'A':
  2849. opt |= 0x1;
  2850. break;
  2851. case 'a':
  2852. opt |= 0x10;
  2853. break;
  2854. case 'R':
  2855. opt |= 0x2;
  2856. break;
  2857. case 'r':
  2858. opt |= 0x20;
  2859. break;
  2860. case 'N':
  2861. opt |= 0x4;
  2862. break;
  2863. case 'n':
  2864. opt |= 0x40;
  2865. break;
  2866. case 'S':
  2867. opt |= 0x8;
  2868. break;
  2869. case 's':
  2870. opt |= 0x80;
  2871. break;
  2872. case 'K':
  2873. opt |= 0x100;
  2874. break;
  2875. case 'k':
  2876. opt |= 0x1000;
  2877. break;
  2878. case 'H':
  2879. opt |= 0x200;
  2880. break;
  2881. case 'h':
  2882. opt |= 0x2000;
  2883. break;
  2884. case 'V':
  2885. opt |= 0x800;
  2886. break;
  2887. case 'C':
  2888. opt |= 0x10000;
  2889. break;
  2890. case 'c':
  2891. opt |= 0x20000;
  2892. break;
  2893. case 'M':
  2894. opt |= 0x100000;
  2895. break;
  2896. case 'm':
  2897. opt |= 0x200000;
  2898. break;
  2899. }
  2900. }
  2901. } else {
  2902. opt = 0x900;
  2903. }
  2904. /* encoding */
  2905. if (encname != NULL) {
  2906. string.no_encoding = mbfl_name2no_encoding(encname);
  2907. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2908. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
  2909. RETURN_FALSE;
  2910. }
  2911. }
  2912. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  2913. if (ret != NULL) {
  2914. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2915. } else {
  2916. RETVAL_FALSE;
  2917. }
  2918. }
  2919. /* }}} */
  2920. #define PHP_MBSTR_STACK_BLOCK_SIZE 32
  2921. /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
  2922. Converts the string resource in variables to desired encoding */
  2923. PHP_FUNCTION(mb_convert_variables)
  2924. {
  2925. zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
  2926. HashTable *target_hash;
  2927. mbfl_string string, result, *ret;
  2928. enum mbfl_no_encoding from_encoding, to_encoding;
  2929. mbfl_encoding_detector *identd;
  2930. mbfl_buffer_converter *convd;
  2931. int n, to_enc_len, argc, stack_level, stack_max, elistsz;
  2932. enum mbfl_no_encoding *elist;
  2933. char *name, *to_enc;
  2934. void *ptmp;
  2935. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
  2936. return;
  2937. }
  2938. /* new encoding */
  2939. to_encoding = mbfl_name2no_encoding(to_enc);
  2940. if (to_encoding == mbfl_no_encoding_invalid) {
  2941. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
  2942. efree(args);
  2943. RETURN_FALSE;
  2944. }
  2945. /* initialize string */
  2946. mbfl_string_init(&string);
  2947. mbfl_string_init(&result);
  2948. from_encoding = MBSTRG(current_internal_encoding);
  2949. string.no_encoding = from_encoding;
  2950. string.no_language = MBSTRG(language);
  2951. /* pre-conversion encoding */
  2952. elist = NULL;
  2953. elistsz = 0;
  2954. switch (Z_TYPE_PP(zfrom_enc)) {
  2955. case IS_ARRAY:
  2956. php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
  2957. break;
  2958. default:
  2959. convert_to_string_ex(zfrom_enc);
  2960. php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
  2961. break;
  2962. }
  2963. if (elistsz <= 0) {
  2964. from_encoding = mbfl_no_encoding_pass;
  2965. } else if (elistsz == 1) {
  2966. from_encoding = *elist;
  2967. } else {
  2968. /* auto detect */
  2969. from_encoding = mbfl_no_encoding_invalid;
  2970. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2971. stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
  2972. stack_level = 0;
  2973. identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
  2974. if (identd != NULL) {
  2975. n = 0;
  2976. while (n < argc || stack_level > 0) {
  2977. if (stack_level <= 0) {
  2978. var = args[n++];
  2979. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2980. target_hash = HASH_OF(*var);
  2981. if (target_hash != NULL) {
  2982. zend_hash_internal_pointer_reset(target_hash);
  2983. }
  2984. }
  2985. } else {
  2986. stack_level--;
  2987. var = stack[stack_level];
  2988. }
  2989. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2990. target_hash = HASH_OF(*var);
  2991. if (target_hash != NULL) {
  2992. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2993. zend_hash_move_forward(target_hash);
  2994. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2995. if (stack_level >= stack_max) {
  2996. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2997. ptmp = erealloc(stack, sizeof(zval **)*stack_max);
  2998. stack = (zval ***)ptmp;
  2999. }
  3000. stack[stack_level] = var;
  3001. stack_level++;
  3002. var = hash_entry;
  3003. target_hash = HASH_OF(*var);
  3004. if (target_hash != NULL) {
  3005. zend_hash_internal_pointer_reset(target_hash);
  3006. continue;
  3007. }
  3008. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  3009. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  3010. string.len = Z_STRLEN_PP(hash_entry);
  3011. if (mbfl_encoding_detector_feed(identd, &string)) {
  3012. goto detect_end; /* complete detecting */
  3013. }
  3014. }
  3015. }
  3016. }
  3017. } else if (Z_TYPE_PP(var) == IS_STRING) {
  3018. string.val = (unsigned char *)Z_STRVAL_PP(var);
  3019. string.len = Z_STRLEN_PP(var);
  3020. if (mbfl_encoding_detector_feed(identd, &string)) {
  3021. goto detect_end; /* complete detecting */
  3022. }
  3023. }
  3024. }
  3025. detect_end:
  3026. from_encoding = mbfl_encoding_detector_judge(identd);
  3027. mbfl_encoding_detector_delete(identd);
  3028. }
  3029. efree(stack);
  3030. if (from_encoding == mbfl_no_encoding_invalid) {
  3031. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
  3032. from_encoding = mbfl_no_encoding_pass;
  3033. }
  3034. }
  3035. if (elist != NULL) {
  3036. efree((void *)elist);
  3037. }
  3038. /* create converter */
  3039. convd = NULL;
  3040. if (from_encoding != mbfl_no_encoding_pass) {
  3041. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
  3042. if (convd == NULL) {
  3043. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  3044. RETURN_FALSE;
  3045. }
  3046. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3047. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3048. }
  3049. /* convert */
  3050. if (convd != NULL) {
  3051. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  3052. stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
  3053. stack_level = 0;
  3054. n = 0;
  3055. while (n < argc || stack_level > 0) {
  3056. if (stack_level <= 0) {
  3057. var = args[n++];
  3058. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  3059. target_hash = HASH_OF(*var);
  3060. if (target_hash != NULL) {
  3061. zend_hash_internal_pointer_reset(target_hash);
  3062. }
  3063. }
  3064. } else {
  3065. stack_level--;
  3066. var = stack[stack_level];
  3067. }
  3068. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  3069. target_hash = HASH_OF(*var);
  3070. if (target_hash != NULL) {
  3071. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  3072. zend_hash_move_forward(target_hash);
  3073. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  3074. if (stack_level >= stack_max) {
  3075. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  3076. ptmp = erealloc(stack, sizeof(zval **)*stack_max);
  3077. stack = (zval ***)ptmp;
  3078. }
  3079. stack[stack_level] = var;
  3080. stack_level++;
  3081. var = hash_entry;
  3082. SEPARATE_ZVAL(hash_entry);
  3083. target_hash = HASH_OF(*var);
  3084. if (target_hash != NULL) {
  3085. zend_hash_internal_pointer_reset(target_hash);
  3086. continue;
  3087. }
  3088. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  3089. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  3090. string.len = Z_STRLEN_PP(hash_entry);
  3091. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3092. if (ret != NULL) {
  3093. if (Z_REFCOUNT_PP(hash_entry) > 1) {
  3094. Z_DELREF_PP(hash_entry);
  3095. MAKE_STD_ZVAL(*hash_entry);
  3096. } else {
  3097. zval_dtor(*hash_entry);
  3098. }
  3099. ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
  3100. }
  3101. }
  3102. }
  3103. }
  3104. } else if (Z_TYPE_PP(var) == IS_STRING) {
  3105. string.val = (unsigned char *)Z_STRVAL_PP(var);
  3106. string.len = Z_STRLEN_PP(var);
  3107. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3108. if (ret != NULL) {
  3109. zval_dtor(*var);
  3110. ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
  3111. }
  3112. }
  3113. }
  3114. efree(stack);
  3115. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  3116. mbfl_buffer_converter_delete(convd);
  3117. }
  3118. efree(args);
  3119. name = (char *)mbfl_no_encoding2name(from_encoding);
  3120. if (name != NULL) {
  3121. RETURN_STRING(name, 1);
  3122. } else {
  3123. RETURN_FALSE;
  3124. }
  3125. }
  3126. /* }}} */
  3127. /* {{{ HTML numeric entity */
  3128. /* {{{ static void php_mb_numericentity_exec() */
  3129. static void
  3130. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
  3131. {
  3132. char *str, *encoding;
  3133. int str_len, encoding_len;
  3134. zval *zconvmap, **hash_entry;
  3135. HashTable *target_hash;
  3136. size_t argc = ZEND_NUM_ARGS();
  3137. int i, *convmap, *mapelm, mapsize=0;
  3138. mbfl_string string, result, *ret;
  3139. enum mbfl_no_encoding no_encoding;
  3140. if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
  3141. return;
  3142. }
  3143. mbfl_string_init(&string);
  3144. string.no_language = MBSTRG(language);
  3145. string.no_encoding = MBSTRG(current_internal_encoding);
  3146. string.val = (unsigned char *)str;
  3147. string.len = str_len;
  3148. /* encoding */
  3149. if (argc == 3) {
  3150. no_encoding = mbfl_name2no_encoding(encoding);
  3151. if (no_encoding == mbfl_no_encoding_invalid) {
  3152. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  3153. RETURN_FALSE;
  3154. } else {
  3155. string.no_encoding = no_encoding;
  3156. }
  3157. }
  3158. /* conversion map */
  3159. convmap = NULL;
  3160. if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
  3161. target_hash = Z_ARRVAL_P(zconvmap);
  3162. zend_hash_internal_pointer_reset(target_hash);
  3163. i = zend_hash_num_elements(target_hash);
  3164. if (i > 0) {
  3165. convmap = (int *)safe_emalloc(i, sizeof(int), 0);
  3166. mapelm = convmap;
  3167. mapsize = 0;
  3168. while (i > 0) {
  3169. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  3170. break;
  3171. }
  3172. convert_to_long_ex(hash_entry);
  3173. *mapelm++ = Z_LVAL_PP(hash_entry);
  3174. mapsize++;
  3175. i--;
  3176. zend_hash_move_forward(target_hash);
  3177. }
  3178. }
  3179. }
  3180. if (convmap == NULL) {
  3181. RETURN_FALSE;
  3182. }
  3183. mapsize /= 4;
  3184. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
  3185. if (ret != NULL) {
  3186. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  3187. } else {
  3188. RETVAL_FALSE;
  3189. }
  3190. efree((void *)convmap);
  3191. }
  3192. /* }}} */
  3193. /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
  3194. Converts specified characters to HTML numeric entities */
  3195. PHP_FUNCTION(mb_encode_numericentity)
  3196. {
  3197. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  3198. }
  3199. /* }}} */
  3200. /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
  3201. Converts HTML numeric entities to character code */
  3202. PHP_FUNCTION(mb_decode_numericentity)
  3203. {
  3204. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  3205. }
  3206. /* }}} */
  3207. /* }}} */
  3208. /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
  3209. * Sends an email message with MIME scheme
  3210. */
  /* For use inside a loop over str indexed by pos: when pos is at a CRLF that
     is followed by a space or tab, move pos onto the last character of that
     whitespace run and `continue` the enclosing loop. Deliberately a bare
     `if` and not do { } while (0): the `continue` must reach the caller's
     loop. */
  #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
      if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
          pos += 2; \
          while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \
              pos++; \
          } \
          continue; \
      }

  /* Replace every NUL byte in the len bytes at str with a space. Uses the
     caller's `pp` and `ee` pointer variables as scratch. The macro must not
     end in a line continuation, or the next line is spliced into its body. */
  #define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
      pp = str; \
      ee = pp + len; \
      while ((pp = memchr(pp, '\0', (ee - pp)))) { \
          *pp = ' '; \
      }

  /* Append one character to the caller's smart_str `token`. While token.a is
     0 the token is only a window onto another buffer, so the character is
     accounted for by growing the length instead of writing it. */
  #define APPEND_ONE_CHAR(ch) do { \
      if (token.a > 0) { \
          smart_str_appendc(&token, ch); \
      } else {\
          token.len++; \
      } \
  } while (0)

  /* Give a smart_str whose `a` is 0 a buffer of its own: allocate the
     smallest power of two that is >= len (plus one byte) and copy the
     current contents into it. Does nothing when `a` is already non-zero. */
  #define SEPARATE_SMART_STR(str) do {\
      if ((str)->a == 0) { \
          char *tmp_ptr; \
          (str)->a = 1; \
          while ((str)->a < (str)->len) { \
              (str)->a <<= 1; \
          } \
          tmp_ptr = emalloc((str)->a + 1); \
          memcpy(tmp_ptr, (str)->c, (str)->len); \
          (str)->c = tmp_ptr; \
      } \
  } while (0)
  3244. static void my_smart_str_dtor(smart_str *s)
  3245. {
  3246. if (s->a > 0) {
  3247. smart_str_free(s);
  3248. }
  3249. }
  3250. static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
  3251. {
  3252. const char *ps;
  3253. size_t icnt;
  3254. int state = 0;
  3255. int crlf_state = -1;
  3256. smart_str token = { 0, 0, 0 };
  3257. smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
  3258. ps = str;
  3259. icnt = str_len;
  3260. /*
  3261. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  3262. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  3263. * state 0 1 2 3
  3264. *
  3265. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  3266. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  3267. * crlf_state -1 0 1 -1
  3268. *
  3269. */
  3270. while (icnt > 0) {
  3271. switch (*ps) {
  3272. case ':':
  3273. if (crlf_state == 1) {
  3274. APPEND_ONE_CHAR('\r');
  3275. }
  3276. if (state == 0 || state == 1) {
  3277. fld_name = token;
  3278. state = 2;
  3279. } else {
  3280. APPEND_ONE_CHAR(*ps);
  3281. }
  3282. crlf_state = 0;
  3283. break;
  3284. case '\n':
  3285. if (crlf_state == -1) {
  3286. goto out;
  3287. }
  3288. crlf_state = -1;
  3289. break;
  3290. case '\r':
  3291. if (crlf_state == 1) {
  3292. APPEND_ONE_CHAR('\r');
  3293. } else {
  3294. crlf_state = 1;
  3295. }
  3296. break;
  3297. case ' ': case '\t':
  3298. if (crlf_state == -1) {
  3299. if (state == 3) {
  3300. /* continuing from the previous line */
  3301. SEPARATE_SMART_STR(&token);
  3302. state = 4;
  3303. } else {
  3304. /* simply skipping this new line */
  3305. state = 5;
  3306. }
  3307. } else {
  3308. if (crlf_state == 1) {
  3309. APPEND_ONE_CHAR('\r');
  3310. }
  3311. if (state == 1 || state == 3) {
  3312. APPEND_ONE_CHAR(*ps);
  3313. }
  3314. }
  3315. crlf_state = 0;
  3316. break;
  3317. default:
  3318. switch (state) {
  3319. case 0:
  3320. token.c = (char *)ps;
  3321. token.len = 0;
  3322. token.a = 0;
  3323. state = 1;
  3324. break;
  3325. case 2:
  3326. if (crlf_state != -1) {
  3327. token.c = (char *)ps;
  3328. token.len = 0;
  3329. token.a = 0;
  3330. state = 3;
  3331. break;
  3332. }
  3333. /* break is missing intentionally */
  3334. case 3:
  3335. if (crlf_state == -1) {
  3336. fld_val = token;
  3337. if (fld_name.c != NULL && fld_val.c != NULL) {
  3338. char *dummy;
  3339. /* FIXME: some locale free implementation is
  3340. * really required here,,, */
  3341. SEPARATE_SMART_STR(&fld_name);
  3342. php_strtoupper(fld_name.c, fld_name.len);
  3343. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  3344. my_smart_str_dtor(&fld_name);
  3345. }
  3346. memset(&fld_name, 0, sizeof(smart_str));
  3347. memset(&fld_val, 0, sizeof(smart_str));
  3348. token.c = (char *)ps;
  3349. token.len = 0;
  3350. token.a = 0;
  3351. state = 1;
  3352. }
  3353. break;
  3354. case 4:
  3355. APPEND_ONE_CHAR(' ');
  3356. state = 3;
  3357. break;
  3358. }
  3359. if (crlf_state == 1) {
  3360. APPEND_ONE_CHAR('\r');
  3361. }
  3362. APPEND_ONE_CHAR(*ps);
  3363. crlf_state = 0;
  3364. break;
  3365. }
  3366. ps++, icnt--;
  3367. }
  3368. out:
  3369. if (state == 2) {
  3370. token.c = "";
  3371. token.len = 0;
  3372. token.a = 0;
  3373. state = 3;
  3374. }
  3375. if (state == 3) {
  3376. fld_val = token;
  3377. if (fld_name.c != NULL && fld_val.c != NULL) {
  3378. void *dummy;
  3379. /* FIXME: some locale free implementation is
  3380. * really required here,,, */
  3381. SEPARATE_SMART_STR(&fld_name);
  3382. php_strtoupper(fld_name.c, fld_name.len);
  3383. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  3384. my_smart_str_dtor(&fld_name);
  3385. }
  3386. }
  3387. return state;
  3388. }
  3389. PHP_FUNCTION(mb_send_mail)
  3390. {
  3391. int n;
  3392. char *to = NULL;
  3393. int to_len;
  3394. char *message = NULL;
  3395. int message_len;
  3396. char *headers = NULL;
  3397. int headers_len;
  3398. char *subject = NULL;
  3399. int subject_len;
  3400. char *extra_cmd = NULL;
  3401. int extra_cmd_len;
  3402. int i;
  3403. char *to_r = NULL;
  3404. char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
  3405. struct {
  3406. int cnt_type:1;
  3407. int cnt_trans_enc:1;
  3408. } suppressed_hdrs = { 0, 0 };
  3409. char *message_buf = NULL, *subject_buf = NULL, *p;
  3410. mbfl_string orig_str, conv_str;
  3411. mbfl_string *pstr; /* pointer to mbfl string for return value */
  3412. enum mbfl_no_encoding
  3413. tran_cs, /* transfar text charset */
  3414. head_enc, /* header transfar encoding */
  3415. body_enc; /* body transfar encoding */
  3416. mbfl_memory_device device; /* automatic allocateable buffer for additional header */
  3417. const mbfl_language *lang;
  3418. int err = 0;
  3419. HashTable ht_headers;
  3420. smart_str *s;
  3421. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  3422. char *pp, *ee;
  3423. if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) {
  3424. php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. The fifth parameter is disabled in SAFE MODE.");
  3425. RETURN_FALSE;
  3426. }
  3427. /* initialize */
  3428. mbfl_memory_device_init(&device, 0, 0);
  3429. mbfl_string_init(&orig_str);
  3430. mbfl_string_init(&conv_str);
  3431. /* character-set, transfer-encoding */
  3432. tran_cs = mbfl_no_encoding_utf8;
  3433. head_enc = mbfl_no_encoding_base64;
  3434. body_enc = mbfl_no_encoding_base64;
  3435. lang = mbfl_no2language(MBSTRG(language));
  3436. if (lang != NULL) {
  3437. tran_cs = lang->mail_charset;
  3438. head_enc = lang->mail_header_encoding;
  3439. body_enc = lang->mail_body_encoding;
  3440. }
  3441. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
  3442. return;
  3443. }
  3444. /* ASCIIZ check */
  3445. MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
  3446. MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
  3447. MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
  3448. if (headers) {
  3449. MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
  3450. }
  3451. if (extra_cmd) {
  3452. MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
  3453. }
  3454. zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
  3455. if (headers != NULL) {
  3456. _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
  3457. }
  3458. if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
  3459. char *tmp;
  3460. char *param_name;
  3461. char *charset = NULL;
  3462. SEPARATE_SMART_STR(s);
  3463. smart_str_0(s);
  3464. p = strchr(s->c, ';');
  3465. if (p != NULL) {
  3466. /* skipping the padded spaces */
  3467. do {
  3468. ++p;
  3469. } while (*p == ' ' || *p == '\t');
  3470. if (*p != '\0') {
  3471. if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
  3472. if (strcasecmp(param_name, "charset") == 0) {
  3473. enum mbfl_no_encoding _tran_cs = tran_cs;
  3474. charset = php_strtok_r(NULL, "= \"", &tmp);
  3475. if (charset != NULL) {
  3476. _tran_cs = mbfl_name2no_encoding(charset);
  3477. }
  3478. if (_tran_cs == mbfl_no_encoding_invalid) {
  3479. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
  3480. _tran_cs = mbfl_no_encoding_ascii;
  3481. }
  3482. tran_cs = _tran_cs;
  3483. }
  3484. }
  3485. }
  3486. }
  3487. suppressed_hdrs.cnt_type = 1;
  3488. }
  3489. if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
  3490. enum mbfl_no_encoding _body_enc;
  3491. SEPARATE_SMART_STR(s);
  3492. smart_str_0(s);
  3493. _body_enc = mbfl_name2no_encoding(s->c);
  3494. switch (_body_enc) {
  3495. case mbfl_no_encoding_base64:
  3496. case mbfl_no_encoding_7bit:
  3497. case mbfl_no_encoding_8bit:
  3498. body_enc = _body_enc;
  3499. break;
  3500. default:
  3501. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
  3502. body_enc = mbfl_no_encoding_8bit;
  3503. break;
  3504. }
  3505. suppressed_hdrs.cnt_trans_enc = 1;
  3506. }
  3507. /* To: */
  3508. if (to != NULL) {
  3509. if (to_len > 0) {
  3510. to_r = estrndup(to, to_len);
  3511. for (; to_len; to_len--) {
  3512. if (!isspace((unsigned char) to_r[to_len - 1])) {
  3513. break;
  3514. }
  3515. to_r[to_len - 1] = '\0';
  3516. }
  3517. for (i = 0; to_r[i]; i++) {
  3518. if (iscntrl((unsigned char) to_r[i])) {
  3519. /* According to RFC 822, section 3.1.1 long headers may be separated into
  3520. * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
  3521. * To prevent these separators from being replaced with a space, we use the
  3522. * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
  3523. */
  3524. SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
  3525. to_r[i] = ' ';
  3526. }
  3527. }
  3528. } else {
  3529. to_r = to;
  3530. }
  3531. } else {
  3532. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
  3533. err = 1;
  3534. }
  3535. /* Subject: */
  3536. if (subject != NULL && subject_len >= 0) {
  3537. orig_str.no_language = MBSTRG(language);
  3538. orig_str.val = (unsigned char *)subject;
  3539. orig_str.len = subject_len;
  3540. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  3541. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  3542. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  3543. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3544. }
  3545. pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
  3546. if (pstr != NULL) {
  3547. subject_buf = subject = (char *)pstr->val;
  3548. }
  3549. } else {
  3550. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
  3551. err = 1;
  3552. }
  3553. /* message body */
  3554. if (message != NULL) {
  3555. orig_str.no_language = MBSTRG(language);
  3556. orig_str.val = (unsigned char *)message;
  3557. orig_str.len = (unsigned int)message_len;
  3558. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  3559. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  3560. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  3561. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  3562. }
  3563. pstr = NULL;
  3564. {
  3565. mbfl_string tmpstr;
  3566. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  3567. tmpstr.no_encoding=mbfl_no_encoding_8bit;
  3568. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  3569. efree(tmpstr.val);
  3570. }
  3571. }
  3572. if (pstr != NULL) {
  3573. message_buf = message = (char *)pstr->val;
  3574. }
  3575. } else {
  3576. /* this is not really an error, so it is allowed. */
  3577. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
  3578. message = NULL;
  3579. }
  3580. /* other headers */
  3581. #define PHP_MBSTR_MAIL_MIME_HEADER1 "Mime-Version: 1.0"
  3582. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  3583. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  3584. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  3585. if (headers != NULL) {
  3586. p = headers;
  3587. n = headers_len;
  3588. mbfl_memory_device_strncat(&device, p, n);
  3589. if (n > 0 && p[n - 1] != '\n') {
  3590. mbfl_memory_device_strncat(&device, "\n", 1);
  3591. }
  3592. }
  3593. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  3594. mbfl_memory_device_strncat(&device, "\n", 1);
  3595. if (!suppressed_hdrs.cnt_type) {
  3596. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  3597. p = (char *)mbfl_no2preferred_mime_name(tran_cs);
  3598. if (p != NULL) {
  3599. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  3600. mbfl_memory_device_strcat(&device, p);
  3601. }
  3602. mbfl_memory_device_strncat(&device, "\n", 1);
  3603. }
  3604. if (!suppressed_hdrs.cnt_trans_enc) {
  3605. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  3606. p = (char *)mbfl_no2preferred_mime_name(body_enc);
  3607. if (p == NULL) {
  3608. p = "7bit";
  3609. }
  3610. mbfl_memory_device_strcat(&device, p);
  3611. mbfl_memory_device_strncat(&device, "\n", 1);
  3612. }
  3613. mbfl_memory_device_unput(&device);
  3614. mbfl_memory_device_output('\0', &device);
  3615. headers = (char *)device.buffer;
  3616. if (force_extra_parameters) {
  3617. extra_cmd = php_escape_shell_cmd(force_extra_parameters);
  3618. } else if (extra_cmd) {
  3619. extra_cmd = php_escape_shell_cmd(extra_cmd);
  3620. }
  3621. if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
  3622. RETVAL_TRUE;
  3623. } else {
  3624. RETVAL_FALSE;
  3625. }
  3626. if (extra_cmd) {
  3627. efree(extra_cmd);
  3628. }
  3629. if (to_r != to) {
  3630. efree(to_r);
  3631. }
  3632. if (subject_buf) {
  3633. efree((void *)subject_buf);
  3634. }
  3635. if (message_buf) {
  3636. efree((void *)message_buf);
  3637. }
  3638. mbfl_memory_device_clear(&device);
  3639. zend_hash_destroy(&ht_headers);
  3640. }
  3641. #undef SKIP_LONG_HEADER_SEP_MBSTRING
  3642. #undef MAIL_ASCIIZ_CHECK_MBSTRING
  3643. #undef APPEND_ONE_CHAR
  3644. #undef SEPARATE_SMART_STR
  3645. #undef PHP_MBSTR_MAIL_MIME_HEADER1
  3646. #undef PHP_MBSTR_MAIL_MIME_HEADER2
  3647. #undef PHP_MBSTR_MAIL_MIME_HEADER3
  3648. #undef PHP_MBSTR_MAIL_MIME_HEADER4
  3649. /* }}} */
  3650. /* {{{ proto mixed mb_get_info([string type])
  3651. Returns the current settings of mbstring */
  3652. PHP_FUNCTION(mb_get_info)
  3653. {
  3654. char *typ = NULL;
  3655. int typ_len, n;
  3656. char *name;
  3657. const struct mb_overload_def *over_func;
  3658. zval *row1, *row2;
  3659. const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
  3660. enum mbfl_no_encoding *entry;
  3661. #ifdef ZEND_MULTIBYTE
  3662. zval *row3;
  3663. #endif /* ZEND_MULTIBYTE */
  3664. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  3665. RETURN_FALSE;
  3666. }
  3667. if (!typ || !strcasecmp("all", typ)) {
  3668. array_init(return_value);
  3669. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  3670. add_assoc_string(return_value, "internal_encoding", name, 1);
  3671. }
  3672. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  3673. add_assoc_string(return_value, "http_input", name, 1);
  3674. }
  3675. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  3676. add_assoc_string(return_value, "http_output", name, 1);
  3677. }
  3678. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
  3679. add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
  3680. }
  3681. add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
  3682. if (MBSTRG(func_overload)){
  3683. over_func = &(mb_ovld[0]);
  3684. MAKE_STD_ZVAL(row1);
  3685. array_init(row1);
  3686. while (over_func->type > 0) {
  3687. if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
  3688. add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
  3689. }
  3690. over_func++;
  3691. }
  3692. add_assoc_zval(return_value, "func_overload_list", row1);
  3693. } else {
  3694. add_assoc_string(return_value, "func_overload_list", "no overload", 1);
  3695. }
  3696. if (lang != NULL) {
  3697. if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3698. add_assoc_string(return_value, "mail_charset", name, 1);
  3699. }
  3700. if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3701. add_assoc_string(return_value, "mail_header_encoding", name, 1);
  3702. }
  3703. if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3704. add_assoc_string(return_value, "mail_body_encoding", name, 1);
  3705. }
  3706. }
  3707. add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
  3708. if (MBSTRG(encoding_translation)) {
  3709. add_assoc_string(return_value, "encoding_translation", "On", 1);
  3710. } else {
  3711. add_assoc_string(return_value, "encoding_translation", "Off", 1);
  3712. }
  3713. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3714. add_assoc_string(return_value, "language", name, 1);
  3715. }
  3716. n = MBSTRG(current_detect_order_list_size);
  3717. entry = MBSTRG(current_detect_order_list);
  3718. if(n > 0) {
  3719. MAKE_STD_ZVAL(row2);
  3720. array_init(row2);
  3721. while (n > 0) {
  3722. if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
  3723. add_next_index_string(row2, name, 1);
  3724. }
  3725. entry++;
  3726. n--;
  3727. }
  3728. add_assoc_zval(return_value, "detect_order", row2);
  3729. }
  3730. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3731. add_assoc_string(return_value, "substitute_character", "none", 1);
  3732. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3733. add_assoc_string(return_value, "substitute_character", "long", 1);
  3734. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3735. add_assoc_string(return_value, "substitute_character", "entity", 1);
  3736. } else {
  3737. add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
  3738. }
  3739. if (MBSTRG(strict_detection)) {
  3740. add_assoc_string(return_value, "strict_detection", "On", 1);
  3741. } else {
  3742. add_assoc_string(return_value, "strict_detection", "Off", 1);
  3743. }
  3744. #ifdef ZEND_MULTIBYTE
  3745. entry = MBSTRG(script_encoding_list);
  3746. n = MBSTRG(script_encoding_list_size);
  3747. if(n > 0) {
  3748. MAKE_STD_ZVAL(row3);
  3749. array_init(row3);
  3750. while (n > 0) {
  3751. if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
  3752. add_next_index_string(row3, name, 1);
  3753. }
  3754. entry++;
  3755. n--;
  3756. }
  3757. add_assoc_zval(return_value, "script_encoding", row3);
  3758. }
  3759. #endif /* ZEND_MULTIBYTE */
  3760. } else if (!strcasecmp("internal_encoding", typ)) {
  3761. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  3762. RETVAL_STRING(name, 1);
  3763. }
  3764. } else if (!strcasecmp("http_input", typ)) {
  3765. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  3766. RETVAL_STRING(name, 1);
  3767. }
  3768. } else if (!strcasecmp("http_output", typ)) {
  3769. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  3770. RETVAL_STRING(name, 1);
  3771. }
  3772. } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
  3773. if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
  3774. RETVAL_STRING(name, 1);
  3775. }
  3776. } else if (!strcasecmp("func_overload", typ)) {
  3777. RETVAL_LONG(MBSTRG(func_overload));
  3778. } else if (!strcasecmp("func_overload_list", typ)) {
  3779. if (MBSTRG(func_overload)){
  3780. over_func = &(mb_ovld[0]);
  3781. array_init(return_value);
  3782. while (over_func->type > 0) {
  3783. if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
  3784. add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
  3785. }
  3786. over_func++;
  3787. }
  3788. } else {
  3789. RETVAL_STRING("no overload", 1);
  3790. }
  3791. } else if (!strcasecmp("mail_charset", typ)) {
  3792. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
  3793. RETVAL_STRING(name, 1);
  3794. }
  3795. } else if (!strcasecmp("mail_header_encoding", typ)) {
  3796. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
  3797. RETVAL_STRING(name, 1);
  3798. }
  3799. } else if (!strcasecmp("mail_body_encoding", typ)) {
  3800. if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
  3801. RETVAL_STRING(name, 1);
  3802. }
  3803. } else if (!strcasecmp("illegal_chars", typ)) {
  3804. RETVAL_LONG(MBSTRG(illegalchars));
  3805. } else if (!strcasecmp("encoding_translation", typ)) {
  3806. if (MBSTRG(encoding_translation)) {
  3807. RETVAL_STRING("On", 1);
  3808. } else {
  3809. RETVAL_STRING("Off", 1);
  3810. }
  3811. } else if (!strcasecmp("language", typ)) {
  3812. if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
  3813. RETVAL_STRING(name, 1);
  3814. }
  3815. } else if (!strcasecmp("detect_order", typ)) {
  3816. n = MBSTRG(current_detect_order_list_size);
  3817. entry = MBSTRG(current_detect_order_list);
  3818. if(n > 0) {
  3819. array_init(return_value);
  3820. while (n > 0) {
  3821. name = (char *)mbfl_no_encoding2name(*entry);
  3822. if (name) {
  3823. add_next_index_string(return_value, name, 1);
  3824. }
  3825. entry++;
  3826. n--;
  3827. }
  3828. }
  3829. } else if (!strcasecmp("substitute_character", typ)) {
  3830. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  3831. RETVAL_STRING("none", 1);
  3832. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  3833. RETVAL_STRING("long", 1);
  3834. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
  3835. RETVAL_STRING("entity", 1);
  3836. } else {
  3837. RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
  3838. }
  3839. } else if (!strcasecmp("strict_detection", typ)) {
  3840. if (MBSTRG(strict_detection)) {
  3841. RETVAL_STRING("On", 1);
  3842. } else {
  3843. RETVAL_STRING("Off", 1);
  3844. }
  3845. } else {
  3846. #ifdef ZEND_MULTIBYTE
  3847. if (!strcasecmp("script_encoding", typ)) {
  3848. entry = MBSTRG(script_encoding_list);
  3849. n = MBSTRG(script_encoding_list_size);
  3850. if(n > 0) {
  3851. array_init(return_value);
  3852. while (n > 0) {
  3853. name = (char *)mbfl_no_encoding2name(*entry);
  3854. if (name) {
  3855. add_next_index_string(return_value, name, 1);
  3856. }
  3857. entry++;
  3858. n--;
  3859. }
  3860. }
  3861. return;
  3862. }
  3863. #endif /* ZEND_MULTIBYTE */
  3864. RETURN_FALSE;
  3865. }
  3866. }
  3867. /* }}} */
  3868. /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
  3869. Check if the string is valid for the specified encoding */
  3870. PHP_FUNCTION(mb_check_encoding)
  3871. {
  3872. char *var = NULL;
  3873. int var_len;
  3874. char *enc = NULL;
  3875. int enc_len;
  3876. mbfl_buffer_converter *convd;
  3877. enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
  3878. mbfl_string string, result, *ret = NULL;
  3879. long illegalchars = 0;
  3880. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
  3881. RETURN_FALSE;
  3882. }
  3883. if (var == NULL) {
  3884. RETURN_BOOL(MBSTRG(illegalchars) == 0);
  3885. }
  3886. if (enc != NULL) {
  3887. no_encoding = mbfl_name2no_encoding(enc);
  3888. if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
  3889. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
  3890. RETURN_FALSE;
  3891. }
  3892. }
  3893. convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
  3894. if (convd == NULL) {
  3895. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  3896. RETURN_FALSE;
  3897. }
  3898. mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
  3899. mbfl_buffer_converter_illegal_substchar(convd, 0);
  3900. /* initialize string */
  3901. mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
  3902. mbfl_string_init(&result);
  3903. string.val = (unsigned char *)var;
  3904. string.len = var_len;
  3905. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3906. illegalchars = mbfl_buffer_illegalchars(convd);
  3907. mbfl_buffer_converter_delete(convd);
  3908. RETVAL_FALSE;
  3909. if (ret != NULL) {
  3910. if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
  3911. RETVAL_TRUE;
  3912. }
  3913. mbfl_string_clear(&result);
  3914. }
  3915. }
  3916. /* }}} */
  3917. /* {{{ MBSTRING_API int php_mb_encoding_translation() */
  3918. MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
  3919. {
  3920. return MBSTRG(encoding_translation);
  3921. }
  3922. /* }}} */
  3923. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
  3924. MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
  3925. {
  3926. if (enc != NULL) {
  3927. if (enc->flag & MBFL_ENCTYPE_MBCS) {
  3928. if (enc->mblen_table != NULL) {
  3929. if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
  3930. }
  3931. } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  3932. return 2;
  3933. } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  3934. return 4;
  3935. }
  3936. }
  3937. return 1;
  3938. }
  3939. /* }}} */
  3940. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
  3941. MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
  3942. {
  3943. return php_mb_mbchar_bytes_ex(s,
  3944. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3945. }
  3946. /* }}} */
  3947. /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
  3948. MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
  3949. {
  3950. register const char *p = s;
  3951. char *last=NULL;
  3952. if (nbytes == (size_t)-1) {
  3953. size_t nb = 0;
  3954. while (*p != '\0') {
  3955. if (nb == 0) {
  3956. if ((unsigned char)*p == (unsigned char)c) {
  3957. last = (char *)p;
  3958. }
  3959. nb = php_mb_mbchar_bytes_ex(p, enc);
  3960. if (nb == 0) {
  3961. return NULL; /* something is going wrong! */
  3962. }
  3963. }
  3964. --nb;
  3965. ++p;
  3966. }
  3967. } else {
  3968. register size_t bcnt = nbytes;
  3969. register size_t nbytes_char;
  3970. while (bcnt > 0) {
  3971. if ((unsigned char)*p == (unsigned char)c) {
  3972. last = (char *)p;
  3973. }
  3974. nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
  3975. if (bcnt < nbytes_char) {
  3976. return NULL;
  3977. }
  3978. p += nbytes_char;
  3979. bcnt -= nbytes_char;
  3980. }
  3981. }
  3982. return last;
  3983. }
  3984. /* }}} */
  3985. /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
  3986. MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
  3987. {
  3988. return php_mb_safe_strrchr_ex(s, c, nbytes,
  3989. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3990. }
  3991. /* }}} */
  3992. /* {{{ MBSTRING_API char *php_mb_strrchr() */
  3993. MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
  3994. {
  3995. return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
  3996. }
  3997. /* }}} */
  3998. /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
  3999. MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
  4000. {
  4001. if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
  4002. return php_mb_mbchar_bytes_ex(s,
  4003. mbfl_no2encoding(MBSTRG(http_input_identify)));
  4004. } else {
  4005. return php_mb_mbchar_bytes_ex(s,
  4006. mbfl_no2encoding(MBSTRG(internal_encoding)));
  4007. }
  4008. }
  4009. /* }}} */
  4010. /* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
  4011. MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
  4012. {
  4013. int i;
  4014. mbfl_string string, result, *ret = NULL;
  4015. enum mbfl_no_encoding from_encoding, to_encoding;
  4016. mbfl_buffer_converter *convd;
  4017. if (encoding_to) {
  4018. /* new encoding */
  4019. to_encoding = mbfl_name2no_encoding(encoding_to);
  4020. if (to_encoding == mbfl_no_encoding_invalid) {
  4021. return -1;
  4022. }
  4023. } else {
  4024. to_encoding = MBSTRG(current_internal_encoding);
  4025. }
  4026. if (encoding_from) {
  4027. /* old encoding */
  4028. from_encoding = mbfl_name2no_encoding(encoding_from);
  4029. if (from_encoding == mbfl_no_encoding_invalid) {
  4030. return -1;
  4031. }
  4032. } else {
  4033. from_encoding = MBSTRG(http_input_identify);
  4034. }
  4035. if (from_encoding == mbfl_no_encoding_pass) {
  4036. return 0;
  4037. }
  4038. /* initialize string */
  4039. mbfl_string_init(&string);
  4040. mbfl_string_init(&result);
  4041. string.no_encoding = from_encoding;
  4042. string.no_language = MBSTRG(language);
  4043. for (i=0; i<num; i++){
  4044. string.val = (unsigned char *)str[i];
  4045. string.len = len[i];
  4046. /* initialize converter */
  4047. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  4048. if (convd == NULL) {
  4049. return -1;
  4050. }
  4051. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  4052. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  4053. /* do it */
  4054. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  4055. if (ret != NULL) {
  4056. efree(str[i]);
  4057. str[i] = (char *)ret->val;
  4058. len[i] = (int)ret->len;
  4059. }
  4060. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  4061. mbfl_buffer_converter_delete(convd);
  4062. }
  4063. return ret ? 0 : -1;
  4064. }
  4065. /* }}} */
  4066. /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
  4067. */
  4068. MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
  4069. {
  4070. mbfl_string string;
  4071. enum mbfl_no_encoding *elist;
  4072. enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
  4073. mbfl_encoding_detector *identd = NULL;
  4074. int size;
  4075. enum mbfl_no_encoding *list;
  4076. if (MBSTRG(http_input_list_size) == 1 &&
  4077. MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
  4078. MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
  4079. return SUCCESS;
  4080. }
  4081. if (MBSTRG(http_input_list_size) == 1 &&
  4082. MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
  4083. mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
  4084. MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
  4085. return SUCCESS;
  4086. }
  4087. if (arg_list && strlen(arg_list)>0) {
  4088. /* make encoding list */
  4089. list = NULL;
  4090. size = 0;
  4091. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  4092. if (size > 0 && list != NULL) {
  4093. elist = list;
  4094. } else {
  4095. elist = MBSTRG(current_detect_order_list);
  4096. size = MBSTRG(current_detect_order_list_size);
  4097. if (size <= 0){
  4098. elist = MBSTRG(default_detect_order_list);
  4099. size = MBSTRG(default_detect_order_list_size);
  4100. }
  4101. }
  4102. } else {
  4103. elist = MBSTRG(current_detect_order_list);
  4104. size = MBSTRG(current_detect_order_list_size);
  4105. if (size <= 0){
  4106. elist = MBSTRG(default_detect_order_list);
  4107. size = MBSTRG(default_detect_order_list_size);
  4108. }
  4109. }
  4110. mbfl_string_init(&string);
  4111. string.no_language = MBSTRG(language);
  4112. identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
  4113. if (identd) {
  4114. int n = 0;
  4115. while(n < num){
  4116. string.val = (unsigned char *)arg_string[n];
  4117. string.len = arg_length[n];
  4118. if (mbfl_encoding_detector_feed(identd, &string)) {
  4119. break;
  4120. }
  4121. n++;
  4122. }
  4123. encoding = mbfl_encoding_detector_judge(identd);
  4124. mbfl_encoding_detector_delete(identd);
  4125. }
  4126. if (encoding != mbfl_no_encoding_invalid) {
  4127. MBSTRG(http_input_identify) = encoding;
  4128. return SUCCESS;
  4129. } else {
  4130. return FAILURE;
  4131. }
  4132. }
  4133. /* }}} */
  4134. /* {{{ MBSTRING_API int php_mb_stripos()
  4135. */
  4136. MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
  4137. {
  4138. int n;
  4139. mbfl_string haystack, needle;
  4140. n = -1;
  4141. mbfl_string_init(&haystack);
  4142. mbfl_string_init(&needle);
  4143. haystack.no_language = MBSTRG(language);
  4144. haystack.no_encoding = MBSTRG(current_internal_encoding);
  4145. needle.no_language = MBSTRG(language);
  4146. needle.no_encoding = MBSTRG(current_internal_encoding);
  4147. do {
  4148. size_t len = 0;
  4149. haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
  4150. haystack.len = len;
  4151. if (!haystack.val) {
  4152. break;
  4153. }
  4154. if (haystack.len <= 0) {
  4155. break;
  4156. }
  4157. needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
  4158. needle.len = len;
  4159. if (!needle.val) {
  4160. break;
  4161. }
  4162. if (needle.len <= 0) {
  4163. break;
  4164. }
  4165. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
  4166. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  4167. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
  4168. break;
  4169. }
  4170. {
  4171. int haystack_char_len = mbfl_strlen(&haystack);
  4172. if (mode) {
  4173. if ((offset > 0 && offset > haystack_char_len) ||
  4174. (offset < 0 && -offset > haystack_char_len)) {
  4175. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
  4176. break;
  4177. }
  4178. } else {
  4179. if (offset < 0 || offset > haystack_char_len) {
  4180. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
  4181. break;
  4182. }
  4183. }
  4184. }
  4185. n = mbfl_strpos(&haystack, &needle, offset, mode);
  4186. } while(0);
  4187. if (haystack.val) {
  4188. efree(haystack.val);
  4189. }
  4190. if (needle.val) {
  4191. efree(needle.val);
  4192. }
  4193. return n;
  4194. }
  4195. /* }}} */
  4196. #ifdef ZEND_MULTIBYTE
  4197. /* {{{ php_mb_set_zend_encoding() */
  4198. static int php_mb_set_zend_encoding(TSRMLS_D)
  4199. {
  4200. /* 'd better use mbfl_memory_device? */
  4201. char *name, *list = NULL;
  4202. int n, *entry, list_size = 0;
  4203. zend_encoding_detector encoding_detector;
  4204. zend_encoding_converter encoding_converter;
  4205. zend_encoding_oddlen encoding_oddlen;
  4206. /* notify script encoding to Zend Engine */
  4207. entry = MBSTRG(script_encoding_list);
  4208. n = MBSTRG(script_encoding_list_size);
  4209. while (n > 0) {
  4210. name = (char *)mbfl_no_encoding2name(*entry);
  4211. if (name) {
  4212. list_size += strlen(name) + 1;
  4213. if (!list) {
  4214. list = (char*)emalloc(list_size);
  4215. *list = '\0';
  4216. } else {
  4217. list = (char*)erealloc(list, list_size);
  4218. strcat(list, ",");
  4219. }
  4220. strcat(list, name);
  4221. }
  4222. entry++;
  4223. n--;
  4224. }
  4225. zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
  4226. if (list) {
  4227. efree(list);
  4228. }
  4229. encoding_detector = php_mb_encoding_detector;
  4230. encoding_converter = php_mb_encoding_converter;
  4231. encoding_oddlen = php_mb_oddlen;
  4232. /* TODO: make independent from mbstring.encoding_translation? */
  4233. if (MBSTRG(encoding_translation)) {
  4234. /* notify internal encoding to Zend Engine */
  4235. name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  4236. zend_multibyte_set_internal_encoding(name TSRMLS_CC);
  4237. }
  4238. zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
  4239. return 0;
  4240. }
  4241. /* }}} */
  4242. /* {{{ char *php_mb_encoding_detector()
  4243. * Interface for Zend Engine
  4244. */
  4245. static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
  4246. {
  4247. mbfl_string string;
  4248. const char *ret;
  4249. enum mbfl_no_encoding *elist;
  4250. int size, *list;
  4251. /* make encoding list */
  4252. list = NULL;
  4253. size = 0;
  4254. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  4255. if (size <= 0) {
  4256. return NULL;
  4257. }
  4258. if (size > 0 && list != NULL) {
  4259. elist = list;
  4260. } else {
  4261. elist = MBSTRG(current_detect_order_list);
  4262. size = MBSTRG(current_detect_order_list_size);
  4263. }
  4264. mbfl_string_init(&string);
  4265. string.no_language = MBSTRG(language);
  4266. string.val = (unsigned char *)arg_string;
  4267. string.len = arg_length;
  4268. ret = mbfl_identify_encoding_name(&string, elist, size, 0);
  4269. if (list != NULL) {
  4270. efree((void *)list);
  4271. }
  4272. if (ret != NULL) {
  4273. return estrdup(ret);
  4274. } else {
  4275. return NULL;
  4276. }
  4277. }
  4278. /* }}} */
  4279. /* {{{ int php_mb_encoding_converter() */
  4280. static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
  4281. {
  4282. mbfl_string string, result, *ret;
  4283. enum mbfl_no_encoding from_encoding, to_encoding;
  4284. mbfl_buffer_converter *convd;
  4285. /* new encoding */
  4286. to_encoding = mbfl_name2no_encoding(encoding_to);
  4287. if (to_encoding == mbfl_no_encoding_invalid) {
  4288. return -1;
  4289. }
  4290. /* old encoding */
  4291. from_encoding = mbfl_name2no_encoding(encoding_from);
  4292. if (from_encoding == mbfl_no_encoding_invalid) {
  4293. return -1;
  4294. }
  4295. /* initialize string */
  4296. mbfl_string_init(&string);
  4297. mbfl_string_init(&result);
  4298. string.no_encoding = from_encoding;
  4299. string.no_language = MBSTRG(language);
  4300. string.val = (unsigned char*)from;
  4301. string.len = from_length;
  4302. /* initialize converter */
  4303. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  4304. if (convd == NULL) {
  4305. return -1;
  4306. }
  4307. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  4308. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  4309. /* do it */
  4310. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  4311. if (ret != NULL) {
  4312. *to = ret->val;
  4313. *to_length = ret->len;
  4314. }
  4315. MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  4316. mbfl_buffer_converter_delete(convd);
  4317. return ret ? 0 : -1;
  4318. }
  4319. /* }}} */
  4320. /* {{{ int php_mb_oddlen()
  4321. * returns number of odd (e.g. appears only first byte of multibyte
  4322. * character) chars
  4323. */
  4324. static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
  4325. {
  4326. mbfl_string mb_string;
  4327. mbfl_string_init(&mb_string);
  4328. mb_string.no_language = MBSTRG(language);
  4329. mb_string.no_encoding = mbfl_name2no_encoding(encoding);
  4330. mb_string.val = (unsigned char *)string;
  4331. mb_string.len = length;
  4332. if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
  4333. return 0;
  4334. }
  4335. return mbfl_oddlen(&mb_string);
  4336. }
  4337. /* }}} */
  4338. #endif /* ZEND_MULTIBYTE */
  4339. #endif /* HAVE_MBSTRING */
  4340. /*
  4341. * Local variables:
  4342. * tab-width: 4
  4343. * c-basic-offset: 4
  4344. * End:
  4345. * vim600: fdm=marker
  4346. * vim: noet sw=4 ts=4
  4347. */