You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3799 lines
104 KiB

24 years ago
25 years ago
21 years ago
23 years ago
21 years ago
23 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
23 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
21 years ago
20 years ago
24 years ago
24 years ago
24 years ago
24 years ago
20 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2006 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. | Rui Hirokawa <hirokawa@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* $Id$ */
  20. /*
  21. * PHP 4 Multibyte String module "mbstring"
  22. *
  23. * History:
  24. * 2000.5.19 Release php-4.0RC2_jstring-1.0
  25. * 2001.4.1 Release php4_jstring-1.0.91
  26. * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
  27. * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
  28. */
  29. /*
  30. * PHP3 Internationalization support program.
  31. *
  32. * Copyright (c) 1999,2000 by the PHP3 internationalization team.
  33. * All rights reserved.
  34. *
  35. * See README_PHP3-i18n-ja for more detail.
  36. *
  37. * Authors:
  38. * Hironori Sato <satoh@jpnnet.com>
  39. * Shigeru Kanemoto <sgk@happysize.co.jp>
  40. * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
  41. * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
  42. */
  43. /* {{{ includes */
  44. #ifdef HAVE_CONFIG_H
  45. #include "config.h"
  46. #endif
  47. #include "php.h"
  48. #include "php_ini.h"
  49. #include "php_variables.h"
  50. #include "mbstring.h"
  51. #include "ext/standard/php_string.h"
  52. #include "ext/standard/php_mail.h"
  53. #include "ext/standard/exec.h"
  54. #include "ext/standard/php_smart_str.h"
  55. #include "ext/standard/url.h"
  56. #include "main/php_output.h"
  57. #include "ext/standard/info.h"
  58. #include "libmbfl/mbfl/mbfl_allocators.h"
  59. #include "php_variables.h"
  60. #include "php_globals.h"
  61. #include "rfc1867.h"
  62. #include "php_content_types.h"
  63. #include "SAPI.h"
  64. #include "php_unicode.h"
  65. #include "TSRM.h"
  66. #include "mb_gpc.h"
  67. #ifdef ZEND_MULTIBYTE
  68. #include "zend_multibyte.h"
  69. #endif /* ZEND_MULTIBYTE */
  70. #if HAVE_MBSTRING
  71. /* }}} */
  72. /* {{{ prototypes */
  73. static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC);
  74. static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC);
  75. /* }}} */
  76. /* {{{ php_mb_default_identify_list */
  77. typedef struct _php_mb_nls_ident_list {
  78. enum mbfl_no_language lang;
  79. enum mbfl_no_encoding* list;
  80. int list_size;
  81. } php_mb_nls_ident_list;
  82. static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  83. mbfl_no_encoding_ascii,
  84. mbfl_no_encoding_jis,
  85. mbfl_no_encoding_utf8,
  86. mbfl_no_encoding_euc_jp,
  87. mbfl_no_encoding_sjis
  88. };
  89. static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  90. mbfl_no_encoding_ascii,
  91. mbfl_no_encoding_utf8,
  92. mbfl_no_encoding_euc_cn,
  93. mbfl_no_encoding_cp936
  94. };
  95. static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  96. mbfl_no_encoding_ascii,
  97. mbfl_no_encoding_utf8,
  98. mbfl_no_encoding_euc_tw,
  99. mbfl_no_encoding_big5
  100. };
  101. static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  102. mbfl_no_encoding_ascii,
  103. mbfl_no_encoding_utf8,
  104. mbfl_no_encoding_euc_kr,
  105. mbfl_no_encoding_uhc
  106. };
  107. static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  108. mbfl_no_encoding_ascii,
  109. mbfl_no_encoding_utf8,
  110. mbfl_no_encoding_koi8r,
  111. mbfl_no_encoding_cp1251,
  112. mbfl_no_encoding_cp866
  113. };
  114. static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  115. mbfl_no_encoding_ascii,
  116. mbfl_no_encoding_utf8,
  117. mbfl_no_encoding_armscii8
  118. };
  119. static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
  120. mbfl_no_encoding_ascii,
  121. mbfl_no_encoding_utf8,
  122. mbfl_no_encoding_8859_9
  123. };
  124. static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  125. mbfl_no_encoding_ascii,
  126. mbfl_no_encoding_utf8
  127. };
  128. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  129. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  130. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  131. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  132. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  133. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  134. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  135. { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
  136. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  137. };
  138. /* }}} */
  139. static
  140. ZEND_BEGIN_ARG_INFO(third_and_rest_force_ref, 1)
  141. ZEND_ARG_PASS_INFO(0)
  142. ZEND_ARG_PASS_INFO(0)
  143. ZEND_END_ARG_INFO()
  144. /* {{{ mb_overload_def mb_ovld[] */
  145. static const struct mb_overload_def mb_ovld[] = {
  146. {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
  147. {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
  148. {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
  149. {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
  150. {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
  151. {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
  152. {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
  153. {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
  154. #if HAVE_MBREGEX
  155. {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
  156. {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
  157. {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
  158. {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
  159. {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
  160. #endif
  161. {0, NULL, NULL, NULL}
  162. };
  163. /* }}} */
  164. /* {{{ zend_function_entry mbstring_functions[] */
  165. zend_function_entry mbstring_functions[] = {
  166. PHP_FE(mb_convert_case, NULL)
  167. PHP_FE(mb_strtoupper, NULL)
  168. PHP_FE(mb_strtolower, NULL)
  169. PHP_FE(mb_language, NULL)
  170. PHP_FE(mb_internal_encoding, NULL)
  171. PHP_FE(mb_http_input, NULL)
  172. PHP_FE(mb_http_output, NULL)
  173. PHP_FE(mb_detect_order, NULL)
  174. PHP_FE(mb_substitute_character, NULL)
  175. PHP_FE(mb_parse_str, second_arg_force_ref)
  176. PHP_FE(mb_output_handler, NULL)
  177. PHP_FE(mb_preferred_mime_name, NULL)
  178. PHP_FE(mb_strlen, NULL)
  179. PHP_FE(mb_strpos, NULL)
  180. PHP_FE(mb_strrpos, NULL)
  181. PHP_FE(mb_substr_count, NULL)
  182. PHP_FE(mb_substr, NULL)
  183. PHP_FE(mb_strcut, NULL)
  184. PHP_FE(mb_strwidth, NULL)
  185. PHP_FE(mb_strimwidth, NULL)
  186. PHP_FE(mb_convert_encoding, NULL)
  187. PHP_FE(mb_detect_encoding, NULL)
  188. PHP_FE(mb_list_encodings, NULL)
  189. PHP_FE(mb_convert_kana, NULL)
  190. PHP_FE(mb_encode_mimeheader, NULL)
  191. PHP_FE(mb_decode_mimeheader, NULL)
  192. PHP_FE(mb_convert_variables, third_and_rest_force_ref)
  193. PHP_FE(mb_encode_numericentity, NULL)
  194. PHP_FE(mb_decode_numericentity, NULL)
  195. PHP_FE(mb_send_mail, NULL)
  196. PHP_FE(mb_get_info, NULL)
  197. #if HAVE_MBREGEX
  198. PHP_MBREGEX_FUNCTION_ENTRIES
  199. #endif
  200. { NULL, NULL, NULL }
  201. };
  202. /* }}} */
  203. /* {{{ zend_module_entry mbstring_module_entry */
  204. zend_module_entry mbstring_module_entry = {
  205. STANDARD_MODULE_HEADER,
  206. "mbstring",
  207. mbstring_functions,
  208. PHP_MINIT(mbstring),
  209. PHP_MSHUTDOWN(mbstring),
  210. PHP_RINIT(mbstring),
  211. PHP_RSHUTDOWN(mbstring),
  212. PHP_MINFO(mbstring),
  213. NO_VERSION_YET,
  214. STANDARD_MODULE_PROPERTIES
  215. };
  216. /* }}} */
  217. /* {{{ static sapi_post_entry php_post_entries[] */
  218. static sapi_post_entry php_post_entries[] = {
  219. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  220. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  221. { NULL, 0, NULL, NULL }
  222. };
  223. /* }}} */
  224. ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  225. #ifdef COMPILE_DL_MBSTRING
  226. ZEND_GET_MODULE(mbstring)
  227. # ifdef PHP_WIN32
  228. # include "zend_arg_defs.c"
  229. # endif
  230. #endif
  231. /* {{{ allocators */
  /*
   * Thin adapters that give PHP's allocator macros the function signatures
   * stored in the _php_mb_allocators table.
   */

  /* emalloc() wrapper. */
  static void *_php_mb_allocators_malloc(unsigned int sz)
  {
      void *block = emalloc(sz);

      return block;
  }

  /* erealloc() wrapper. */
  static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
  {
      void *block = erealloc(ptr, sz);

      return block;
  }

  /* ecalloc() wrapper. */
  static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
  {
      void *block = ecalloc(nelems, szelem);

      return block;
  }

  /* efree() wrapper. */
  static void _php_mb_allocators_free(void *ptr)
  {
      efree(ptr);
  }

  /* pemalloc() wrapper with the persistent flag set. */
  static void *_php_mb_allocators_pmalloc(unsigned int sz)
  {
      void *block = pemalloc(sz, 1);

      return block;
  }

  /* perealloc() wrapper with the persistent flag set. */
  static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
  {
      void *block = perealloc(ptr, sz, 1);

      return block;
  }

  /* pefree() wrapper with the persistent flag set. */
  static void _php_mb_allocators_pfree(void *ptr)
  {
      pefree(ptr, 1);
  }
  260. static mbfl_allocators _php_mb_allocators = {
  261. _php_mb_allocators_malloc,
  262. _php_mb_allocators_realloc,
  263. _php_mb_allocators_calloc,
  264. _php_mb_allocators_free,
  265. _php_mb_allocators_pmalloc,
  266. _php_mb_allocators_prealloc,
  267. _php_mb_allocators_pfree
  268. };
  269. /* }}} */
  270. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  271. static sapi_post_entry mbstr_post_entries[] = {
  272. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  273. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  274. { NULL, 0, NULL, NULL }
  275. };
  276. /* }}} */
  277. /* {{{ static int php_mb_parse_encoding_list()
  278. * Return 0 if input contains any illegal encoding, otherwise 1.
  279. * Even if any illegal encoding is detected the result may contain a list
  280. * of parsed encodings.
  281. */
  282. static int
  283. php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  284. {
  285. int n, l, size, bauto, ret = 1;
  286. char *p, *p1, *p2, *endp, *tmpstr;
  287. enum mbfl_no_encoding no_encoding;
  288. enum mbfl_no_encoding *src, *entry, *list;
  289. list = NULL;
  290. if (value == NULL || value_length <= 0) {
  291. if (return_list) {
  292. *return_list = NULL;
  293. }
  294. if (return_size) {
  295. *return_size = 0;
  296. }
  297. return 0;
  298. } else {
  299. enum mbfl_no_encoding *identify_list;
  300. int identify_list_size;
  301. identify_list = MBSTRG(default_detect_order_list);
  302. identify_list_size = MBSTRG(default_detect_order_list_size);
  303. /* copy the value string for work */
  304. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  305. tmpstr = (char *)estrndup(value+1, value_length-2);
  306. value_length -= 2;
  307. }
  308. else
  309. tmpstr = (char *)estrndup(value, value_length);
  310. if (tmpstr == NULL) {
  311. return 0;
  312. }
  313. /* count the number of listed encoding names */
  314. endp = tmpstr + value_length;
  315. n = 1;
  316. p1 = tmpstr;
  317. while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
  318. p1 = p2 + 1;
  319. n++;
  320. }
  321. size = n + identify_list_size;
  322. /* make list */
  323. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  324. if (list != NULL) {
  325. entry = list;
  326. n = 0;
  327. bauto = 0;
  328. p1 = tmpstr;
  329. do {
  330. p2 = p = php_memnstr(p1, ",", 1, endp);
  331. if (p == NULL) {
  332. p = endp;
  333. }
  334. *p = '\0';
  335. /* trim spaces */
  336. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  337. p1++;
  338. }
  339. p--;
  340. while (p > p1 && (*p == ' ' || *p == '\t')) {
  341. *p = '\0';
  342. p--;
  343. }
  344. /* convert to the encoding number and check encoding */
  345. if (strcasecmp(p1, "auto") == 0) {
  346. if (!bauto) {
  347. bauto = 1;
  348. l = identify_list_size;
  349. src = identify_list;
  350. while (l > 0) {
  351. *entry++ = *src++;
  352. l--;
  353. n++;
  354. }
  355. }
  356. } else {
  357. no_encoding = mbfl_name2no_encoding(p1);
  358. if (no_encoding != mbfl_no_encoding_invalid) {
  359. *entry++ = no_encoding;
  360. n++;
  361. } else {
  362. ret = 0;
  363. }
  364. }
  365. p1 = p2 + 1;
  366. } while (n < size && p2 != NULL);
  367. if (n > 0) {
  368. if (return_list) {
  369. *return_list = list;
  370. } else {
  371. pefree(list, persistent);
  372. }
  373. } else {
  374. pefree(list, persistent);
  375. if (return_list) {
  376. *return_list = NULL;
  377. }
  378. ret = 0;
  379. }
  380. if (return_size) {
  381. *return_size = n;
  382. }
  383. } else {
  384. if (return_list) {
  385. *return_list = NULL;
  386. }
  387. if (return_size) {
  388. *return_size = 0;
  389. }
  390. ret = 0;
  391. }
  392. efree(tmpstr);
  393. }
  394. return ret;
  395. }
  396. /* }}} */
  397. /* {{{ MBSTRING_API php_mb_check_encoding_list */
  398. MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
  399. return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
  400. }
  401. /* }}} */
  402. /* {{{ static int php_mb_parse_encoding_array()
  403. * Return 0 if input contains any illegal encoding, otherwise 1.
  404. * Even if any illegal encoding is detected the result may contain a list
  405. * of parsed encodings.
  406. */
  407. static int
  408. php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  409. {
  410. zval **hash_entry;
  411. HashTable *target_hash;
  412. int i, n, l, size, bauto,ret = 1;
  413. enum mbfl_no_encoding no_encoding;
  414. enum mbfl_no_encoding *src, *list, *entry;
  415. list = NULL;
  416. if (Z_TYPE_P(array) == IS_ARRAY) {
  417. enum mbfl_no_encoding *identify_list;
  418. int identify_list_size;
  419. identify_list = MBSTRG(default_detect_order_list);
  420. identify_list_size = MBSTRG(default_detect_order_list_size);
  421. target_hash = Z_ARRVAL_P(array);
  422. zend_hash_internal_pointer_reset(target_hash);
  423. i = zend_hash_num_elements(target_hash);
  424. size = i + identify_list_size;
  425. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  426. if (list != NULL) {
  427. entry = list;
  428. bauto = 0;
  429. n = 0;
  430. while (i > 0) {
  431. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  432. break;
  433. }
  434. convert_to_string_ex(hash_entry);
  435. if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
  436. if (!bauto) {
  437. bauto = 1;
  438. l = identify_list_size;
  439. src = identify_list;
  440. while (l > 0) {
  441. *entry++ = *src++;
  442. l--;
  443. n++;
  444. }
  445. }
  446. } else {
  447. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
  448. if (no_encoding != mbfl_no_encoding_invalid) {
  449. *entry++ = no_encoding;
  450. n++;
  451. } else {
  452. ret = 0;
  453. }
  454. }
  455. zend_hash_move_forward(target_hash);
  456. i--;
  457. }
  458. if (n > 0) {
  459. if (return_list) {
  460. *return_list = list;
  461. } else {
  462. pefree(list, persistent);
  463. }
  464. } else {
  465. pefree(list, persistent);
  466. if (return_list) {
  467. *return_list = NULL;
  468. }
  469. ret = 0;
  470. }
  471. if (return_size) {
  472. *return_size = n;
  473. }
  474. } else {
  475. if (return_list) {
  476. *return_list = NULL;
  477. }
  478. if (return_size) {
  479. *return_size = 0;
  480. }
  481. ret = 0;
  482. }
  483. }
  484. return ret;
  485. }
  486. /* }}} */
  487. /* {{{ php_mb_nls_get_default_detect_order_list */
  488. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
  489. {
  490. size_t i;
  491. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  492. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  493. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  494. if (php_mb_default_identify_list[i].lang == lang) {
  495. *plist = php_mb_default_identify_list[i].list;
  496. *plist_size = php_mb_default_identify_list[i].list_size;
  497. return 1;
  498. }
  499. }
  500. return 0;
  501. }
  502. /* }}} */
  503. /* {{{ php.ini directive handler */
  504. static PHP_INI_MH(OnUpdate_mbstring_language)
  505. {
  506. enum mbfl_no_language no_language;
  507. no_language = mbfl_name2no_language(new_value);
  508. if (no_language == mbfl_no_language_invalid) {
  509. return FAILURE;
  510. }
  511. MBSTRG(language) = no_language;
  512. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  513. return SUCCESS;
  514. }
  515. /* }}} */
  516. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  517. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  518. {
  519. enum mbfl_no_encoding *list;
  520. int size;
  521. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  522. if (MBSTRG(detect_order_list) != NULL) {
  523. free(MBSTRG(detect_order_list));
  524. }
  525. MBSTRG(detect_order_list) = list;
  526. MBSTRG(detect_order_list_size) = size;
  527. } else {
  528. return FAILURE;
  529. }
  530. return SUCCESS;
  531. }
  532. /* }}} */
  533. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  534. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  535. {
  536. enum mbfl_no_encoding *list;
  537. int size;
  538. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  539. if (MBSTRG(http_input_list) != NULL) {
  540. free(MBSTRG(http_input_list));
  541. }
  542. MBSTRG(http_input_list) = list;
  543. MBSTRG(http_input_list_size) = size;
  544. } else {
  545. return FAILURE;
  546. }
  547. return SUCCESS;
  548. }
  549. /* }}} */
  550. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  551. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  552. {
  553. enum mbfl_no_encoding no_encoding;
  554. no_encoding = mbfl_name2no_encoding(new_value);
  555. if (no_encoding != mbfl_no_encoding_invalid) {
  556. MBSTRG(http_output_encoding) = no_encoding;
  557. MBSTRG(current_http_output_encoding) = no_encoding;
  558. } else {
  559. if (new_value != NULL && new_value_length > 0) {
  560. return FAILURE;
  561. }
  562. }
  563. return SUCCESS;
  564. }
  565. /* }}} */
  566. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  567. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  568. {
  569. enum mbfl_no_encoding no_encoding;
  570. if (new_value == NULL) {
  571. return SUCCESS;
  572. }
  573. no_encoding = mbfl_name2no_encoding(new_value);
  574. if (no_encoding != mbfl_no_encoding_invalid) {
  575. MBSTRG(internal_encoding) = no_encoding;
  576. MBSTRG(current_internal_encoding) = no_encoding;
  577. #if HAVE_MBREGEX
  578. {
  579. OnigEncoding mbctype;
  580. mbctype = php_mb_regex_name2mbctype(new_value);
  581. if (mbctype == ONIG_ENCODING_UNDEF) {
  582. mbctype = ONIG_ENCODING_EUC_JP;
  583. }
  584. MBSTRG(current_mbctype) = MBSTRG(default_mbctype) = mbctype;
  585. }
  586. #endif
  587. #ifdef ZEND_MULTIBYTE
  588. zend_multibyte_set_internal_encoding(new_value, new_value_length TSRMLS_CC);
  589. #endif /* ZEND_MULTIBYTE */
  590. } else {
  591. if (new_value != NULL && new_value_length > 0) {
  592. return FAILURE;
  593. }
  594. }
  595. return SUCCESS;
  596. }
  597. /* }}} */
  598. #ifdef ZEND_MULTIBYTE
  599. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
  600. static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
  601. {
  602. int *list, size;
  603. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  604. if (MBSTRG(script_encoding_list) != NULL) {
  605. free(MBSTRG(script_encoding_list));
  606. }
  607. MBSTRG(script_encoding_list) = list;
  608. MBSTRG(script_encoding_list_size) = size;
  609. } else {
  610. return FAILURE;
  611. }
  612. return SUCCESS;
  613. }
  614. /* }}} */
  615. #endif /* ZEND_MULTIBYTE */
  616. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  617. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  618. {
  619. if (new_value != NULL) {
  620. if (strcasecmp("none", new_value) == 0) {
  621. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  622. } else if (strcasecmp("long", new_value) == 0) {
  623. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  624. } else {
  625. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  626. MBSTRG(filter_illegal_substchar) = zend_atoi(new_value, new_value_length);
  627. }
  628. }
  629. return SUCCESS;
  630. }
  631. /* }}} */
  632. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  633. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  634. {
  635. if (new_value == NULL) {
  636. return FAILURE;
  637. }
  638. OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
  639. if (MBSTRG(encoding_translation)) {
  640. sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
  641. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  642. sapi_register_treat_data(mbstr_treat_data);
  643. } else {
  644. sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
  645. sapi_register_post_entries(php_post_entries TSRMLS_CC);
  646. }
  647. return SUCCESS;
  648. }
  649. /* }}} */
  650. /* {{{ php.ini directive registration */
  651. PHP_INI_BEGIN()
  652. PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_language)
  653. PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
  654. PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
  655. PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
  656. PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
  657. #ifdef ZEND_MULTIBYTE
  658. PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
  659. #endif /* ZEND_MULTIBYTE */
  660. PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
  661. STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM |
  662. PHP_INI_PERDIR, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
  663. STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
  664. PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_encoding_translation,
  665. encoding_translation, zend_mbstring_globals, mbstring_globals)
  666. STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
  667. PHP_INI_ALL, OnUpdateLong, strict_detection, zend_mbstring_globals, mbstring_globals)
  668. PHP_INI_END()
  669. /* }}} */
  670. /* {{{ module global initialize handler */
  671. static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC)
  672. {
  673. MBSTRG(language) = mbfl_no_language_uni;
  674. MBSTRG(current_language) = MBSTRG(language);
  675. MBSTRG(internal_encoding) = mbfl_no_encoding_invalid;
  676. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  677. #ifdef ZEND_MULTIBYTE
  678. MBSTRG(script_encoding_list) = NULL;
  679. MBSTRG(script_encoding_list_size) = 0;
  680. #endif /* ZEND_MULTIBYTE */
  681. MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
  682. MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
  683. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  684. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  685. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  686. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  687. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  688. MBSTRG(http_input_list) = NULL;
  689. MBSTRG(http_input_list_size) = 0;
  690. MBSTRG(detect_order_list) = NULL;
  691. MBSTRG(detect_order_list_size) = 0;
  692. MBSTRG(current_detect_order_list) = NULL;
  693. MBSTRG(current_detect_order_list_size) = 0;
  694. MBSTRG(default_detect_order_list) = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  695. MBSTRG(default_detect_order_list_size) = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  696. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  697. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  698. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  699. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  700. MBSTRG(func_overload) = 0;
  701. MBSTRG(encoding_translation) = 0;
  702. MBSTRG(strict_detection) = 0;
  703. pglobals->outconv = NULL;
  704. #if HAVE_MBREGEX
  705. _php_mb_regex_globals_ctor(pglobals TSRMLS_CC);
  706. #endif
  707. }
  708. /* }}} */
  709. /* {{{ static void _php_mb_globals_dtor() */
  710. static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC)
  711. {
  712. #if HAVE_MBREGEX
  713. _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
  714. #endif
  715. }
  716. /* }}} */
  717. /* {{{ PHP_MINIT_FUNCTION(mbstring) */
  718. PHP_MINIT_FUNCTION(mbstring)
  719. {
  720. __mbfl_allocators = &_php_mb_allocators;
  721. #ifdef ZTS
  722. ts_allocate_id(&mbstring_globals_id, sizeof(zend_mbstring_globals),
  723. (ts_allocate_ctor) _php_mb_globals_ctor,
  724. (ts_allocate_dtor) _php_mb_globals_dtor);
  725. #else
  726. _php_mb_globals_ctor(&mbstring_globals TSRMLS_CC);
  727. #endif
  728. REGISTER_INI_ENTRIES();
  729. if (MBSTRG(encoding_translation)) {
  730. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  731. sapi_register_treat_data(mbstr_treat_data);
  732. }
  733. REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
  734. REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
  735. REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
  736. REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
  737. REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
  738. REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
  739. #if HAVE_MBREGEX
  740. PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  741. #endif
  742. return SUCCESS;
  743. }
  744. /* }}} */
  745. /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
  746. PHP_MSHUTDOWN_FUNCTION(mbstring)
  747. {
  748. UNREGISTER_INI_ENTRIES();
  749. if (MBSTRG(http_input_list)) {
  750. free(MBSTRG(http_input_list));
  751. }
  752. #ifdef ZEND_MULTIBYTE
  753. if (MBSTRG(script_encoding_list)) {
  754. free(MBSTRG(script_encoding_list));
  755. }
  756. #endif /* ZEND_MULTIBYTE */
  757. if (MBSTRG(detect_order_list)) {
  758. free(MBSTRG(detect_order_list));
  759. }
  760. #if HAVE_MBREGEX
  761. PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  762. #endif
  763. #ifdef ZTS
  764. ts_free_id(mbstring_globals_id);
  765. #else
  766. _php_mb_globals_dtor(&mbstring_globals TSRMLS_CC);
  767. #endif
  768. return SUCCESS;
  769. }
  770. /* }}} */
  771. /* {{{ PHP_RINIT_FUNCTION(mbstring) */
  772. PHP_RINIT_FUNCTION(mbstring)
  773. {
  774. int n;
  775. enum mbfl_no_encoding *list=NULL, *entry;
  776. zend_function *func, *orig;
  777. const struct mb_overload_def *p;
  778. MBSTRG(current_language) = MBSTRG(language);
  779. if (MBSTRG(internal_encoding) == mbfl_no_encoding_invalid) {
  780. char *default_enc = NULL;
  781. switch (MBSTRG(current_language)) {
  782. case mbfl_no_language_uni:
  783. default_enc = "UTF-8";
  784. break;
  785. case mbfl_no_language_japanese:
  786. default_enc = "EUC-JP";
  787. break;
  788. case mbfl_no_language_korean:
  789. default_enc = "EUC-KR";
  790. break;
  791. case mbfl_no_language_simplified_chinese:
  792. default_enc = "EUC-CN";
  793. break;
  794. case mbfl_no_language_traditional_chinese:
  795. default_enc = "EUC-TW";
  796. break;
  797. case mbfl_no_language_russian:
  798. default_enc = "KOI8-R";
  799. break;
  800. case mbfl_no_language_german:
  801. default_enc = "ISO-8859-15";
  802. break;
  803. case mbfl_no_language_armenian:
  804. default_enc = "ArmSCII-8";
  805. break;
  806. case mbfl_no_language_turkish:
  807. default_enc = "ISO-8859-9";
  808. break;
  809. case mbfl_no_language_english:
  810. default:
  811. default_enc = "ISO-8859-1";
  812. break;
  813. }
  814. if (default_enc) {
  815. zend_alter_ini_entry("mbstring.internal_encoding",
  816. sizeof("mbstring.internal_encoding"),
  817. default_enc, strlen(default_enc),
  818. PHP_INI_PERDIR, PHP_INI_STAGE_RUNTIME);
  819. }
  820. }
  821. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  822. MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
  823. MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
  824. MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
  825. n = 0;
  826. if (MBSTRG(detect_order_list)) {
  827. list = MBSTRG(detect_order_list);
  828. n = MBSTRG(detect_order_list_size);
  829. }
  830. if (n <= 0) {
  831. list = MBSTRG(default_detect_order_list);
  832. n = MBSTRG(default_detect_order_list_size);
  833. }
  834. entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
  835. MBSTRG(current_detect_order_list) = entry;
  836. MBSTRG(current_detect_order_list_size) = n;
  837. while (n > 0) {
  838. *entry++ = *list++;
  839. n--;
  840. }
  841. /* override original function. */
  842. if (MBSTRG(func_overload)){
  843. p = &(mb_ovld[0]);
  844. while (p->type > 0) {
  845. if ((MBSTRG(func_overload) & p->type) == p->type &&
  846. zend_hash_find(EG(function_table), p->save_func,
  847. strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
  848. zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
  849. if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
  850. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
  851. return FAILURE;
  852. } else {
  853. zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
  854. if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
  855. NULL) == FAILURE) {
  856. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
  857. return FAILURE;
  858. }
  859. }
  860. }
  861. p++;
  862. }
  863. }
  864. #if HAVE_MBREGEX
  865. PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  866. #endif
  867. #ifdef ZEND_MULTIBYTE
  868. php_mb_set_zend_encoding(TSRMLS_C);
  869. #endif /* ZEND_MULTIBYTE */
  870. return SUCCESS;
  871. }
  872. /* }}} */
  873. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  874. PHP_RSHUTDOWN_FUNCTION(mbstring)
  875. {
  876. const struct mb_overload_def *p;
  877. zend_function *orig;
  878. if (MBSTRG(current_detect_order_list) != NULL) {
  879. efree(MBSTRG(current_detect_order_list));
  880. MBSTRG(current_detect_order_list) = NULL;
  881. MBSTRG(current_detect_order_list_size) = 0;
  882. }
  883. if (MBSTRG(outconv) != NULL) {
  884. mbfl_buffer_converter_delete(MBSTRG(outconv));
  885. MBSTRG(outconv) = NULL;
  886. }
  887. /* clear http input identification. */
  888. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  889. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  890. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  891. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  892. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  893. /* clear overloaded function. */
  894. if (MBSTRG(func_overload)){
  895. p = &(mb_ovld[0]);
  896. while (p->type > 0 && zend_hash_find(EG(function_table), p->save_func, strlen(p->save_func)+1 , (void **)&orig) == SUCCESS) {
  897. zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
  898. zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
  899. p++;
  900. }
  901. }
  902. #if HAVE_MBREGEX
  903. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  904. #endif
  905. return SUCCESS;
  906. }
  907. /* }}} */
  908. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  909. PHP_MINFO_FUNCTION(mbstring)
  910. {
  911. php_info_print_table_start();
  912. php_info_print_table_row(2, "Multibyte Support", "enabled");
  913. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  914. if (MBSTRG(encoding_translation)) {
  915. php_info_print_table_row(2, "HTTP input encoding translation", "enabled");
  916. }
  917. #if defined(HAVE_MBREGEX)
  918. {
  919. char buf[32];
  920. php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
  921. sprintf(buf, "%d.%d.%d",
  922. ONIGURUMA_VERSION_MAJOR,ONIGURUMA_VERSION_MINOR,ONIGURUMA_VERSION_TEENY);
  923. php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
  924. }
  925. #endif
  926. php_info_print_table_end();
  927. php_info_print_table_start();
  928. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  929. php_info_print_table_end();
  930. DISPLAY_INI_ENTRIES();
  931. }
  932. /* }}} */
  933. /* {{{ proto string mb_language([string language])
  934. Sets the current language or Returns the current language as a string */
  935. PHP_FUNCTION(mb_language)
  936. {
  937. char *name = NULL;
  938. int name_len = 0;
  939. enum mbfl_no_language no_language;
  940. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  941. return;
  942. }
  943. if (name == NULL) {
  944. RETURN_STRING((char *)mbfl_no_language2name(MBSTRG(current_language)), 1);
  945. } else {
  946. no_language = mbfl_name2no_language(name);
  947. if (no_language == mbfl_no_language_invalid) {
  948. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
  949. RETURN_FALSE;
  950. } else {
  951. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  952. MBSTRG(current_language) = no_language;
  953. RETURN_TRUE;
  954. }
  955. }
  956. }
  957. /* }}} */
  958. /* {{{ proto string mb_internal_encoding([string encoding])
  959. Sets the current internal encoding or Returns the current internal encoding as a string */
  960. PHP_FUNCTION(mb_internal_encoding)
  961. {
  962. char *name = NULL;
  963. int name_len;
  964. enum mbfl_no_encoding no_encoding;
  965. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  966. RETURN_FALSE;
  967. }
  968. if (name == NULL) {
  969. name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  970. if (name != NULL) {
  971. RETURN_STRING(name, 1);
  972. } else {
  973. RETURN_FALSE;
  974. }
  975. } else {
  976. no_encoding = mbfl_name2no_encoding(name);
  977. if (no_encoding == mbfl_no_encoding_invalid) {
  978. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  979. RETURN_FALSE;
  980. } else {
  981. MBSTRG(current_internal_encoding) = no_encoding;
  982. #ifdef ZEND_MULTIBYTE
  983. /* TODO: make independent from mbstring.encoding_translation? */
  984. if (MBSTRG(encoding_translation)) {
  985. zend_multibyte_set_internal_encoding(name, name_len TSRMLS_CC);
  986. }
  987. #endif /* ZEND_MULTIBYTE */
  988. RETURN_TRUE;
  989. }
  990. }
  991. }
  992. /* }}} */
  993. /* {{{ proto mixed mb_http_input([string type])
  994. Returns the input encoding */
  995. PHP_FUNCTION(mb_http_input)
  996. {
  997. char *typ = NULL;
  998. int typ_len;
  999. int retname, n;
  1000. char *name, *list, *temp;
  1001. enum mbfl_no_encoding *entry;
  1002. enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
  1003. retname = 1;
  1004. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  1005. RETURN_FALSE;
  1006. }
  1007. if (typ == NULL) {
  1008. result = MBSTRG(http_input_identify);
  1009. } else {
  1010. switch (*typ) {
  1011. case 'G':
  1012. case 'g':
  1013. result = MBSTRG(http_input_identify_get);
  1014. break;
  1015. case 'P':
  1016. case 'p':
  1017. result = MBSTRG(http_input_identify_post);
  1018. break;
  1019. case 'C':
  1020. case 'c':
  1021. result = MBSTRG(http_input_identify_cookie);
  1022. break;
  1023. case 'S':
  1024. case 's':
  1025. result = MBSTRG(http_input_identify_string);
  1026. break;
  1027. case 'I':
  1028. case 'i':
  1029. array_init(return_value);
  1030. entry = MBSTRG(http_input_list);
  1031. n = MBSTRG(http_input_list_size);
  1032. while (n > 0) {
  1033. name = (char *)mbfl_no_encoding2name(*entry);
  1034. if (name) {
  1035. add_next_index_string(return_value, name, 1);
  1036. }
  1037. entry++;
  1038. n--;
  1039. }
  1040. retname = 0;
  1041. break;
  1042. case 'L':
  1043. case 'l':
  1044. entry = MBSTRG(http_input_list);
  1045. n = MBSTRG(http_input_list_size);
  1046. list = NULL;
  1047. while (n > 0) {
  1048. name = (char *)mbfl_no_encoding2name(*entry);
  1049. if (name) {
  1050. if (list) {
  1051. temp = list;
  1052. spprintf(&list, 0, "%s,%s", temp, name);
  1053. efree(temp);
  1054. if (!list) {
  1055. break;
  1056. }
  1057. } else {
  1058. list = estrdup(name);
  1059. }
  1060. }
  1061. entry++;
  1062. n--;
  1063. }
  1064. if (!list) {
  1065. RETURN_FALSE;
  1066. }
  1067. RETVAL_STRING(list, 0);
  1068. retname = 0;
  1069. break;
  1070. default:
  1071. result = MBSTRG(http_input_identify);
  1072. break;
  1073. }
  1074. }
  1075. if (retname) {
  1076. if (result != mbfl_no_encoding_invalid &&
  1077. (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
  1078. RETVAL_STRING(name, 1);
  1079. } else {
  1080. RETVAL_FALSE;
  1081. }
  1082. }
  1083. }
  1084. /* }}} */
  1085. /* {{{ proto string mb_http_output([string encoding])
  1086. Sets the current output_encoding or returns the current output_encoding as a string */
  1087. PHP_FUNCTION(mb_http_output)
  1088. {
  1089. char *name = NULL;
  1090. int name_len;
  1091. enum mbfl_no_encoding no_encoding;
  1092. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
  1093. RETURN_FALSE;
  1094. }
  1095. if (name == NULL) {
  1096. name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
  1097. if (name != NULL) {
  1098. RETURN_STRING(name, 1);
  1099. } else {
  1100. RETURN_FALSE;
  1101. }
  1102. } else {
  1103. no_encoding = mbfl_name2no_encoding(name);
  1104. if (no_encoding == mbfl_no_encoding_invalid) {
  1105. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1106. RETURN_FALSE;
  1107. } else {
  1108. MBSTRG(current_http_output_encoding) = no_encoding;
  1109. RETURN_TRUE;
  1110. }
  1111. }
  1112. }
  1113. /* }}} */
  1114. /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
  1115. Sets the current detect_order or Return the current detect_order as a array */
  1116. PHP_FUNCTION(mb_detect_order)
  1117. {
  1118. zval **arg1;
  1119. int n, size;
  1120. enum mbfl_no_encoding *list, *entry;
  1121. char *name;
  1122. if (ZEND_NUM_ARGS() == 0) {
  1123. array_init(return_value);
  1124. entry = MBSTRG(current_detect_order_list);
  1125. n = MBSTRG(current_detect_order_list_size);
  1126. while (n > 0) {
  1127. name = (char *)mbfl_no_encoding2name(*entry);
  1128. if (name) {
  1129. add_next_index_string(return_value, name, 1);
  1130. }
  1131. entry++;
  1132. n--;
  1133. }
  1134. } else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) {
  1135. list = NULL;
  1136. size = 0;
  1137. switch (Z_TYPE_PP(arg1)) {
  1138. case IS_ARRAY:
  1139. if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
  1140. if (list) {
  1141. efree(list);
  1142. }
  1143. RETURN_FALSE;
  1144. }
  1145. break;
  1146. default:
  1147. convert_to_string_ex(arg1);
  1148. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
  1149. if (list) {
  1150. efree(list);
  1151. }
  1152. RETURN_FALSE;
  1153. }
  1154. break;
  1155. }
  1156. if (list == NULL) {
  1157. RETVAL_FALSE;
  1158. } else {
  1159. if (MBSTRG(current_detect_order_list)) {
  1160. efree(MBSTRG(current_detect_order_list));
  1161. }
  1162. MBSTRG(current_detect_order_list) = list;
  1163. MBSTRG(current_detect_order_list_size) = size;
  1164. RETVAL_TRUE;
  1165. }
  1166. } else {
  1167. WRONG_PARAM_COUNT;
  1168. }
  1169. }
  1170. /* }}} */
  1171. /* {{{ proto mixed mb_substitute_character([mixed substchar])
  1172. Sets the current substitute_character or returns the current substitute_character */
  1173. PHP_FUNCTION(mb_substitute_character)
  1174. {
  1175. zval **arg1;
  1176. if (ZEND_NUM_ARGS() == 0) {
  1177. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1178. RETVAL_STRING("none", 1);
  1179. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1180. RETVAL_STRING("long", 1);
  1181. } else {
  1182. RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
  1183. }
  1184. } else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) {
  1185. RETVAL_TRUE;
  1186. switch (Z_TYPE_PP(arg1)) {
  1187. case IS_STRING:
  1188. if (strcasecmp("none", Z_STRVAL_PP(arg1)) == 0) {
  1189. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1190. } else if (strcasecmp("long", Z_STRVAL_PP(arg1)) == 0) {
  1191. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1192. } else {
  1193. convert_to_long_ex(arg1);
  1194. if (Z_LVAL_PP(arg1)< 0xffff && Z_LVAL_PP(arg1)> 0x0) {
  1195. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1196. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1197. } else {
  1198. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1199. RETVAL_FALSE;
  1200. }
  1201. }
  1202. break;
  1203. default:
  1204. convert_to_long_ex(arg1);
  1205. if (Z_LVAL_PP(arg1)< 0xffff && Z_LVAL_PP(arg1)> 0x0) {
  1206. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1207. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1208. } else {
  1209. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1210. RETVAL_FALSE;
  1211. }
  1212. break;
  1213. }
  1214. } else {
  1215. WRONG_PARAM_COUNT;
  1216. }
  1217. }
  1218. /* }}} */
  1219. /* {{{ proto string mb_preferred_mime_name(string encoding)
  1220. Return the preferred MIME name (charset) as a string */
  1221. PHP_FUNCTION(mb_preferred_mime_name)
  1222. {
  1223. enum mbfl_no_encoding no_encoding;
  1224. char *name = NULL;
  1225. int name_len;
  1226. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  1227. return;
  1228. } else {
  1229. no_encoding = mbfl_name2no_encoding(name);
  1230. if (no_encoding == mbfl_no_encoding_invalid) {
  1231. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1232. RETVAL_FALSE;
  1233. } else {
  1234. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1235. if (preferred_name == NULL || *preferred_name == '\0') {
  1236. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
  1237. RETVAL_FALSE;
  1238. } else {
  1239. RETVAL_STRING((char *)preferred_name, 1);
  1240. }
  1241. }
  1242. }
  1243. }
  1244. /* }}} */
  1245. #define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0)
  1246. #define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
  1247. /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
  1248. Parses GET/POST/COOKIE data and sets global variables */
  1249. PHP_FUNCTION(mb_parse_str)
  1250. {
  1251. zval *track_vars_array;
  1252. char *encstr = NULL;
  1253. int encstr_len;
  1254. php_mb_encoding_handler_info_t info;
  1255. enum mbfl_no_encoding detected;
  1256. track_vars_array = NULL;
  1257. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
  1258. return;
  1259. }
  1260. /* Clear out the array */
  1261. if (track_vars_array != NULL) {
  1262. zval_dtor(track_vars_array);
  1263. array_init(track_vars_array);
  1264. }
  1265. encstr = estrndup(encstr, encstr_len);
  1266. info.data_type = PARSE_STRING;
  1267. info.separator = PG(arg_separator).input;
  1268. info.force_register_globals = (track_vars_array == NULL);
  1269. info.report_errors = 1;
  1270. info.to_encoding = MBSTRG(current_internal_encoding);
  1271. info.to_language = MBSTRG(current_language);
  1272. info.from_encodings = MBSTRG(http_input_list);
  1273. info.num_from_encodings = MBSTRG(http_input_list_size);
  1274. info.from_language = MBSTRG(current_language);
  1275. detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
  1276. MBSTRG(http_input_identify) = detected;
  1277. RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
  1278. if (encstr != NULL) efree(encstr);
  1279. }
  1280. /* }}} */
  1281. /* {{{ proto string mb_output_handler(string contents, int status)
  1282. Returns string in output buffer converted to the http_output encoding */
  1283. PHP_FUNCTION(mb_output_handler)
  1284. {
  1285. char *arg_string;
  1286. int arg_string_len;
  1287. long arg_status;
  1288. mbfl_string string, result;
  1289. const char *charset;
  1290. char *p;
  1291. enum mbfl_no_encoding encoding;
  1292. int last_feed, len;
  1293. unsigned char send_text_mimetype = 0;
  1294. char *s, *mimetype = NULL;
  1295. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
  1296. return;
  1297. }
  1298. encoding = MBSTRG(current_http_output_encoding);
  1299. /* start phase only */
  1300. if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
  1301. /* delete the converter just in case. */
  1302. if (MBSTRG(outconv)) {
  1303. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1304. MBSTRG(outconv) = NULL;
  1305. }
  1306. if (encoding == mbfl_no_encoding_pass) {
  1307. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1308. }
  1309. /* analyze mime type */
  1310. if (SG(sapi_headers).mimetype &&
  1311. strncmp(SG(sapi_headers).mimetype, "text/", 5) == 0) {
  1312. if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
  1313. mimetype = estrdup(SG(sapi_headers).mimetype);
  1314. } else {
  1315. mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
  1316. }
  1317. send_text_mimetype = 1;
  1318. } else if (SG(sapi_headers).send_default_content_type) {
  1319. mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
  1320. }
  1321. /* if content-type is not yet set, set it and activate the converter */
  1322. if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
  1323. charset = mbfl_no2preferred_mime_name(encoding);
  1324. if (charset) {
  1325. len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
  1326. if (sapi_add_header(p, len, 0) != FAILURE) {
  1327. SG(sapi_headers).send_default_content_type = 0;
  1328. }
  1329. }
  1330. /* activate the converter */
  1331. MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
  1332. if (send_text_mimetype){
  1333. efree(mimetype);
  1334. }
  1335. }
  1336. }
  1337. /* just return if the converter is not activated. */
  1338. if (MBSTRG(outconv) == NULL) {
  1339. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1340. }
  1341. /* flag */
  1342. last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
  1343. /* mode */
  1344. mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1345. mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1346. /* feed the string */
  1347. mbfl_string_init(&string);
  1348. string.no_language = MBSTRG(current_language);
  1349. string.no_encoding = MBSTRG(current_internal_encoding);
  1350. string.val = (unsigned char *)arg_string;
  1351. string.len = arg_string_len;
  1352. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1353. if (last_feed) {
  1354. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1355. }
  1356. /* get the converter output, and return it */
  1357. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1358. RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
  1359. /* delete the converter if it is the last feed. */
  1360. if (last_feed) {
  1361. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1362. MBSTRG(outconv) = NULL;
  1363. }
  1364. }
  1365. /* }}} */
  1366. /* {{{ proto int mb_strlen(string str [, string encoding])
  1367. Get character numbers of a string */
  1368. PHP_FUNCTION(mb_strlen)
  1369. {
  1370. int n;
  1371. mbfl_string string;
  1372. char *enc_name = NULL;
  1373. int enc_name_len;
  1374. mbfl_string_init(&string);
  1375. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1376. RETURN_FALSE;
  1377. }
  1378. string.no_language = MBSTRG(current_language);
  1379. if (enc_name == NULL) {
  1380. string.no_encoding = MBSTRG(current_internal_encoding);
  1381. } else {
  1382. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1383. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1384. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1385. RETURN_FALSE;
  1386. }
  1387. }
  1388. n = mbfl_strlen(&string);
  1389. if (n >= 0) {
  1390. RETVAL_LONG(n);
  1391. } else {
  1392. RETVAL_FALSE;
  1393. }
  1394. }
  1395. /* }}} */
  1396. /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
  1397. Find position of first occurrence of a string within another */
  1398. PHP_FUNCTION(mb_strpos)
  1399. {
  1400. int n, reverse = 0;
  1401. long offset;
  1402. mbfl_string haystack, needle;
  1403. char *enc_name = NULL;
  1404. int enc_name_len;
  1405. mbfl_string_init(&haystack);
  1406. mbfl_string_init(&needle);
  1407. haystack.no_language = MBSTRG(current_language);
  1408. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1409. needle.no_language = MBSTRG(current_language);
  1410. needle.no_encoding = MBSTRG(current_internal_encoding);
  1411. offset = 0;
  1412. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
  1413. RETURN_FALSE;
  1414. }
  1415. if (enc_name != NULL) {
  1416. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1417. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1418. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1419. RETURN_FALSE;
  1420. }
  1421. }
  1422. if (offset < 0 || (unsigned long)offset > haystack.len) {
  1423. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is out of range");
  1424. RETURN_FALSE;
  1425. }
  1426. if (needle.len == 0) {
  1427. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty needle");
  1428. RETURN_FALSE;
  1429. }
  1430. n = mbfl_strpos(&haystack, &needle, offset, reverse);
  1431. if (n >= 0) {
  1432. RETVAL_LONG(n);
  1433. } else {
  1434. switch (-n) {
  1435. case 1:
  1436. break;
  1437. case 2:
  1438. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length.");
  1439. break;
  1440. case 4:
  1441. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error.");
  1442. break;
  1443. case 8:
  1444. php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty.");
  1445. break;
  1446. default:
  1447. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos.");
  1448. break;
  1449. }
  1450. RETVAL_FALSE;
  1451. }
  1452. }
  1453. /* }}} */
  1454. /* {{{ proto int mb_strrpos(string haystack, string needle [, string encoding])
  1455. Find the last occurrence of a character in a string within another */
  1456. PHP_FUNCTION(mb_strrpos)
  1457. {
  1458. int n;
  1459. mbfl_string haystack, needle;
  1460. char *enc_name = NULL;
  1461. int enc_name_len;
  1462. mbfl_string_init(&haystack);
  1463. mbfl_string_init(&needle);
  1464. haystack.no_language = MBSTRG(current_language);
  1465. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1466. needle.no_language = MBSTRG(current_language);
  1467. needle.no_encoding = MBSTRG(current_internal_encoding);
  1468. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  1469. RETURN_FALSE;
  1470. }
  1471. if (enc_name != NULL) {
  1472. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1473. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1474. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1475. RETURN_FALSE;
  1476. }
  1477. }
  1478. if (haystack.len <= 0) {
  1479. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty haystack");
  1480. RETURN_FALSE;
  1481. }
  1482. if (needle.len <= 0) {
  1483. php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle");
  1484. RETURN_FALSE;
  1485. }
  1486. n = mbfl_strpos(&haystack, &needle, 0, 1);
  1487. if (n >= 0) {
  1488. RETVAL_LONG(n);
  1489. } else {
  1490. RETVAL_FALSE;
  1491. }
  1492. }
  1493. /* }}} */
  1494. /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
  1495. Count the number of substring occurrences */
  1496. PHP_FUNCTION(mb_substr_count)
  1497. {
  1498. int n;
  1499. mbfl_string haystack, needle;
  1500. char *enc_name = NULL;
  1501. int enc_name_len;
  1502. mbfl_string_init(&haystack);
  1503. mbfl_string_init(&needle);
  1504. haystack.no_language = MBSTRG(current_language);
  1505. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1506. needle.no_language = MBSTRG(current_language);
  1507. needle.no_encoding = MBSTRG(current_internal_encoding);
  1508. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  1509. return;
  1510. }
  1511. if (enc_name != NULL) {
  1512. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1513. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1514. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1515. RETURN_FALSE;
  1516. }
  1517. }
  1518. if (needle.len <= 0) {
  1519. php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle");
  1520. RETURN_FALSE;
  1521. }
  1522. n = mbfl_substr_count(&haystack, &needle);
  1523. if (n >= 0) {
  1524. RETVAL_LONG(n);
  1525. } else {
  1526. RETVAL_FALSE;
  1527. }
  1528. }
  1529. /* }}} */
  1530. /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
  1531. Returns part of a string */
  1532. PHP_FUNCTION(mb_substr)
  1533. {
  1534. zval **arg1, **arg2, **arg3, **arg4;
  1535. int argc, from, len, mblen;
  1536. mbfl_string string, result, *ret;
  1537. mbfl_string_init(&string);
  1538. string.no_language = MBSTRG(current_language);
  1539. string.no_encoding = MBSTRG(current_internal_encoding);
  1540. argc = ZEND_NUM_ARGS();
  1541. switch (argc) {
  1542. case 2:
  1543. if (zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) {
  1544. WRONG_PARAM_COUNT;
  1545. }
  1546. break;
  1547. case 3:
  1548. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1549. WRONG_PARAM_COUNT;
  1550. }
  1551. break;
  1552. case 4:
  1553. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1554. WRONG_PARAM_COUNT;
  1555. }
  1556. convert_to_string_ex(arg4);
  1557. string.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg4));
  1558. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1559. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg4));
  1560. RETURN_FALSE;
  1561. }
  1562. break;
  1563. default:
  1564. WRONG_PARAM_COUNT;
  1565. }
  1566. convert_to_string_ex(arg1);
  1567. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  1568. string.len = Z_STRLEN_PP(arg1);
  1569. convert_to_long_ex(arg2);
  1570. from = Z_LVAL_PP(arg2);
  1571. if (argc >= 3) {
  1572. convert_to_long_ex(arg3);
  1573. len = Z_LVAL_PP(arg3);
  1574. } else {
  1575. len = Z_STRLEN_PP(arg1);
  1576. }
  1577. /* measures length */
  1578. mblen = 0;
  1579. if (from < 0 || len < 0) {
  1580. mblen = mbfl_strlen(&string);
  1581. }
  1582. /* if "from" position is negative, count start position from the end
  1583. * of the string
  1584. */
  1585. if (from < 0) {
  1586. from = mblen + from;
  1587. if (from < 0) {
  1588. from = 0;
  1589. }
  1590. }
  1591. /* if "length" position is negative, set it to the length
  1592. * needed to stop that many chars from the end of the string
  1593. */
  1594. if (len < 0) {
  1595. len = (mblen - from) + len;
  1596. if (len < 0) {
  1597. len = 0;
  1598. }
  1599. }
  1600. if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
  1601. && (from >= mbfl_strlen(&string))) {
  1602. RETURN_FALSE;
  1603. }
  1604. ret = mbfl_substr(&string, &result, from, len);
  1605. if (ret != NULL) {
  1606. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1607. } else {
  1608. RETVAL_FALSE;
  1609. }
  1610. }
  1611. /* }}} */
  1612. /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
  1613. Returns part of a string */
  1614. PHP_FUNCTION(mb_strcut)
  1615. {
  1616. zval **arg1, **arg2, **arg3, **arg4;
  1617. int argc, from, len;
  1618. mbfl_string string, result, *ret;
  1619. mbfl_string_init(&string);
  1620. string.no_language = MBSTRG(current_language);
  1621. string.no_encoding = MBSTRG(current_internal_encoding);
  1622. argc = ZEND_NUM_ARGS();
  1623. switch (argc) {
  1624. case 2:
  1625. if (zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) {
  1626. WRONG_PARAM_COUNT;
  1627. }
  1628. break;
  1629. case 3:
  1630. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1631. WRONG_PARAM_COUNT;
  1632. }
  1633. break;
  1634. case 4:
  1635. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1636. WRONG_PARAM_COUNT;
  1637. }
  1638. convert_to_string_ex(arg4);
  1639. string.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg4));
  1640. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1641. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg4));
  1642. RETURN_FALSE;
  1643. }
  1644. break;
  1645. default:
  1646. WRONG_PARAM_COUNT;
  1647. }
  1648. convert_to_string_ex(arg1);
  1649. string.val = Z_STRVAL_PP(arg1);
  1650. string.len = Z_STRLEN_PP(arg1);
  1651. convert_to_long_ex(arg2);
  1652. from = Z_LVAL_PP(arg2);
  1653. if (argc >= 3) {
  1654. convert_to_long_ex(arg3);
  1655. len = Z_LVAL_PP(arg3);
  1656. } else {
  1657. len = Z_STRLEN_PP(arg1);
  1658. }
  1659. /* if "from" position is negative, count start position from the end
  1660. * of the string
  1661. */
  1662. if (from < 0) {
  1663. from = Z_STRLEN_PP(arg1) + from;
  1664. if (from < 0) {
  1665. from = 0;
  1666. }
  1667. }
  1668. /* if "length" position is negative, set it to the length
  1669. * needed to stop that many chars from the end of the string
  1670. */
  1671. if (len < 0) {
  1672. len = (Z_STRLEN_PP(arg1) - from) + len;
  1673. if (len < 0) {
  1674. len = 0;
  1675. }
  1676. }
  1677. if (from > Z_STRLEN_PP(arg1)) {
  1678. RETURN_FALSE;
  1679. }
  1680. if (((unsigned) from + (unsigned) len) > Z_STRLEN_PP(arg1)) {
  1681. len = Z_STRLEN_PP(arg1) - from;
  1682. }
  1683. ret = mbfl_strcut(&string, &result, from, len);
  1684. if (ret != NULL) {
  1685. RETVAL_STRINGL(ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1686. } else {
  1687. RETVAL_FALSE;
  1688. }
  1689. }
  1690. /* }}} */
  1691. /* {{{ proto int mb_strwidth(string str [, string encoding])
  1692. Gets terminal width of a string */
  1693. PHP_FUNCTION(mb_strwidth)
  1694. {
  1695. int n;
  1696. mbfl_string string;
  1697. char *enc_name = NULL;
  1698. int enc_name_len;
  1699. mbfl_string_init(&string);
  1700. string.no_language = MBSTRG(current_language);
  1701. string.no_encoding = MBSTRG(current_internal_encoding);
  1702. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1703. return;
  1704. }
  1705. if (enc_name != NULL) {
  1706. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1707. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1708. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1709. RETURN_FALSE;
  1710. }
  1711. }
  1712. n = mbfl_strwidth(&string);
  1713. if (n >= 0) {
  1714. RETVAL_LONG(n);
  1715. } else {
  1716. RETVAL_FALSE;
  1717. }
  1718. }
  1719. /* }}} */
  1720. /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
  1721. Trim the string in terminal width */
  1722. PHP_FUNCTION(mb_strimwidth)
  1723. {
  1724. zval **arg1, **arg2, **arg3, **arg4, **arg5;
  1725. int from, width;
  1726. mbfl_string string, result, marker, *ret;
  1727. mbfl_string_init(&string);
  1728. mbfl_string_init(&marker);
  1729. string.no_language = MBSTRG(current_language);
  1730. string.no_encoding = MBSTRG(current_internal_encoding);
  1731. marker.no_language = MBSTRG(current_language);
  1732. marker.no_encoding = MBSTRG(current_internal_encoding);
  1733. marker.val = NULL;
  1734. marker.len = 0;
  1735. switch (ZEND_NUM_ARGS()) {
  1736. case 3:
  1737. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1738. WRONG_PARAM_COUNT;
  1739. }
  1740. break;
  1741. case 4:
  1742. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1743. WRONG_PARAM_COUNT;
  1744. }
  1745. break;
  1746. case 5:
  1747. if (zend_get_parameters_ex(5, &arg1, &arg2, &arg3, &arg4, &arg5) == FAILURE) {
  1748. WRONG_PARAM_COUNT;
  1749. }
  1750. convert_to_string_ex(arg5);
  1751. string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg5));
  1752. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1753. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg5));
  1754. RETURN_FALSE;
  1755. }
  1756. break;
  1757. default:
  1758. WRONG_PARAM_COUNT;
  1759. }
  1760. convert_to_string_ex(arg1);
  1761. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  1762. string.len = Z_STRLEN_PP(arg1);
  1763. convert_to_long_ex(arg2);
  1764. from = Z_LVAL_PP(arg2);
  1765. if (from < 0 || from > Z_STRLEN_PP(arg1)) {
  1766. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of reange");
  1767. RETURN_FALSE;
  1768. }
  1769. convert_to_long_ex(arg3);
  1770. width = Z_LVAL_PP(arg3);
  1771. if (width < 0) {
  1772. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
  1773. RETURN_FALSE;
  1774. }
  1775. if (ZEND_NUM_ARGS() >= 4) {
  1776. convert_to_string_ex(arg4);
  1777. marker.val = (unsigned char *)Z_STRVAL_PP(arg4);
  1778. marker.len = Z_STRLEN_PP(arg4);
  1779. }
  1780. ret = mbfl_strimwidth(&string, &marker, &result, from, width);
  1781. if (ret != NULL) {
  1782. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1783. } else {
  1784. RETVAL_FALSE;
  1785. }
  1786. }
  1787. /* }}} */
  1788. /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
  1789. MBSTRING_API char * php_mb_convert_encoding(char *input, size_t length, char *_to_encoding, char *_from_encodings, size_t *output_len TSRMLS_DC)
  1790. {
  1791. mbfl_string string, result, *ret;
  1792. enum mbfl_no_encoding from_encoding, to_encoding;
  1793. mbfl_buffer_converter *convd;
  1794. int size, *list;
  1795. char *output=NULL;
  1796. if (output_len) {
  1797. *output_len = 0;
  1798. }
  1799. if (!input) {
  1800. return NULL;
  1801. }
  1802. /* new encoding */
  1803. if (_to_encoding && strlen(_to_encoding)) {
  1804. to_encoding = mbfl_name2no_encoding(_to_encoding);
  1805. if (to_encoding == mbfl_no_encoding_invalid) {
  1806. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
  1807. return NULL;
  1808. }
  1809. } else {
  1810. to_encoding = MBSTRG(current_internal_encoding);
  1811. }
  1812. /* initialize string */
  1813. mbfl_string_init(&string);
  1814. mbfl_string_init(&result);
  1815. from_encoding = MBSTRG(current_internal_encoding);
  1816. string.no_encoding = from_encoding;
  1817. string.no_language = MBSTRG(current_language);
  1818. string.val = (unsigned char *)input;
  1819. string.len = length;
  1820. /* pre-conversion encoding */
  1821. if (_from_encodings) {
  1822. list = NULL;
  1823. size = 0;
  1824. php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
  1825. if (size == 1) {
  1826. from_encoding = *list;
  1827. string.no_encoding = from_encoding;
  1828. } else if (size > 1) {
  1829. /* auto detect */
  1830. from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
  1831. if (from_encoding != mbfl_no_encoding_invalid) {
  1832. string.no_encoding = from_encoding;
  1833. } else {
  1834. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
  1835. from_encoding = mbfl_no_encoding_pass;
  1836. to_encoding = from_encoding;
  1837. string.no_encoding = from_encoding;
  1838. }
  1839. } else {
  1840. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
  1841. }
  1842. if (list != NULL) {
  1843. efree((void *)list);
  1844. }
  1845. }
  1846. /* initialize converter */
  1847. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  1848. if (convd == NULL) {
  1849. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
  1850. return NULL;
  1851. }
  1852. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  1853. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  1854. /* do it */
  1855. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  1856. if (ret) {
  1857. if (output_len) {
  1858. *output_len = ret->len;
  1859. }
  1860. output = (char *)ret->val;
  1861. }
  1862. mbfl_buffer_converter_delete(convd);
  1863. return output;
  1864. }
  1865. /* }}} */
  1866. /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
  1867. Returns converted string in desired encoding */
  1868. PHP_FUNCTION(mb_convert_encoding)
  1869. {
  1870. zval **arg_str, **arg_new, **arg_old;
  1871. int i;
  1872. size_t size, l, n;
  1873. char *_from_encodings, *ret, *s_free = NULL;
  1874. zval **hash_entry;
  1875. HashTable *target_hash;
  1876. _from_encodings = NULL;
  1877. if (ZEND_NUM_ARGS() == 2) {
  1878. if (zend_get_parameters_ex(2, &arg_str, &arg_new) == FAILURE) {
  1879. WRONG_PARAM_COUNT;
  1880. }
  1881. } else if (ZEND_NUM_ARGS() == 3) {
  1882. if (zend_get_parameters_ex(3, &arg_str, &arg_new, &arg_old) == FAILURE) {
  1883. WRONG_PARAM_COUNT;
  1884. }
  1885. switch (Z_TYPE_PP(arg_old)) {
  1886. case IS_ARRAY:
  1887. target_hash = Z_ARRVAL_PP(arg_old);
  1888. zend_hash_internal_pointer_reset(target_hash);
  1889. i = zend_hash_num_elements(target_hash);
  1890. _from_encodings = NULL;
  1891. while (i > 0) {
  1892. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  1893. break;
  1894. }
  1895. convert_to_string_ex(hash_entry);
  1896. if ( _from_encodings) {
  1897. l = strlen(_from_encodings);
  1898. n = strlen(Z_STRVAL_PP(hash_entry));
  1899. _from_encodings = erealloc(_from_encodings, l+n+2);
  1900. strcpy(_from_encodings+l,",");
  1901. strcpy(_from_encodings+l+1,Z_STRVAL_PP(hash_entry));
  1902. } else {
  1903. _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
  1904. }
  1905. zend_hash_move_forward(target_hash);
  1906. i--;
  1907. }
  1908. if (_from_encodings != NULL && !strlen(_from_encodings)) {
  1909. efree(_from_encodings);
  1910. _from_encodings = NULL;
  1911. }
  1912. s_free = _from_encodings;
  1913. break;
  1914. default:
  1915. convert_to_string_ex(arg_old);
  1916. _from_encodings = Z_STRVAL_PP(arg_old);
  1917. break;
  1918. }
  1919. } else {
  1920. WRONG_PARAM_COUNT;
  1921. }
  1922. /* new encoding */
  1923. convert_to_string_ex(arg_str);
  1924. convert_to_string_ex(arg_new);
  1925. ret = php_mb_convert_encoding( Z_STRVAL_PP(arg_str), Z_STRLEN_PP(arg_str), Z_STRVAL_PP(arg_new), _from_encodings, &size TSRMLS_CC);
  1926. if (ret != NULL) {
  1927. RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
  1928. } else {
  1929. RETVAL_FALSE;
  1930. }
  1931. if ( s_free) {
  1932. efree(s_free);
  1933. }
  1934. }
  1935. /* }}} */
  1936. /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
  1937. Returns a case-folded version of sourcestring */
  1938. PHP_FUNCTION(mb_convert_case)
  1939. {
  1940. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1941. int str_len, from_encoding_len;
  1942. long case_mode = 0;
  1943. char *newstr;
  1944. size_t ret_len;
  1945. RETVAL_FALSE;
  1946. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
  1947. &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
  1948. RETURN_FALSE;
  1949. newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1950. if (newstr) {
  1951. RETVAL_STRINGL(newstr, ret_len, 0);
  1952. }
  1953. }
  1954. /* }}} */
  1955. /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
  1956. * Returns a uppercased version of sourcestring
  1957. */
  1958. PHP_FUNCTION(mb_strtoupper)
  1959. {
  1960. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1961. int str_len, from_encoding_len;
  1962. char *newstr;
  1963. size_t ret_len;
  1964. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  1965. &from_encoding, &from_encoding_len) == FAILURE) {
  1966. return;
  1967. }
  1968. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1969. if (newstr) {
  1970. RETURN_STRINGL(newstr, ret_len, 0);
  1971. }
  1972. RETURN_FALSE;
  1973. }
  1974. /* }}} */
  1975. /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
  1976. * Returns a lowercased version of sourcestring
  1977. */
  1978. PHP_FUNCTION(mb_strtolower)
  1979. {
  1980. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1981. int str_len, from_encoding_len;
  1982. char *newstr;
  1983. size_t ret_len;
  1984. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  1985. &from_encoding, &from_encoding_len) == FAILURE) {
  1986. return;
  1987. }
  1988. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1989. if (newstr) {
  1990. RETURN_STRINGL(newstr, ret_len, 0);
  1991. }
  1992. RETURN_FALSE;
  1993. }
  1994. /* }}} */
  1995. /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
  1996. Encodings of the given string is returned (as a string) */
  1997. PHP_FUNCTION(mb_detect_encoding)
  1998. {
  1999. zval **arg_str, **arg_list, **arg_strict;
  2000. mbfl_string string;
  2001. const char *ret;
  2002. enum mbfl_no_encoding *elist;
  2003. int size, *list, strict = 0;
  2004. if (ZEND_NUM_ARGS() == 1) {
  2005. if (zend_get_parameters_ex(1, &arg_str) == FAILURE) {
  2006. WRONG_PARAM_COUNT;
  2007. }
  2008. } else if (ZEND_NUM_ARGS() == 2) {
  2009. if (zend_get_parameters_ex(2, &arg_str, &arg_list) == FAILURE) {
  2010. WRONG_PARAM_COUNT;
  2011. }
  2012. } else if (ZEND_NUM_ARGS() == 3) {
  2013. if (zend_get_parameters_ex(3, &arg_str, &arg_list, &arg_strict) == FAILURE) {
  2014. WRONG_PARAM_COUNT;
  2015. }
  2016. } else {
  2017. WRONG_PARAM_COUNT;
  2018. }
  2019. /* make encoding list */
  2020. list = NULL;
  2021. size = 0;
  2022. if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_PP(arg_list)) {
  2023. switch (Z_TYPE_PP(arg_list)) {
  2024. case IS_ARRAY:
  2025. if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0 TSRMLS_CC)) {
  2026. if (list) {
  2027. efree(list);
  2028. size = 0;
  2029. }
  2030. }
  2031. break;
  2032. default:
  2033. convert_to_string_ex(arg_list);
  2034. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0 TSRMLS_CC)) {
  2035. if (list) {
  2036. efree(list);
  2037. size = 0;
  2038. }
  2039. }
  2040. break;
  2041. }
  2042. if (size <= 0) {
  2043. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
  2044. }
  2045. }
  2046. if (ZEND_NUM_ARGS() == 3) {
  2047. convert_to_long_ex(arg_strict);
  2048. strict = Z_LVAL_PP(arg_strict);
  2049. }
  2050. else {
  2051. strict = MBSTRG(strict_detection);
  2052. }
  2053. if (size > 0 && list != NULL) {
  2054. elist = list;
  2055. } else {
  2056. elist = MBSTRG(current_detect_order_list);
  2057. size = MBSTRG(current_detect_order_list_size);
  2058. }
  2059. convert_to_string_ex(arg_str);
  2060. mbfl_string_init(&string);
  2061. string.no_language = MBSTRG(current_language);
  2062. string.val = (unsigned char *)Z_STRVAL_PP(arg_str);
  2063. string.len = Z_STRLEN_PP(arg_str);
  2064. ret = mbfl_identify_encoding_name(&string, elist, size, strict);
  2065. if (list != NULL) {
  2066. efree((void *)list);
  2067. }
  2068. if (ret != NULL) {
  2069. RETVAL_STRING((char *)ret, 1);
  2070. } else {
  2071. RETVAL_FALSE;
  2072. }
  2073. }
  2074. /* }}} */
  2075. /* {{{ proto array mb_list_encodings()
  2076. Returns an array of all supported encodings */
  2077. PHP_FUNCTION(mb_list_encodings)
  2078. {
  2079. const mbfl_encoding **encodings;
  2080. const mbfl_encoding *encoding;
  2081. int i;
  2082. array_init(return_value);
  2083. i = 0;
  2084. encodings = mbfl_get_supported_encodings();
  2085. while ((encoding = encodings[i++]) != NULL) {
  2086. add_next_index_string(return_value, (char *) encoding->name, 1);
  2087. }
  2088. }
  2089. /* }}} */
  2090. /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
  2091. Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
  2092. PHP_FUNCTION(mb_encode_mimeheader)
  2093. {
  2094. enum mbfl_no_encoding charset, transenc;
  2095. mbfl_string string, result, *ret;
  2096. char *charset_name = NULL;
  2097. int charset_name_len;
  2098. char *trans_enc_name = NULL;
  2099. int trans_enc_name_len;
  2100. char *linefeed = "\r\n";
  2101. int linefeed_len;
  2102. int indent = 0;
  2103. mbfl_string_init(&string);
  2104. string.no_language = MBSTRG(current_language);
  2105. string.no_encoding = MBSTRG(current_internal_encoding);
  2106. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
  2107. return;
  2108. }
  2109. charset = mbfl_no_encoding_pass;
  2110. transenc = mbfl_no_encoding_base64;
  2111. if (charset_name != NULL) {
  2112. charset = mbfl_name2no_encoding(charset_name);
  2113. if (charset == mbfl_no_encoding_invalid) {
  2114. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
  2115. RETURN_FALSE;
  2116. }
  2117. } else {
  2118. const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
  2119. if (lang != NULL) {
  2120. charset = lang->mail_charset;
  2121. transenc = lang->mail_header_encoding;
  2122. }
  2123. }
  2124. if (trans_enc_name != NULL) {
  2125. if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
  2126. transenc = mbfl_no_encoding_base64;
  2127. } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
  2128. transenc = mbfl_no_encoding_qprint;
  2129. }
  2130. }
  2131. mbfl_string_init(&result);
  2132. ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
  2133. if (ret != NULL) {
  2134. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2135. } else {
  2136. RETVAL_FALSE;
  2137. }
  2138. }
  2139. /* }}} */
  2140. /* {{{ proto string mb_decode_mimeheader(string string)
  2141. Decodes the MIME "encoded-word" in the string */
  2142. PHP_FUNCTION(mb_decode_mimeheader)
  2143. {
  2144. mbfl_string string, result, *ret;
  2145. mbfl_string_init(&string);
  2146. string.no_language = MBSTRG(current_language);
  2147. string.no_encoding = MBSTRG(current_internal_encoding);
  2148. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
  2149. return;
  2150. }
  2151. mbfl_string_init(&result);
  2152. ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
  2153. if (ret != NULL) {
  2154. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2155. } else {
  2156. RETVAL_FALSE;
  2157. }
  2158. }
  2159. /* }}} */
  2160. /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
  2161. Conversion between full-width character and half-width character (Japanese) */
  2162. PHP_FUNCTION(mb_convert_kana)
  2163. {
  2164. int opt, i;
  2165. mbfl_string string, result, *ret;
  2166. char *optstr = NULL;
  2167. int optstr_len;
  2168. char *encname = NULL;
  2169. int encname_len;
  2170. mbfl_string_init(&string);
  2171. string.no_language = MBSTRG(current_language);
  2172. string.no_encoding = MBSTRG(current_internal_encoding);
  2173. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
  2174. return;
  2175. }
  2176. /* option */
  2177. if (optstr != NULL) {
  2178. char *p = optstr;
  2179. int n = optstr_len;
  2180. i = 0;
  2181. opt = 0;
  2182. while (i < n) {
  2183. i++;
  2184. switch (*p++) {
  2185. case 'A':
  2186. opt |= 0x1;
  2187. break;
  2188. case 'a':
  2189. opt |= 0x10;
  2190. break;
  2191. case 'R':
  2192. opt |= 0x2;
  2193. break;
  2194. case 'r':
  2195. opt |= 0x20;
  2196. break;
  2197. case 'N':
  2198. opt |= 0x4;
  2199. break;
  2200. case 'n':
  2201. opt |= 0x40;
  2202. break;
  2203. case 'S':
  2204. opt |= 0x8;
  2205. break;
  2206. case 's':
  2207. opt |= 0x80;
  2208. break;
  2209. case 'K':
  2210. opt |= 0x100;
  2211. break;
  2212. case 'k':
  2213. opt |= 0x1000;
  2214. break;
  2215. case 'H':
  2216. opt |= 0x200;
  2217. break;
  2218. case 'h':
  2219. opt |= 0x2000;
  2220. break;
  2221. case 'V':
  2222. opt |= 0x800;
  2223. break;
  2224. case 'C':
  2225. opt |= 0x10000;
  2226. break;
  2227. case 'c':
  2228. opt |= 0x20000;
  2229. break;
  2230. case 'M':
  2231. opt |= 0x100000;
  2232. break;
  2233. case 'm':
  2234. opt |= 0x200000;
  2235. break;
  2236. }
  2237. }
  2238. } else {
  2239. opt = 0x900;
  2240. }
  2241. /* encoding */
  2242. if (encname != NULL) {
  2243. string.no_encoding = mbfl_name2no_encoding(encname);
  2244. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2245. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
  2246. RETURN_FALSE;
  2247. }
  2248. }
  2249. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  2250. if (ret != NULL) {
  2251. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2252. } else {
  2253. RETVAL_FALSE;
  2254. }
  2255. }
  2256. /* }}} */
  2257. #define PHP_MBSTR_STACK_BLOCK_SIZE 32
  2258. /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding [, mixed ...])
  2259. Converts the string resource in variables to desired encoding */
  2260. PHP_FUNCTION(mb_convert_variables)
  2261. {
  2262. zval ***args, ***stack, **var, **hash_entry;
  2263. HashTable *target_hash;
  2264. mbfl_string string, result, *ret;
  2265. enum mbfl_no_encoding from_encoding, to_encoding;
  2266. mbfl_encoding_detector *identd;
  2267. mbfl_buffer_converter *convd;
  2268. int n, argc, stack_level, stack_max, elistsz;
  2269. enum mbfl_no_encoding *elist;
  2270. char *name;
  2271. void *ptmp;
  2272. argc = ZEND_NUM_ARGS();
  2273. if (argc < 3) {
  2274. WRONG_PARAM_COUNT;
  2275. }
  2276. args = (zval ***)ecalloc(argc, sizeof(zval **));
  2277. if (zend_get_parameters_array_ex(argc, args) == FAILURE) {
  2278. efree((void *)args);
  2279. WRONG_PARAM_COUNT;
  2280. }
  2281. /* new encoding */
  2282. convert_to_string_ex(args[0]);
  2283. to_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(args[0]));
  2284. if (to_encoding == mbfl_no_encoding_invalid) {
  2285. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(args[0]));
  2286. efree((void *)args);
  2287. RETURN_FALSE;
  2288. }
  2289. /* initialize string */
  2290. mbfl_string_init(&string);
  2291. mbfl_string_init(&result);
  2292. from_encoding = MBSTRG(current_internal_encoding);
  2293. string.no_encoding = from_encoding;
  2294. string.no_language = MBSTRG(current_language);
  2295. /* pre-conversion encoding */
  2296. elist = NULL;
  2297. elistsz = 0;
  2298. switch (Z_TYPE_PP(args[1])) {
  2299. case IS_ARRAY:
  2300. php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0 TSRMLS_CC);
  2301. break;
  2302. default:
  2303. convert_to_string_ex(args[1]);
  2304. php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), &elist, &elistsz, 0 TSRMLS_CC);
  2305. break;
  2306. }
  2307. if (elistsz <= 0) {
  2308. from_encoding = mbfl_no_encoding_pass;
  2309. } else if (elistsz == 1) {
  2310. from_encoding = *elist;
  2311. } else {
  2312. /* auto detect */
  2313. from_encoding = mbfl_no_encoding_invalid;
  2314. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2315. stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
  2316. stack_level = 0;
  2317. identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
  2318. if (identd != NULL) {
  2319. n = 2;
  2320. while (n < argc || stack_level > 0) {
  2321. if (stack_level <= 0) {
  2322. var = args[n++];
  2323. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2324. target_hash = HASH_OF(*var);
  2325. if (target_hash != NULL) {
  2326. zend_hash_internal_pointer_reset(target_hash);
  2327. }
  2328. }
  2329. } else {
  2330. stack_level--;
  2331. var = stack[stack_level];
  2332. }
  2333. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2334. target_hash = HASH_OF(*var);
  2335. if (target_hash != NULL) {
  2336. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2337. zend_hash_move_forward(target_hash);
  2338. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2339. if (stack_level >= stack_max) {
  2340. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2341. ptmp = erealloc(stack, sizeof(zval **)*stack_max);
  2342. stack = (zval ***)ptmp;
  2343. }
  2344. stack[stack_level] = var;
  2345. stack_level++;
  2346. var = hash_entry;
  2347. target_hash = HASH_OF(*var);
  2348. if (target_hash != NULL) {
  2349. zend_hash_internal_pointer_reset(target_hash);
  2350. continue;
  2351. }
  2352. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  2353. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  2354. string.len = Z_STRLEN_PP(hash_entry);
  2355. if (mbfl_encoding_detector_feed(identd, &string)) {
  2356. goto detect_end; /* complete detecting */
  2357. }
  2358. }
  2359. }
  2360. }
  2361. } else if (Z_TYPE_PP(var) == IS_STRING) {
  2362. string.val = (unsigned char *)Z_STRVAL_PP(var);
  2363. string.len = Z_STRLEN_PP(var);
  2364. if (mbfl_encoding_detector_feed(identd, &string)) {
  2365. goto detect_end; /* complete detecting */
  2366. }
  2367. }
  2368. }
  2369. detect_end:
  2370. from_encoding = mbfl_encoding_detector_judge(identd);
  2371. mbfl_encoding_detector_delete(identd);
  2372. }
  2373. efree(stack);
  2374. if (from_encoding == mbfl_no_encoding_invalid) {
  2375. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
  2376. from_encoding = mbfl_no_encoding_pass;
  2377. }
  2378. }
  2379. if (elist != NULL) {
  2380. efree((void *)elist);
  2381. }
  2382. /* create converter */
  2383. convd = NULL;
  2384. if (from_encoding != mbfl_no_encoding_pass) {
  2385. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
  2386. if (convd == NULL) {
  2387. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  2388. RETURN_FALSE;
  2389. }
  2390. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2391. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2392. }
  2393. /* convert */
  2394. if (convd != NULL) {
  2395. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2396. stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
  2397. stack_level = 0;
  2398. n = 2;
  2399. while (n < argc || stack_level > 0) {
  2400. if (stack_level <= 0) {
  2401. var = args[n++];
  2402. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2403. target_hash = HASH_OF(*var);
  2404. if (target_hash != NULL) {
  2405. zend_hash_internal_pointer_reset(target_hash);
  2406. }
  2407. }
  2408. } else {
  2409. stack_level--;
  2410. var = stack[stack_level];
  2411. }
  2412. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2413. target_hash = HASH_OF(*var);
  2414. if (target_hash != NULL) {
  2415. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2416. zend_hash_move_forward(target_hash);
  2417. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2418. if (stack_level >= stack_max) {
  2419. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2420. ptmp = erealloc(stack, sizeof(zval **)*stack_max);
  2421. stack = (zval ***)ptmp;
  2422. }
  2423. stack[stack_level] = var;
  2424. stack_level++;
  2425. var = hash_entry;
  2426. SEPARATE_ZVAL(hash_entry);
  2427. target_hash = HASH_OF(*var);
  2428. if (target_hash != NULL) {
  2429. zend_hash_internal_pointer_reset(target_hash);
  2430. continue;
  2431. }
  2432. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  2433. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  2434. string.len = Z_STRLEN_PP(hash_entry);
  2435. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2436. if (ret != NULL) {
  2437. if ((*hash_entry)->refcount > 1) {
  2438. ZVAL_DELREF(*hash_entry);
  2439. MAKE_STD_ZVAL(*hash_entry);
  2440. } else {
  2441. zval_dtor(*hash_entry);
  2442. }
  2443. ZVAL_STRINGL(*hash_entry, ret->val, ret->len, 0);
  2444. }
  2445. }
  2446. }
  2447. }
  2448. } else if (Z_TYPE_PP(var) == IS_STRING) {
  2449. string.val = (unsigned char *)Z_STRVAL_PP(var);
  2450. string.len = Z_STRLEN_PP(var);
  2451. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2452. if (ret != NULL) {
  2453. zval_dtor(*var);
  2454. ZVAL_STRINGL(*var, ret->val, ret->len, 0);
  2455. }
  2456. }
  2457. }
  2458. efree(stack);
  2459. mbfl_buffer_converter_delete(convd);
  2460. }
  2461. efree((void *)args);
  2462. name = (char *)mbfl_no_encoding2name(from_encoding);
  2463. if (name != NULL) {
  2464. RETURN_STRING(name, 1);
  2465. } else {
  2466. RETURN_FALSE;
  2467. }
  2468. }
  2469. /* }}} */
  2470. /* {{{ HTML numeric entity */
  2471. /* {{{ static void php_mb_numericentity_exec() */
  2472. static void
  2473. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
  2474. {
  2475. zval **arg1, **arg2, **arg3, **hash_entry;
  2476. HashTable *target_hash;
  2477. int argc, i, *convmap, *mapelm, mapsize=0;
  2478. mbfl_string string, result, *ret;
  2479. enum mbfl_no_encoding no_encoding;
  2480. argc = ZEND_NUM_ARGS();
  2481. if ((argc == 2 && zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) ||
  2482. (argc == 3 && zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) ||
  2483. argc < 2 || argc > 3) {
  2484. WRONG_PARAM_COUNT;
  2485. }
  2486. convert_to_string_ex(arg1);
  2487. mbfl_string_init(&string);
  2488. string.no_language = MBSTRG(current_language);
  2489. string.no_encoding = MBSTRG(current_internal_encoding);
  2490. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  2491. string.len = Z_STRLEN_PP(arg1);
  2492. /* encoding */
  2493. if (argc == 3) {
  2494. convert_to_string_ex(arg3);
  2495. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg3));
  2496. if (no_encoding == mbfl_no_encoding_invalid) {
  2497. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg3));
  2498. RETURN_FALSE;
  2499. } else {
  2500. string.no_encoding = no_encoding;
  2501. }
  2502. }
  2503. /* conversion map */
  2504. convmap = NULL;
  2505. if (Z_TYPE_PP(arg2) == IS_ARRAY){
  2506. target_hash = Z_ARRVAL_PP(arg2);
  2507. zend_hash_internal_pointer_reset(target_hash);
  2508. i = zend_hash_num_elements(target_hash);
  2509. if (i > 0) {
  2510. convmap = (int *)safe_emalloc(i, sizeof(int), 0);
  2511. mapelm = convmap;
  2512. mapsize = 0;
  2513. while (i > 0) {
  2514. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  2515. break;
  2516. }
  2517. convert_to_long_ex(hash_entry);
  2518. *mapelm++ = Z_LVAL_PP(hash_entry);
  2519. mapsize++;
  2520. i--;
  2521. zend_hash_move_forward(target_hash);
  2522. }
  2523. }
  2524. }
  2525. if (convmap == NULL) {
  2526. RETURN_FALSE;
  2527. }
  2528. mapsize /= 4;
  2529. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
  2530. if (ret != NULL) {
  2531. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2532. } else {
  2533. RETVAL_FALSE;
  2534. }
  2535. efree((void *)convmap);
  2536. }
  2537. /* }}} */
  2538. /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
  2539. Converts specified characters to HTML numeric entities */
  2540. PHP_FUNCTION(mb_encode_numericentity)
  2541. {
  2542. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  2543. }
  2544. /* }}} */
  2545. /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
  2546. Converts HTML numeric entities to character code */
  2547. PHP_FUNCTION(mb_decode_numericentity)
  2548. {
  2549. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  2550. }
  2551. /* }}} */
  2552. /* }}} */
  2553. /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
  2554. * Sends an email message with MIME scheme
  2555. */
  2556. #if HAVE_SENDMAIL
  2557. #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
  2558. if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
  2559. pos += 3; \
  2560. while (str[pos] == ' ' || str[pos] == '\t') { \
  2561. pos++; \
  2562. } \
  2563. continue; \
  2564. }
  2565. #define APPEND_ONE_CHAR(ch) do { \
  2566. if (token.a > 0) { \
  2567. smart_str_appendc(&token, ch); \
  2568. } else {\
  2569. token.len++; \
  2570. } \
  2571. } while (0)
  2572. #define SEPARATE_SMART_STR(str) do {\
  2573. if ((str)->a == 0) { \
  2574. char *tmp_ptr; \
  2575. (str)->a = 1; \
  2576. while ((str)->a < (str)->len) { \
  2577. (str)->a <<= 1; \
  2578. } \
  2579. tmp_ptr = emalloc((str)->a + 1); \
  2580. memcpy(tmp_ptr, (str)->c, (str)->len); \
  2581. (str)->c = tmp_ptr; \
  2582. } \
  2583. } while (0)
  2584. static void my_smart_str_dtor(smart_str *s)
  2585. {
  2586. if (s->a > 0) {
  2587. smart_str_free(s);
  2588. }
  2589. }
  2590. static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
  2591. {
  2592. const char *ps;
  2593. size_t icnt;
  2594. int state = 0;
  2595. int crlf_state = -1;
  2596. smart_str token = { 0, 0, 0 };
  2597. smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
  2598. ps = str;
  2599. icnt = str_len;
  2600. /*
  2601. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2602. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  2603. * state 0 1 2 3
  2604. *
  2605. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2606. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  2607. * crlf_state -1 0 1 -1
  2608. *
  2609. */
  2610. while (icnt > 0) {
  2611. switch (*ps) {
  2612. case ':':
  2613. if (crlf_state == 1) {
  2614. APPEND_ONE_CHAR('\r');
  2615. }
  2616. if (state == 0 || state == 1) {
  2617. fld_name = token;
  2618. state = 2;
  2619. } else {
  2620. APPEND_ONE_CHAR(*ps);
  2621. }
  2622. crlf_state = 0;
  2623. break;
  2624. case '\n':
  2625. if (crlf_state == -1) {
  2626. goto out;
  2627. }
  2628. crlf_state = -1;
  2629. break;
  2630. case '\r':
  2631. if (crlf_state == 1) {
  2632. APPEND_ONE_CHAR('\r');
  2633. } else {
  2634. crlf_state = 1;
  2635. }
  2636. break;
  2637. case ' ': case '\t':
  2638. if (crlf_state == -1) {
  2639. if (state == 3) {
  2640. /* continuing from the previous line */
  2641. SEPARATE_SMART_STR(&token);
  2642. state = 4;
  2643. } else {
  2644. /* simply skipping this new line */
  2645. state = 5;
  2646. }
  2647. } else {
  2648. if (crlf_state == 1) {
  2649. APPEND_ONE_CHAR('\r');
  2650. }
  2651. if (state == 1 || state == 3) {
  2652. APPEND_ONE_CHAR(*ps);
  2653. }
  2654. }
  2655. crlf_state = 0;
  2656. break;
  2657. default:
  2658. switch (state) {
  2659. case 0:
  2660. token.c = (char *)ps;
  2661. token.len = 0;
  2662. token.a = 0;
  2663. state = 1;
  2664. break;
  2665. case 2:
  2666. if (crlf_state != -1) {
  2667. token.c = (char *)ps;
  2668. token.len = 0;
  2669. token.a = 0;
  2670. state = 3;
  2671. break;
  2672. }
  2673. /* break is missing intentionally */
  2674. case 3:
  2675. if (crlf_state == -1) {
  2676. fld_val = token;
  2677. if (fld_name.c != NULL && fld_val.c != NULL) {
  2678. char *dummy;
  2679. /* FIXME: some locale free implementation is
  2680. * really required here,,, */
  2681. SEPARATE_SMART_STR(&fld_name);
  2682. php_strtoupper(fld_name.c, fld_name.len);
  2683. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  2684. my_smart_str_dtor(&fld_name);
  2685. }
  2686. memset(&fld_name, 0, sizeof(smart_str));
  2687. memset(&fld_val, 0, sizeof(smart_str));
  2688. token.c = (char *)ps;
  2689. token.len = 0;
  2690. token.a = 0;
  2691. state = 1;
  2692. }
  2693. break;
  2694. case 4:
  2695. APPEND_ONE_CHAR(' ');
  2696. state = 3;
  2697. break;
  2698. }
  2699. if (crlf_state == 1) {
  2700. APPEND_ONE_CHAR('\r');
  2701. }
  2702. APPEND_ONE_CHAR(*ps);
  2703. crlf_state = 0;
  2704. break;
  2705. }
  2706. ps++, icnt--;
  2707. }
  2708. out:
  2709. if (state == 2) {
  2710. token.c = "";
  2711. token.len = 0;
  2712. token.a = 0;
  2713. state = 3;
  2714. }
  2715. if (state == 3) {
  2716. fld_val = token;
  2717. if (fld_name.c != NULL && fld_val.c != NULL) {
  2718. void *dummy;
  2719. /* FIXME: some locale free implementation is
  2720. * really required here,,, */
  2721. SEPARATE_SMART_STR(&fld_name);
  2722. php_strtoupper(fld_name.c, fld_name.len);
  2723. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  2724. my_smart_str_dtor(&fld_name);
  2725. }
  2726. }
  2727. return state;
  2728. }
  2729. PHP_FUNCTION(mb_send_mail)
  2730. {
  2731. int n;
  2732. char *to=NULL;
  2733. int to_len;
  2734. char *message=NULL;
  2735. int message_len;
  2736. char *headers=NULL;
  2737. int headers_len;
  2738. char *subject=NULL;
  2739. int subject_len;
  2740. char *extra_cmd=NULL;
  2741. int extra_cmd_len;
  2742. int i;
  2743. char *to_r = NULL;
  2744. char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
  2745. struct {
  2746. int cnt_type:1;
  2747. int cnt_trans_enc:1;
  2748. } suppressed_hdrs = { 0, 0 };
  2749. char *message_buf=NULL, *subject_buf=NULL, *p;
  2750. mbfl_string orig_str, conv_str;
  2751. mbfl_string *pstr; /* pointer to mbfl string for return value */
  2752. enum mbfl_no_encoding
  2753. tran_cs, /* transfar text charset */
  2754. head_enc, /* header transfar encoding */
  2755. body_enc; /* body transfar encoding */
  2756. mbfl_memory_device device; /* automatic allocateable buffer for additional header */
  2757. const mbfl_language *lang;
  2758. int err = 0;
  2759. HashTable ht_headers;
  2760. smart_str *s;
  2761. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  2762. if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) {
  2763. php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. The fifth parameter is disabled in SAFE MODE.");
  2764. RETURN_FALSE;
  2765. }
  2766. /* initialize */
  2767. mbfl_memory_device_init(&device, 0, 0);
  2768. mbfl_string_init(&orig_str);
  2769. mbfl_string_init(&conv_str);
  2770. /* character-set, transfer-encoding */
  2771. tran_cs = mbfl_no_encoding_utf8;
  2772. head_enc = mbfl_no_encoding_base64;
  2773. body_enc = mbfl_no_encoding_base64;
  2774. lang = mbfl_no2language(MBSTRG(current_language));
  2775. if (lang != NULL) {
  2776. tran_cs = lang->mail_charset;
  2777. head_enc = lang->mail_header_encoding;
  2778. body_enc = lang->mail_body_encoding;
  2779. }
  2780. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
  2781. return;
  2782. }
  2783. zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
  2784. if (headers != NULL) {
  2785. _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
  2786. }
  2787. if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
  2788. char *tmp;
  2789. char *param_name;
  2790. char *charset = NULL;
  2791. SEPARATE_SMART_STR(s);
  2792. smart_str_0(s);
  2793. p = strchr(s->c, ';');
  2794. if (p != NULL) {
  2795. /* skipping the padded spaces */
  2796. do {
  2797. ++p;
  2798. } while (*p == ' ' || *p == '\t');
  2799. if (*p != '\0') {
  2800. if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
  2801. if (strcasecmp(param_name, "charset") == 0) {
  2802. enum mbfl_no_encoding _tran_cs = tran_cs;
  2803. charset = php_strtok_r(NULL, "= ", &tmp);
  2804. if (charset != NULL) {
  2805. _tran_cs = mbfl_name2no_encoding(charset);
  2806. }
  2807. if (_tran_cs == mbfl_no_encoding_invalid) {
  2808. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
  2809. _tran_cs = mbfl_no_encoding_ascii;
  2810. }
  2811. tran_cs = _tran_cs;
  2812. }
  2813. }
  2814. }
  2815. }
  2816. suppressed_hdrs.cnt_type = 1;
  2817. }
  2818. if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
  2819. enum mbfl_no_encoding _body_enc;
  2820. SEPARATE_SMART_STR(s);
  2821. smart_str_0(s);
  2822. _body_enc = mbfl_name2no_encoding(s->c);
  2823. switch (_body_enc) {
  2824. case mbfl_no_encoding_base64:
  2825. case mbfl_no_encoding_7bit:
  2826. case mbfl_no_encoding_8bit:
  2827. body_enc = _body_enc;
  2828. break;
  2829. default:
  2830. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
  2831. body_enc = mbfl_no_encoding_8bit;
  2832. break;
  2833. }
  2834. suppressed_hdrs.cnt_trans_enc = 1;
  2835. }
  2836. /* To: */
  2837. if (to != NULL) {
  2838. if (to_len > 0) {
  2839. to_r = estrndup(to, to_len);
  2840. for (; to_len; to_len--) {
  2841. if (!isspace((unsigned char) to_r[to_len - 1])) {
  2842. break;
  2843. }
  2844. to_r[to_len - 1] = '\0';
  2845. }
  2846. for (i = 0; to_r[i]; i++) {
  2847. if (iscntrl((unsigned char) to_r[i])) {
  2848. /* According to RFC 822, section 3.1.1 long headers may be separated into
  2849. * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
  2850. * To prevent these separators from being replaced with a space, we use the
  2851. * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
  2852. */
  2853. SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
  2854. to_r[i] = ' ';
  2855. }
  2856. }
  2857. } else {
  2858. to_r = to;
  2859. }
  2860. } else {
  2861. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
  2862. err = 1;
  2863. }
  2864. /* Subject: */
  2865. if (subject != NULL && subject_len >= 0) {
  2866. orig_str.no_language = MBSTRG(current_language);
  2867. orig_str.val = (unsigned char *)subject;
  2868. orig_str.len = subject_len;
  2869. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  2870. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  2871. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  2872. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  2873. }
  2874. pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
  2875. if (pstr != NULL) {
  2876. subject_buf = subject = (char *)pstr->val;
  2877. }
  2878. } else {
  2879. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
  2880. err = 1;
  2881. }
  2882. /* message body */
  2883. if (message != NULL) {
  2884. orig_str.no_language = MBSTRG(current_language);
  2885. orig_str.val = message;
  2886. orig_str.len = message_len;
  2887. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  2888. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  2889. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  2890. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
  2891. }
  2892. pstr = NULL;
  2893. {
  2894. mbfl_string tmpstr;
  2895. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  2896. tmpstr.no_encoding=mbfl_no_encoding_8bit;
  2897. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  2898. efree(tmpstr.val);
  2899. }
  2900. }
  2901. if (pstr != NULL) {
  2902. message_buf = message = (char *)pstr->val;
  2903. }
  2904. } else {
  2905. /* this is not really an error, so it is allowed. */
  2906. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
  2907. message = NULL;
  2908. }
  2909. /* other headers */
  2910. #define PHP_MBSTR_MAIL_MIME_HEADER1 "Mime-Version: 1.0"
  2911. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  2912. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  2913. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  2914. if (headers != NULL) {
  2915. p = headers;
  2916. n = headers_len;
  2917. mbfl_memory_device_strncat(&device, p, n);
  2918. if (n > 0 && p[n - 1] != '\n') {
  2919. mbfl_memory_device_strncat(&device, "\n", 1);
  2920. }
  2921. }
  2922. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  2923. mbfl_memory_device_strncat(&device, "\n", 1);
  2924. if (!suppressed_hdrs.cnt_type) {
  2925. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  2926. p = (char *)mbfl_no2preferred_mime_name(tran_cs);
  2927. if (p != NULL) {
  2928. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  2929. mbfl_memory_device_strcat(&device, p);
  2930. }
  2931. mbfl_memory_device_strncat(&device, "\n", 1);
  2932. }
  2933. if (!suppressed_hdrs.cnt_trans_enc) {
  2934. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  2935. p = (char *)mbfl_no2preferred_mime_name(body_enc);
  2936. if (p == NULL) {
  2937. p = "7bit";
  2938. }
  2939. mbfl_memory_device_strcat(&device, p);
  2940. mbfl_memory_device_strncat(&device, "\n", 1);
  2941. }
  2942. mbfl_memory_device_unput(&device);
  2943. mbfl_memory_device_output('\0', &device);
  2944. headers = (char *)device.buffer;
  2945. if (force_extra_parameters) {
  2946. extra_cmd = estrdup(force_extra_parameters);
  2947. } else if (extra_cmd) {
  2948. extra_cmd = php_escape_shell_cmd(extra_cmd);
  2949. }
  2950. if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
  2951. RETVAL_TRUE;
  2952. } else {
  2953. RETVAL_FALSE;
  2954. }
  2955. if (extra_cmd) {
  2956. efree(extra_cmd);
  2957. }
  2958. if (to_r != to) {
  2959. efree(to_r);
  2960. }
  2961. if (subject_buf) {
  2962. efree((void *)subject_buf);
  2963. }
  2964. if (message_buf) {
  2965. efree((void *)message_buf);
  2966. }
  2967. mbfl_memory_device_clear(&device);
  2968. zend_hash_destroy(&ht_headers);
  2969. }
  2970. #undef SKIP_LONG_HEADER_SEP_MBSTRING
  2971. #undef APPEND_ONE_CHAR
  2972. #undef SEPARATE_SMART_STR
  2973. #undef PHP_MBSTR_MAIL_MIME_HEADER1
  2974. #undef PHP_MBSTR_MAIL_MIME_HEADER2
  2975. #undef PHP_MBSTR_MAIL_MIME_HEADER3
  2976. #undef PHP_MBSTR_MAIL_MIME_HEADER4
  2977. #else /* HAVE_SENDMAIL */
  2978. PHP_FUNCTION(mb_send_mail)
  2979. {
  2980. RETURN_FALSE;
  2981. }
  2982. #endif /* HAVE_SENDMAIL */
  2983. /* }}} */
  2984. /* {{{ proto string mb_get_info([string type])
  2985. Returns the current settings of mbstring */
  2986. PHP_FUNCTION(mb_get_info)
  2987. {
  2988. char *typ = NULL;
  2989. int typ_len;
  2990. char *name;
  2991. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  2992. RETURN_FALSE;
  2993. }
  2994. if (!typ || !strcasecmp("all", typ)) {
  2995. const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
  2996. array_init(return_value);
  2997. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  2998. add_assoc_string(return_value, "internal_encoding", name, 1);
  2999. }
  3000. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  3001. add_assoc_string(return_value, "http_input", name, 1);
  3002. }
  3003. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  3004. add_assoc_string(return_value, "http_output", name, 1);
  3005. }
  3006. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) {
  3007. add_assoc_string(return_value, "func_overload", name, 1);
  3008. }
  3009. if (lang != NULL) {
  3010. add_assoc_string(return_value, "mail_charset",
  3011. mbfl_no_encoding2name(lang->mail_charset), 1);
  3012. add_assoc_string(return_value, "mail_header_encoding",
  3013. mbfl_no_encoding2name(lang->mail_header_encoding), 1);
  3014. add_assoc_string(return_value, "mail_body_encoding",
  3015. mbfl_no_encoding2name(lang->mail_body_encoding), 1);
  3016. }
  3017. } else if (!strcasecmp("internal_encoding", typ)) {
  3018. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  3019. RETVAL_STRING(name, 1);
  3020. }
  3021. } else if (!strcasecmp("http_input", typ)) {
  3022. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  3023. RETVAL_STRING(name, 1);
  3024. }
  3025. } else if (!strcasecmp("http_output", typ)) {
  3026. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  3027. RETVAL_STRING(name, 1);
  3028. }
  3029. } else if (!strcasecmp("func_overload", typ)) {
  3030. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) {
  3031. RETVAL_STRING(name, 1);
  3032. }
  3033. } else {
  3034. RETURN_FALSE;
  3035. }
  3036. }
  3037. /* }}} */
  3038. /* {{{ MBSTRING_API int php_mb_encoding_translation() */
  3039. MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
  3040. {
  3041. return MBSTRG(encoding_translation);
  3042. }
  3043. /* }}} */
  3044. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
  3045. MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
  3046. {
  3047. if (enc != NULL) {
  3048. if (enc->flag & MBFL_ENCTYPE_MBCS) {
  3049. if (enc->mblen_table != NULL) {
  3050. if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
  3051. }
  3052. } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  3053. return 2;
  3054. } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  3055. return 4;
  3056. }
  3057. }
  3058. return 1;
  3059. }
  3060. /* }}} */
  3061. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
  3062. MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
  3063. {
  3064. return php_mb_mbchar_bytes_ex(s,
  3065. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3066. }
  3067. /* }}} */
  3068. /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
  3069. MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
  3070. {
  3071. register const char *p = s;
  3072. char *last=NULL;
  3073. if (nbytes == (size_t)-1) {
  3074. size_t nb = 0;
  3075. while (*p != '\0') {
  3076. if (nb == 0) {
  3077. if ((unsigned char)*p == (unsigned char)c) {
  3078. last = (char *)p;
  3079. }
  3080. nb = php_mb_mbchar_bytes_ex(p, enc);
  3081. if (nb == 0) {
  3082. return NULL; /* something is going wrong! */
  3083. }
  3084. }
  3085. --nb;
  3086. ++p;
  3087. }
  3088. } else {
  3089. register size_t bcnt = nbytes;
  3090. register size_t nbytes_char;
  3091. while (bcnt > 0) {
  3092. if ((unsigned char)*p == (unsigned char)c) {
  3093. last = (char *)p;
  3094. }
  3095. nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
  3096. if (bcnt < nbytes_char) {
  3097. return NULL;
  3098. }
  3099. p += nbytes_char;
  3100. bcnt -= nbytes_char;
  3101. }
  3102. }
  3103. return last;
  3104. }
  3105. /* }}} */
  3106. /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
  3107. MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
  3108. {
  3109. return php_mb_safe_strrchr_ex(s, c, nbytes,
  3110. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3111. }
  3112. /* }}} */
  3113. /* {{{ MBSTRING_API char *php_mb_strrchr() */
  3114. MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
  3115. {
  3116. return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
  3117. }
  3118. /* }}} */
  3119. /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
  3120. MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
  3121. {
  3122. if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
  3123. return php_mb_mbchar_bytes_ex(s,
  3124. mbfl_no2encoding(MBSTRG(http_input_identify)));
  3125. } else {
  3126. return php_mb_mbchar_bytes_ex(s,
  3127. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3128. }
  3129. }
  3130. /* }}} */
  3131. /* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
  3132. MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from
  3133. TSRMLS_DC)
  3134. {
  3135. int i;
  3136. mbfl_string string, result, *ret = NULL;
  3137. enum mbfl_no_encoding from_encoding, to_encoding;
  3138. mbfl_buffer_converter *convd;
  3139. if (encoding_to) {
  3140. /* new encoding */
  3141. to_encoding = mbfl_name2no_encoding(encoding_to);
  3142. if (to_encoding == mbfl_no_encoding_invalid) {
  3143. return -1;
  3144. }
  3145. } else {
  3146. to_encoding = MBSTRG(current_internal_encoding);
  3147. }
  3148. if (encoding_from) {
  3149. /* old encoding */
  3150. from_encoding = mbfl_name2no_encoding(encoding_from);
  3151. if (from_encoding == mbfl_no_encoding_invalid) {
  3152. return -1;
  3153. }
  3154. } else {
  3155. from_encoding = MBSTRG(http_input_identify);
  3156. }
  3157. if (from_encoding == mbfl_no_encoding_pass) {
  3158. return 0;
  3159. }
  3160. /* initialize string */
  3161. mbfl_string_init(&string);
  3162. mbfl_string_init(&result);
  3163. string.no_encoding = from_encoding;
  3164. string.no_language = MBSTRG(current_language);
  3165. for (i=0; i<num; i++){
  3166. string.val = (char*)str[i];
  3167. string.len = len[i];
  3168. /* initialize converter */
  3169. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  3170. if (convd == NULL) {
  3171. return -1;
  3172. }
  3173. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3174. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3175. /* do it */
  3176. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3177. if (ret != NULL) {
  3178. efree(str[i]);
  3179. str[i] = ret->val;
  3180. len[i] = ret->len;
  3181. }
  3182. mbfl_buffer_converter_delete(convd);
  3183. }
  3184. return ret ? 0 : -1;
  3185. }
  3186. /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
  3187. */
  3188. MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
  3189. {
  3190. mbfl_string string;
  3191. enum mbfl_no_encoding *elist;
  3192. enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
  3193. mbfl_encoding_detector *identd = NULL;
  3194. int size;
  3195. enum mbfl_no_encoding *list;
  3196. if (MBSTRG(http_input_list_size) == 1 &&
  3197. MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
  3198. MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
  3199. return SUCCESS;
  3200. }
  3201. if (MBSTRG(http_input_list_size) == 1 &&
  3202. MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
  3203. mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
  3204. MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
  3205. return SUCCESS;
  3206. }
  3207. if (arg_list && strlen(arg_list)>0) {
  3208. /* make encoding list */
  3209. list = NULL;
  3210. size = 0;
  3211. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  3212. if (size > 0 && list != NULL) {
  3213. elist = list;
  3214. } else {
  3215. elist = MBSTRG(current_detect_order_list);
  3216. size = MBSTRG(current_detect_order_list_size);
  3217. if (size <= 0){
  3218. elist = MBSTRG(default_detect_order_list);
  3219. size = MBSTRG(default_detect_order_list_size);
  3220. }
  3221. }
  3222. } else {
  3223. elist = MBSTRG(current_detect_order_list);
  3224. size = MBSTRG(current_detect_order_list_size);
  3225. if (size <= 0){
  3226. elist = MBSTRG(default_detect_order_list);
  3227. size = MBSTRG(default_detect_order_list_size);
  3228. }
  3229. }
  3230. mbfl_string_init(&string);
  3231. string.no_language = MBSTRG(current_language);
  3232. identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
  3233. if (identd) {
  3234. int n = 0;
  3235. while(n < num){
  3236. string.val = (unsigned char *)arg_string[n];
  3237. string.len = arg_length[n];
  3238. if (mbfl_encoding_detector_feed(identd, &string)) {
  3239. break;
  3240. }
  3241. n++;
  3242. }
  3243. encoding = mbfl_encoding_detector_judge(identd);
  3244. mbfl_encoding_detector_delete(identd);
  3245. }
  3246. if (encoding != mbfl_no_encoding_invalid) {
  3247. MBSTRG(http_input_identify) = encoding;
  3248. return SUCCESS;
  3249. } else {
  3250. return FAILURE;
  3251. }
  3252. }
  3253. /* }}} */
  3254. #ifdef ZEND_MULTIBYTE
  3255. /* {{{ MBSTRING_API int php_mb_set_zend_encoding() */
  3256. MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
  3257. {
  3258. /* 'd better use mbfl_memory_device? */
  3259. char *name, *list = NULL;
  3260. int n, *entry, list_size = 0;
  3261. zend_encoding_detector encoding_detector;
  3262. zend_encoding_converter encoding_converter;
  3263. zend_encoding_oddlen encoding_oddlen;
  3264. /* notify script encoding to Zend Engine */
  3265. entry = MBSTRG(script_encoding_list);
  3266. n = MBSTRG(script_encoding_list_size);
  3267. while (n > 0) {
  3268. name = (char *)mbfl_no_encoding2name(*entry);
  3269. if (name) {
  3270. list_size += strlen(name) + 1;
  3271. if (!list) {
  3272. list = (char*)emalloc(list_size);
  3273. *list = (char)NULL;
  3274. } else {
  3275. list = (char*)erealloc(list, list_size);
  3276. strcat(list, ",");
  3277. }
  3278. strcat(list, name);
  3279. }
  3280. entry++;
  3281. n--;
  3282. }
  3283. zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
  3284. if (list) {
  3285. efree(list);
  3286. }
  3287. encoding_detector = php_mb_encoding_detector;
  3288. encoding_converter = php_mb_encoding_converter;
  3289. encoding_oddlen = php_mb_oddlen;
  3290. /* TODO: make independent from mbstring.encoding_translation? */
  3291. if (MBSTRG(encoding_translation)) {
  3292. /* notify internal encoding to Zend Engine */
  3293. name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  3294. zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
  3295. }
  3296. zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
  3297. return 0;
  3298. }
  3299. /* }}} */
  3300. /* {{{ char *php_mb_encoding_detector()
  3301. * Interface for Zend Engine
  3302. */
  3303. char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
  3304. {
  3305. mbfl_string string;
  3306. const char *ret;
  3307. enum mbfl_no_encoding *elist;
  3308. int size, *list;
  3309. /* make encoding list */
  3310. list = NULL;
  3311. size = 0;
  3312. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  3313. if (size <= 0) {
  3314. return NULL;
  3315. }
  3316. if (size > 0 && list != NULL) {
  3317. elist = list;
  3318. } else {
  3319. elist = MBSTRG(current_detect_order_list);
  3320. size = MBSTRG(current_detect_order_list_size);
  3321. }
  3322. mbfl_string_init(&string);
  3323. string.no_language = MBSTRG(current_language);
  3324. string.val = (char*)arg_string;
  3325. string.len = arg_length;
  3326. ret = mbfl_identify_encoding_name(&string, elist, size, 0);
  3327. if (list != NULL) {
  3328. efree((void *)list);
  3329. }
  3330. if (ret != NULL) {
  3331. return estrdup(ret);
  3332. } else {
  3333. return NULL;
  3334. }
  3335. }
  3336. /* }}} */
  3337. /* {{{ int php_mb_encoding_converter() */
  3338. int php_mb_encoding_converter(char **to, int *to_length, const char *from,
  3339. int from_length, const char *encoding_to, const char *encoding_from
  3340. TSRMLS_DC)
  3341. {
  3342. mbfl_string string, result, *ret;
  3343. enum mbfl_no_encoding from_encoding, to_encoding;
  3344. mbfl_buffer_converter *convd;
  3345. /* new encoding */
  3346. to_encoding = mbfl_name2no_encoding(encoding_to);
  3347. if (to_encoding == mbfl_no_encoding_invalid) {
  3348. return -1;
  3349. }
  3350. /* old encoding */
  3351. from_encoding = mbfl_name2no_encoding(encoding_from);
  3352. if (from_encoding == mbfl_no_encoding_invalid) {
  3353. return -1;
  3354. }
  3355. /* initialize string */
  3356. mbfl_string_init(&string);
  3357. mbfl_string_init(&result);
  3358. string.no_encoding = from_encoding;
  3359. string.no_language = MBSTRG(current_language);
  3360. string.val = (char*)from;
  3361. string.len = from_length;
  3362. /* initialize converter */
  3363. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  3364. if (convd == NULL) {
  3365. return -1;
  3366. }
  3367. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3368. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3369. /* do it */
  3370. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3371. if (ret != NULL) {
  3372. *to = ret->val;
  3373. *to_length = ret->len;
  3374. }
  3375. mbfl_buffer_converter_delete(convd);
  3376. return ret ? 0 : -1;
  3377. }
  3378. /* }}} */
  3379. /* {{{ int php_mb_oddlen()
  3380. * returns number of odd (e.g. appears only first byte of multibyte
  3381. * character) chars
  3382. */
  3383. int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC)
  3384. {
  3385. mbfl_string mb_string;
  3386. mbfl_string_init(&mb_string);
  3387. mb_string.no_language = MBSTRG(current_language);
  3388. mb_string.no_encoding = mbfl_name2no_encoding(encoding);
  3389. mb_string.val = (char*)string;
  3390. mb_string.len = length;
  3391. if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
  3392. return 0;
  3393. }
  3394. return mbfl_oddlen(&mb_string);
  3395. }
  3396. /* }}} */
  3397. #endif /* ZEND_MULTIBYTE */
  3398. #endif /* HAVE_MBSTRING */
  3399. /*
  3400. * Local variables:
  3401. * tab-width: 4
  3402. * c-basic-offset: 4
  3403. * End:
  3404. * vim600: fdm=marker
  3405. * vim: noet sw=4 ts=4
  3406. */