You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3716 lines
101 KiB

21 years ago
25 years ago
25 years ago
22 years ago
23 years ago
23 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
23 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
25 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
25 years ago
24 years ago
22 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2005 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.0 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_0.txt. |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. | Rui Hirokawa <hirokawa@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* $Id$ */
  20. /*
  21. * PHP 4 Multibyte String module "mbstring"
  22. *
  23. * History:
  24. * 2000.5.19 Release php-4.0RC2_jstring-1.0
  25. * 2001.4.1 Release php4_jstring-1.0.91
  26. * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
  27. * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
  28. */
  29. /*
  30. * PHP3 Internationalization support program.
  31. *
  32. * Copyright (c) 1999,2000 by the PHP3 internationalization team.
  33. * All rights reserved.
  34. *
  35. * See README_PHP3-i18n-ja for more detail.
  36. *
  37. * Authors:
  38. * Hironori Sato <satoh@jpnnet.com>
  39. * Shigeru Kanemoto <sgk@happysize.co.jp>
  40. * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
  41. * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
  42. */
  43. /* {{{ includes */
  44. #ifdef HAVE_CONFIG_H
  45. #include "config.h"
  46. #endif
  47. #include "php.h"
  48. #include "php_ini.h"
  49. #include "php_variables.h"
  50. #include "mbstring.h"
  51. #include "ext/standard/php_string.h"
  52. #include "ext/standard/php_mail.h"
  53. #include "ext/standard/php_smart_str.h"
  54. #include "ext/standard/url.h"
  55. #include "main/php_output.h"
  56. #include "ext/standard/info.h"
  57. #include "libmbfl/mbfl/mbfl_allocators.h"
  58. #include "php_variables.h"
  59. #include "php_globals.h"
  60. #include "rfc1867.h"
  61. #include "php_content_types.h"
  62. #include "SAPI.h"
  63. #include "php_unicode.h"
  64. #include "TSRM.h"
  65. #include "mb_gpc.h"
  66. #ifdef ZEND_MULTIBYTE
  67. #include "zend_multibyte.h"
  68. #endif /* ZEND_MULTIBYTE */
  69. #if HAVE_MBSTRING
  70. /* }}} */
  71. /* {{{ prototypes */
  72. static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC);
  73. static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC);
  74. /* }}} */
  75. /* {{{ php_mb_default_identify_list */
  /* One row of the per-language table of default detection encodings. */
  typedef struct _php_mb_nls_ident_list {
      enum mbfl_no_language lang; /* language this row applies to */
      enum mbfl_no_encoding* list; /* encodings associated with that language */
      int list_size; /* number of elements in list */
  } php_mb_nls_ident_list;
  /* Encodings referenced by the Japanese row of php_mb_default_identify_list. */
  static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
      mbfl_no_encoding_ascii,
      mbfl_no_encoding_jis,
      mbfl_no_encoding_utf8,
      mbfl_no_encoding_euc_jp,
      mbfl_no_encoding_sjis
  };
  /* Encodings referenced by the Simplified Chinese row. */
  static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
      mbfl_no_encoding_ascii,
      mbfl_no_encoding_utf8,
      mbfl_no_encoding_euc_cn,
      mbfl_no_encoding_cp936
  };
  /* Encodings referenced by the Traditional Chinese row. */
  static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
      mbfl_no_encoding_ascii,
      mbfl_no_encoding_utf8,
      mbfl_no_encoding_euc_tw,
      mbfl_no_encoding_big5
  };
  /* Encodings referenced by the Korean row. */
  static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
      mbfl_no_encoding_ascii,
      mbfl_no_encoding_utf8,
      mbfl_no_encoding_euc_kr,
      mbfl_no_encoding_uhc
  };
  /* Encodings referenced by the Russian row. */
  static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
      mbfl_no_encoding_ascii,
      mbfl_no_encoding_utf8,
      mbfl_no_encoding_koi8r,
      mbfl_no_encoding_cp1251,
      mbfl_no_encoding_cp866
  };
  /* Encodings referenced by the Armenian row. */
  static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
      mbfl_no_encoding_ascii,
      mbfl_no_encoding_utf8,
      mbfl_no_encoding_armscii8
  };
  /* Encodings referenced by the neutral row; also the fallback used when a
   * language has no row of its own and the initial value of the module
   * globals' default detect order. */
  static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
      mbfl_no_encoding_ascii,
      mbfl_no_encoding_utf8
  };
  122. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  123. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  124. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  125. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  126. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  127. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  128. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  129. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  130. };
  131. /* }}} */
  /* Argument info attached to mb_convert_variables() in mbstring_functions[].
   * Two leading entries are declared with pass flag 0 and the header carries
   * the flag 1; going by the name, this forces the third and all following
   * arguments to be passed by reference -- NOTE(review): confirm against the
   * ZEND_BEGIN_ARG_INFO definition. */
  static
  ZEND_BEGIN_ARG_INFO(third_and_rest_force_ref, 1)
      ZEND_ARG_PASS_INFO(0)
      ZEND_ARG_PASS_INFO(0)
  ZEND_END_ARG_INFO();
  137. /* {{{ mb_overload_def mb_ovld[] */
  /* Function overloading table consulted by PHP_RINIT when
   * mbstring.func_overload is non-zero. Each row holds an MB_OVERLOAD_* type
   * followed by three function names; judging by how RINIT uses the
   * orig_func / ovld_func / save_func members these are, in order, the
   * function being replaced, its mb_* replacement, and the name under which
   * the original is kept -- NOTE(review): member order is defined by
   * struct mb_overload_def, which is not shown here.
   * The table ends with a row whose type is 0. */
  static const struct mb_overload_def mb_ovld[] = {
      {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
      {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
      {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
      {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
      {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
      {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
      {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
      {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
  #if HAVE_MBREGEX
      /* Regex overloads exist only when the mbregex component is built. */
      {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
      {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
      {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
      {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
      {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
  #endif
      {0, NULL, NULL, NULL}
  };
  156. /* }}} */
  157. /* {{{ function_entry mbstring_functions[] */
  /* Functions exported by the extension; referenced from
   * mbstring_module_entry. The second PHP_FE argument names the argument
   * info for the function, or is NULL where none is supplied. The table is
   * terminated by an all-NULL row. */
  function_entry mbstring_functions[] = {
      PHP_FE(mb_convert_case, NULL)
      PHP_FE(mb_strtoupper, NULL)
      PHP_FE(mb_strtolower, NULL)
      PHP_FE(mb_language, NULL)
      PHP_FE(mb_internal_encoding, NULL)
      PHP_FE(mb_http_input, NULL)
      PHP_FE(mb_http_output, NULL)
      PHP_FE(mb_detect_order, NULL)
      PHP_FE(mb_substitute_character, NULL)
      PHP_FE(mb_parse_str, second_arg_force_ref)
      PHP_FE(mb_output_handler, NULL)
      PHP_FE(mb_preferred_mime_name, NULL)
      PHP_FE(mb_strlen, NULL)
      PHP_FE(mb_strpos, NULL)
      PHP_FE(mb_strrpos, NULL)
      PHP_FE(mb_substr_count, NULL)
      PHP_FE(mb_substr, NULL)
      PHP_FE(mb_strcut, NULL)
      PHP_FE(mb_strwidth, NULL)
      PHP_FE(mb_strimwidth, NULL)
      PHP_FE(mb_convert_encoding, NULL)
      PHP_FE(mb_detect_encoding, NULL)
      PHP_FE(mb_list_encodings, NULL)
      PHP_FE(mb_convert_kana, NULL)
      PHP_FE(mb_encode_mimeheader, NULL)
      PHP_FE(mb_decode_mimeheader, NULL)
      PHP_FE(mb_convert_variables, third_and_rest_force_ref)
      PHP_FE(mb_encode_numericentity, NULL)
      PHP_FE(mb_decode_numericentity, NULL)
      PHP_FE(mb_send_mail, NULL)
      PHP_FE(mb_get_info, NULL)
  #if HAVE_MBREGEX
      /* Additional entries contributed by the mbregex component. */
      PHP_MBREGEX_FUNCTION_ENTRIES
  #endif
      { NULL, NULL, NULL }
  };
  195. /* }}} */
  196. /* {{{ zend_module_entry mbstring_module_entry */
  /* Module descriptor: ties the extension name and function table to the
   * startup, shutdown, request and info callbacks defined in this file. */
  zend_module_entry mbstring_module_entry = {
      STANDARD_MODULE_HEADER,
      "mbstring", /* extension name */
      mbstring_functions, /* exported function table */
      PHP_MINIT(mbstring), /* module startup */
      PHP_MSHUTDOWN(mbstring), /* module shutdown */
      PHP_RINIT(mbstring), /* request startup */
      PHP_RSHUTDOWN(mbstring), /* request shutdown */
      PHP_MINFO(mbstring), /* phpinfo() section */
      NO_VERSION_YET,
      STANDARD_MODULE_PROPERTIES
  };
  209. /* }}} */
  210. /* {{{ static sapi_post_entry php_post_entries[] */
  /* POST entries using the standard handler (php_std_post_handler) for
   * form-encoded bodies. OnUpdate_mbstring_encoding_translation registers
   * these when encoding translation is switched off and unregisters them
   * when it is switched on. Each row is: content type, its length, and two
   * callbacks; the list ends with an all-NULL row. */
  static sapi_post_entry php_post_entries[] = {
      { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
      { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
      { NULL, 0, NULL, NULL }
  };
  216. /* }}} */
  /* Storage for the module globals accessed through MBSTRG(). */
  ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  /* Only when the extension is compiled as a loadable module
   * (COMPILE_DL_MBSTRING defined): emit the module lookup entry point. */
  #ifdef COMPILE_DL_MBSTRING
  ZEND_GET_MODULE(mbstring)
  # ifdef PHP_WIN32
  # include "zend_arg_defs.c"
  # endif
  #endif
  224. /* {{{ allocators */
  225. static void *_php_mb_allocators_malloc(unsigned int sz)
  226. {
  227. return emalloc(sz);
  228. }
  229. static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
  230. {
  231. return erealloc(ptr, sz);
  232. }
  233. static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
  234. {
  235. return ecalloc(nelems, szelem);
  236. }
  237. static void _php_mb_allocators_free(void *ptr)
  238. {
  239. efree(ptr);
  240. }
  241. static void *_php_mb_allocators_pmalloc(unsigned int sz)
  242. {
  243. return pemalloc(sz, 1);
  244. }
  245. static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
  246. {
  247. return perealloc(ptr, sz, 1);
  248. }
  249. static void _php_mb_allocators_pfree(void *ptr)
  250. {
  251. pefree(ptr, 1);
  252. }
  253. static mbfl_allocators _php_mb_allocators = {
  254. _php_mb_allocators_malloc,
  255. _php_mb_allocators_realloc,
  256. _php_mb_allocators_calloc,
  257. _php_mb_allocators_free,
  258. _php_mb_allocators_pmalloc,
  259. _php_mb_allocators_prealloc,
  260. _php_mb_allocators_pfree
  261. };
  262. /* }}} */
  263. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  /* POST entries using the mbstring handler (php_mb_post_handler) for
   * form-encoded bodies. Registered in PHP_MINIT and by
   * OnUpdate_mbstring_encoding_translation when mbstring.encoding_translation
   * is enabled. Same row layout as php_post_entries; the multipart row is
   * identical in both tables. */
  static sapi_post_entry mbstr_post_entries[] = {
      { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
      { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
      { NULL, 0, NULL, NULL }
  };
  269. /* }}} */
  270. /* {{{ static int php_mb_parse_encoding_list()
  271. * Return 0 if input contains any illegal encoding, otherwise 1.
  272. * Even if any illegal encoding is detected the result may contain a list
  273. * of parsed encodings.
  274. */
  275. static int
  276. php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  277. {
  278. int n, l, size, bauto, ret = 1;
  279. char *p, *p1, *p2, *endp, *tmpstr;
  280. enum mbfl_no_encoding no_encoding;
  281. enum mbfl_no_encoding *src, *entry, *list;
  282. list = NULL;
  283. if (value == NULL || value_length <= 0) {
  284. if (return_list) {
  285. *return_list = NULL;
  286. }
  287. if (return_size) {
  288. *return_size = 0;
  289. }
  290. return 0;
  291. } else {
  292. enum mbfl_no_encoding *identify_list;
  293. int identify_list_size;
  294. identify_list = MBSTRG(default_detect_order_list);
  295. identify_list_size = MBSTRG(default_detect_order_list_size);
  296. /* copy the value string for work */
  297. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  298. tmpstr = (char *)estrndup(value+1, value_length-2);
  299. value_length -= 2;
  300. }
  301. else
  302. tmpstr = (char *)estrndup(value, value_length);
  303. if (tmpstr == NULL) {
  304. return 0;
  305. }
  306. /* count the number of listed encoding names */
  307. endp = tmpstr + value_length;
  308. n = 1;
  309. p1 = tmpstr;
  310. while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
  311. p1 = p2 + 1;
  312. n++;
  313. }
  314. size = n + identify_list_size;
  315. /* make list */
  316. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  317. if (list != NULL) {
  318. entry = list;
  319. n = 0;
  320. bauto = 0;
  321. p1 = tmpstr;
  322. do {
  323. p2 = p = php_memnstr(p1, ",", 1, endp);
  324. if (p == NULL) {
  325. p = endp;
  326. }
  327. *p = '\0';
  328. /* trim spaces */
  329. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  330. p1++;
  331. }
  332. p--;
  333. while (p > p1 && (*p == ' ' || *p == '\t')) {
  334. *p = '\0';
  335. p--;
  336. }
  337. /* convert to the encoding number and check encoding */
  338. if (strcasecmp(p1, "auto") == 0) {
  339. if (!bauto) {
  340. bauto = 1;
  341. l = identify_list_size;
  342. src = identify_list;
  343. while (l > 0) {
  344. *entry++ = *src++;
  345. l--;
  346. n++;
  347. }
  348. }
  349. } else {
  350. no_encoding = mbfl_name2no_encoding(p1);
  351. if (no_encoding != mbfl_no_encoding_invalid) {
  352. *entry++ = no_encoding;
  353. n++;
  354. } else {
  355. ret = 0;
  356. }
  357. }
  358. p1 = p2 + 1;
  359. } while (n < size && p2 != NULL);
  360. if (n > 0) {
  361. if (return_list) {
  362. *return_list = list;
  363. } else {
  364. pefree(list, persistent);
  365. }
  366. } else {
  367. pefree(list, persistent);
  368. if (return_list) {
  369. *return_list = NULL;
  370. }
  371. ret = 0;
  372. }
  373. if (return_size) {
  374. *return_size = n;
  375. }
  376. } else {
  377. if (return_list) {
  378. *return_list = NULL;
  379. }
  380. if (return_size) {
  381. *return_size = 0;
  382. }
  383. ret = 0;
  384. }
  385. efree(tmpstr);
  386. }
  387. return ret;
  388. }
  389. /* }}} */
  390. /* {{{ MBSTRING_API php_mb_check_encoding_list */
  391. MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
  392. return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
  393. }
  394. /* }}} */
  395. /* {{{ static int php_mb_parse_encoding_array()
  396. * Return 0 if input contains any illegal encoding, otherwise 1.
  397. * Even if any illegal encoding is detected the result may contain a list
  398. * of parsed encodings.
  399. */
  400. static int
  401. php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  402. {
  403. zval **hash_entry;
  404. HashTable *target_hash;
  405. int i, n, l, size, bauto,ret = 1;
  406. enum mbfl_no_encoding no_encoding;
  407. enum mbfl_no_encoding *src, *list, *entry;
  408. list = NULL;
  409. if (Z_TYPE_P(array) == IS_ARRAY) {
  410. enum mbfl_no_encoding *identify_list;
  411. int identify_list_size;
  412. identify_list = MBSTRG(default_detect_order_list);
  413. identify_list_size = MBSTRG(default_detect_order_list_size);
  414. target_hash = Z_ARRVAL_P(array);
  415. zend_hash_internal_pointer_reset(target_hash);
  416. i = zend_hash_num_elements(target_hash);
  417. size = i + identify_list_size;
  418. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  419. if (list != NULL) {
  420. entry = list;
  421. bauto = 0;
  422. n = 0;
  423. while (i > 0) {
  424. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  425. break;
  426. }
  427. convert_to_string_ex(hash_entry);
  428. if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
  429. if (!bauto) {
  430. bauto = 1;
  431. l = identify_list_size;
  432. src = identify_list;
  433. while (l > 0) {
  434. *entry++ = *src++;
  435. l--;
  436. n++;
  437. }
  438. }
  439. } else {
  440. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
  441. if (no_encoding != mbfl_no_encoding_invalid) {
  442. *entry++ = no_encoding;
  443. n++;
  444. } else {
  445. ret = 0;
  446. }
  447. }
  448. zend_hash_move_forward(target_hash);
  449. i--;
  450. }
  451. if (n > 0) {
  452. if (return_list) {
  453. *return_list = list;
  454. } else {
  455. pefree(list, persistent);
  456. }
  457. } else {
  458. pefree(list, persistent);
  459. if (return_list) {
  460. *return_list = NULL;
  461. }
  462. ret = 0;
  463. }
  464. if (return_size) {
  465. *return_size = n;
  466. }
  467. } else {
  468. if (return_list) {
  469. *return_list = NULL;
  470. }
  471. if (return_size) {
  472. *return_size = 0;
  473. }
  474. ret = 0;
  475. }
  476. }
  477. return ret;
  478. }
  479. /* }}} */
  480. /* {{{ php_mb_nls_get_default_detect_order_list */
  481. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
  482. {
  483. size_t i;
  484. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  485. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  486. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  487. if (php_mb_default_identify_list[i].lang == lang) {
  488. *plist = php_mb_default_identify_list[i].list;
  489. *plist_size = php_mb_default_identify_list[i].list_size;
  490. return 1;
  491. }
  492. }
  493. return 0;
  494. }
  495. /* }}} */
  496. /* {{{ php.ini directive handler */
  497. static PHP_INI_MH(OnUpdate_mbstring_language)
  498. {
  499. enum mbfl_no_language no_language;
  500. no_language = mbfl_name2no_language(new_value);
  501. if (no_language == mbfl_no_language_invalid) {
  502. return FAILURE;
  503. }
  504. MBSTRG(language) = no_language;
  505. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  506. return SUCCESS;
  507. }
  508. /* }}} */
  509. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  510. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  511. {
  512. enum mbfl_no_encoding *list;
  513. int size;
  514. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  515. if (MBSTRG(detect_order_list) != NULL) {
  516. free(MBSTRG(detect_order_list));
  517. }
  518. MBSTRG(detect_order_list) = list;
  519. MBSTRG(detect_order_list_size) = size;
  520. } else {
  521. return FAILURE;
  522. }
  523. return SUCCESS;
  524. }
  525. /* }}} */
  526. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  527. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  528. {
  529. enum mbfl_no_encoding *list;
  530. int size;
  531. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  532. if (MBSTRG(http_input_list) != NULL) {
  533. free(MBSTRG(http_input_list));
  534. }
  535. MBSTRG(http_input_list) = list;
  536. MBSTRG(http_input_list_size) = size;
  537. } else {
  538. return FAILURE;
  539. }
  540. return SUCCESS;
  541. }
  542. /* }}} */
  543. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  544. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  545. {
  546. enum mbfl_no_encoding no_encoding;
  547. no_encoding = mbfl_name2no_encoding(new_value);
  548. if (no_encoding != mbfl_no_encoding_invalid) {
  549. MBSTRG(http_output_encoding) = no_encoding;
  550. MBSTRG(current_http_output_encoding) = no_encoding;
  551. } else {
  552. if (new_value != NULL && new_value_length > 0) {
  553. return FAILURE;
  554. }
  555. }
  556. return SUCCESS;
  557. }
  558. /* }}} */
  559. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  560. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  561. {
  562. enum mbfl_no_encoding no_encoding;
  563. if (new_value == NULL) {
  564. return SUCCESS;
  565. }
  566. no_encoding = mbfl_name2no_encoding(new_value);
  567. if (no_encoding != mbfl_no_encoding_invalid) {
  568. MBSTRG(internal_encoding) = no_encoding;
  569. MBSTRG(current_internal_encoding) = no_encoding;
  570. #if HAVE_MBREGEX
  571. {
  572. OnigEncoding mbctype;
  573. mbctype = php_mb_regex_name2mbctype(new_value);
  574. if (mbctype == ONIG_ENCODING_UNDEF) {
  575. mbctype = ONIG_ENCODING_EUC_JP;
  576. }
  577. MBSTRG(current_mbctype) = MBSTRG(default_mbctype) = mbctype;
  578. }
  579. #endif
  580. #ifdef ZEND_MULTIBYTE
  581. zend_multibyte_set_internal_encoding(new_value, new_value_length TSRMLS_CC);
  582. #endif /* ZEND_MULTIBYTE */
  583. } else {
  584. if (new_value != NULL && new_value_length > 0) {
  585. return FAILURE;
  586. }
  587. }
  588. return SUCCESS;
  589. }
  590. /* }}} */
  591. #ifdef ZEND_MULTIBYTE
  592. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
  593. static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
  594. {
  595. int *list, size;
  596. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  597. if (MBSTRG(script_encoding_list) != NULL) {
  598. free(MBSTRG(script_encoding_list));
  599. }
  600. MBSTRG(script_encoding_list) = list;
  601. MBSTRG(script_encoding_list_size) = size;
  602. } else {
  603. return FAILURE;
  604. }
  605. return SUCCESS;
  606. }
  607. /* }}} */
  608. #endif /* ZEND_MULTIBYTE */
  609. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  610. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  611. {
  612. if (new_value != NULL) {
  613. if (strcasecmp("none", new_value) == 0) {
  614. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  615. } else if (strcasecmp("long", new_value) == 0) {
  616. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  617. } else {
  618. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  619. MBSTRG(filter_illegal_substchar) = zend_atoi(new_value, new_value_length);
  620. }
  621. }
  622. return SUCCESS;
  623. }
  624. /* }}} */
  625. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  626. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  627. {
  628. if (new_value == NULL) {
  629. return FAILURE;
  630. }
  631. OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
  632. if (MBSTRG(encoding_translation)) {
  633. sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
  634. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  635. sapi_register_treat_data(mbstr_treat_data);
  636. } else {
  637. sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
  638. sapi_register_post_entries(php_post_entries TSRMLS_CC);
  639. }
  640. return SUCCESS;
  641. }
  642. /* }}} */
  643. /* {{{ php.ini directive registration */
  /* php.ini directives of the extension. Each row names the directive, its
   * default value, the PHP_INI_* flags saying where it may be changed, and
   * the handler that validates and applies a new value. The two STD_ rows
   * additionally name the zend_mbstring_globals member they store into. */
  PHP_INI_BEGIN()
      PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_language)
      PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
      PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
      PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
      PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
  #ifdef ZEND_MULTIBYTE
      /* Only available when the engine is built with ZEND_MULTIBYTE. */
      PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
  #endif /* ZEND_MULTIBYTE */
      PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
      STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM |
      PHP_INI_PERDIR, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
      STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
      PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_encoding_translation,
      encoding_translation, zend_mbstring_globals, mbstring_globals)
  PHP_INI_END()
  660. /* }}} */
  661. /* {{{ module global initialize handler */
  662. static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC)
  663. {
  664. MBSTRG(language) = mbfl_no_language_uni;
  665. MBSTRG(current_language) = MBSTRG(language);
  666. MBSTRG(internal_encoding) = mbfl_no_encoding_invalid;
  667. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  668. #ifdef ZEND_MULTIBYTE
  669. MBSTRG(script_encoding_list) = NULL;
  670. MBSTRG(script_encoding_list_size) = 0;
  671. #endif /* ZEND_MULTIBYTE */
  672. MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
  673. MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
  674. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  675. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  676. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  677. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  678. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  679. MBSTRG(http_input_list) = NULL;
  680. MBSTRG(http_input_list_size) = 0;
  681. MBSTRG(detect_order_list) = NULL;
  682. MBSTRG(detect_order_list_size) = 0;
  683. MBSTRG(current_detect_order_list) = NULL;
  684. MBSTRG(current_detect_order_list_size) = 0;
  685. MBSTRG(default_detect_order_list) = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  686. MBSTRG(default_detect_order_list_size) = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  687. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  688. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  689. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  690. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  691. MBSTRG(func_overload) = 0;
  692. MBSTRG(encoding_translation) = 0;
  693. pglobals->outconv = NULL;
  694. #if HAVE_MBREGEX
  695. _php_mb_regex_globals_ctor(pglobals TSRMLS_CC);
  696. #endif
  697. }
  698. /* }}} */
  699. /* {{{ static void _php_mb_globals_dtor() */
  /* Module globals destructor. Its only action is to run the mbregex globals
   * destructor when the extension is built with HAVE_MBREGEX; otherwise the
   * body is empty. */
  static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC)
  {
  #if HAVE_MBREGEX
      _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
  #endif
  }
  706. /* }}} */
  707. /* {{{ PHP_MINIT_FUNCTION(mbstring) */
  /* Module startup: installs the allocator callbacks into libmbfl, sets up
   * the module globals, registers the INI entries, the POST handlers (when
   * encoding translation is enabled) and the MB_* constants, then starts the
   * mbregex component if it is built in. Always returns SUCCESS. */
  PHP_MINIT_FUNCTION(mbstring)
  {
      /* route libmbfl allocations through the callbacks defined above */
      __mbfl_allocators = &_php_mb_allocators;
  #ifdef ZTS
      /* thread-safe build: register ctor/dtor for the globals storage */
      ts_allocate_id(&mbstring_globals_id, sizeof(zend_mbstring_globals),
      (ts_allocate_ctor) _php_mb_globals_ctor,
      (ts_allocate_dtor) _php_mb_globals_dtor);
  #else
      _php_mb_globals_ctor(&mbstring_globals TSRMLS_CC);
  #endif
      REGISTER_INI_ENTRIES();
      if (MBSTRG(encoding_translation)) {
          sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
          sapi_register_treat_data(mbstr_treat_data);
      }
      /* bit values accepted by mbstring.func_overload */
      REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
      REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
      REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
      /* case-conversion modes, mapped onto the PHP_UNICODE_CASE_* values */
      REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
      REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
      REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
  #if HAVE_MBREGEX
      PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  #endif
      return SUCCESS;
  }
  734. /* }}} */
  735. /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
  736. PHP_MSHUTDOWN_FUNCTION(mbstring)
  737. {
  738. UNREGISTER_INI_ENTRIES();
  739. if (MBSTRG(http_input_list)) {
  740. free(MBSTRG(http_input_list));
  741. }
  742. #ifdef ZEND_MULTIBYTE
  743. if (MBSTRG(script_encoding_list)) {
  744. free(MBSTRG(script_encoding_list));
  745. }
  746. #endif /* ZEND_MULTIBYTE */
  747. if (MBSTRG(detect_order_list)) {
  748. free(MBSTRG(detect_order_list));
  749. }
  750. #if HAVE_MBREGEX
  751. PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  752. #endif
  753. #ifdef ZTS
  754. ts_free_id(mbstring_globals_id);
  755. #else
  756. _php_mb_globals_dtor(&mbstring_globals TSRMLS_CC);
  757. #endif
  758. return SUCCESS;
  759. }
  760. /* }}} */
  761. /* {{{ PHP_RINIT_FUNCTION(mbstring) */
  762. PHP_RINIT_FUNCTION(mbstring)
  763. {
  764. int n;
  765. enum mbfl_no_encoding *list=NULL, *entry;
  766. zend_function *func, *orig;
  767. const struct mb_overload_def *p;
  768. MBSTRG(current_language) = MBSTRG(language);
  769. if (MBSTRG(internal_encoding) == mbfl_no_encoding_invalid) {
  770. char *default_enc = NULL;
  771. switch (MBSTRG(current_language)) {
  772. case mbfl_no_language_uni:
  773. default_enc = "UTF-8";
  774. break;
  775. case mbfl_no_language_japanese:
  776. default_enc = "EUC-JP";
  777. break;
  778. case mbfl_no_language_korean:
  779. default_enc = "EUC-KR";
  780. break;
  781. case mbfl_no_language_simplified_chinese:
  782. default_enc = "EUC-CN";
  783. break;
  784. case mbfl_no_language_traditional_chinese:
  785. default_enc = "EUC-TW";
  786. break;
  787. case mbfl_no_language_russian:
  788. default_enc = "KOI8-R";
  789. break;
  790. case mbfl_no_language_german:
  791. default_enc = "ISO-8859-15";
  792. break;
  793. case mbfl_no_language_armenian:
  794. default_enc = "ArmSCII-8";
  795. break;
  796. case mbfl_no_language_english:
  797. default:
  798. default_enc = "ISO-8859-1";
  799. break;
  800. }
  801. if (default_enc) {
  802. zend_alter_ini_entry("mbstring.internal_encoding",
  803. sizeof("mbstring.internal_encoding"),
  804. default_enc, strlen(default_enc),
  805. PHP_INI_PERDIR, PHP_INI_STAGE_RUNTIME);
  806. }
  807. }
  808. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  809. MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
  810. MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
  811. MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
  812. n = 0;
  813. if (MBSTRG(detect_order_list)) {
  814. list = MBSTRG(detect_order_list);
  815. n = MBSTRG(detect_order_list_size);
  816. }
  817. if (n <= 0) {
  818. list = MBSTRG(default_detect_order_list);
  819. n = MBSTRG(default_detect_order_list_size);
  820. }
  821. entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
  822. MBSTRG(current_detect_order_list) = entry;
  823. MBSTRG(current_detect_order_list_size) = n;
  824. while (n > 0) {
  825. *entry++ = *list++;
  826. n--;
  827. }
  828. /* override original function. */
  829. if (MBSTRG(func_overload)){
  830. p = &(mb_ovld[0]);
  831. while (p->type > 0) {
  832. if ((MBSTRG(func_overload) & p->type) == p->type &&
  833. zend_hash_find(EG(function_table), p->save_func,
  834. strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
  835. zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
  836. if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
  837. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
  838. return FAILURE;
  839. } else {
  840. zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
  841. if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
  842. NULL) == FAILURE) {
  843. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
  844. return FAILURE;
  845. }
  846. }
  847. }
  848. p++;
  849. }
  850. }
  851. #if HAVE_MBREGEX
  852. PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  853. #endif
  854. #ifdef ZEND_MULTIBYTE
  855. php_mb_set_zend_encoding(TSRMLS_C);
  856. #endif /* ZEND_MULTIBYTE */
  857. return SUCCESS;
  858. }
  859. /* }}} */
  860. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  861. PHP_RSHUTDOWN_FUNCTION(mbstring)
  862. {
  863. const struct mb_overload_def *p;
  864. zend_function *orig;
  865. if (MBSTRG(current_detect_order_list) != NULL) {
  866. efree(MBSTRG(current_detect_order_list));
  867. MBSTRG(current_detect_order_list) = NULL;
  868. MBSTRG(current_detect_order_list_size) = 0;
  869. }
  870. if (MBSTRG(outconv) != NULL) {
  871. mbfl_buffer_converter_delete(MBSTRG(outconv));
  872. MBSTRG(outconv) = NULL;
  873. }
  874. /* clear http input identification. */
  875. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  876. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  877. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  878. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  879. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  880. /* clear overloaded function. */
  881. if (MBSTRG(func_overload)){
  882. p = &(mb_ovld[0]);
  883. while (p->type > 0 && zend_hash_find(EG(function_table), p->save_func, strlen(p->save_func)+1 , (void **)&orig) == SUCCESS) {
  884. zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
  885. zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
  886. p++;
  887. }
  888. }
  889. #if HAVE_MBREGEX
  890. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  891. #endif
  892. return SUCCESS;
  893. }
  894. /* }}} */
  895. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  896. PHP_MINFO_FUNCTION(mbstring)
  897. {
  898. php_info_print_table_start();
  899. php_info_print_table_row(2, "Multibyte Support", "enabled");
  900. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  901. if (MBSTRG(encoding_translation)) {
  902. php_info_print_table_row(2, "HTTP input encoding translation", "enabled");
  903. }
  904. #if defined(HAVE_MBREGEX)
  905. {
  906. char buf[32];
  907. php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
  908. sprintf(buf, "%d.%d.%d",
  909. ONIGURUMA_VERSION_MAJOR,ONIGURUMA_VERSION_MINOR,ONIGURUMA_VERSION_TEENY);
  910. php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
  911. }
  912. #endif
  913. php_info_print_table_end();
  914. php_info_print_table_start();
  915. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  916. php_info_print_table_end();
  917. DISPLAY_INI_ENTRIES();
  918. }
  919. /* }}} */
  920. /* {{{ proto string mb_language([string language])
  921. Sets the current language or Returns the current language as a string */
  922. PHP_FUNCTION(mb_language)
  923. {
  924. char *name = NULL;
  925. int name_len = 0;
  926. enum mbfl_no_language no_language;
  927. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  928. return;
  929. }
  930. if (name == NULL) {
  931. RETURN_STRING((char *)mbfl_no_language2name(MBSTRG(current_language)), 1);
  932. } else {
  933. no_language = mbfl_name2no_language(name);
  934. if (no_language == mbfl_no_language_invalid) {
  935. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
  936. RETURN_FALSE;
  937. } else {
  938. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  939. MBSTRG(current_language) = no_language;
  940. RETURN_TRUE;
  941. }
  942. }
  943. }
  944. /* }}} */
  945. /* {{{ proto string mb_internal_encoding([string encoding])
  946. Sets the current internal encoding or Returns the current internal encoding as a string */
  947. PHP_FUNCTION(mb_internal_encoding)
  948. {
  949. char *name = NULL;
  950. int name_len;
  951. enum mbfl_no_encoding no_encoding;
  952. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  953. RETURN_FALSE;
  954. }
  955. if (name == NULL) {
  956. name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  957. if (name != NULL) {
  958. RETURN_STRING(name, 1);
  959. } else {
  960. RETURN_FALSE;
  961. }
  962. } else {
  963. no_encoding = mbfl_name2no_encoding(name);
  964. if (no_encoding == mbfl_no_encoding_invalid) {
  965. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  966. RETURN_FALSE;
  967. } else {
  968. MBSTRG(current_internal_encoding) = no_encoding;
  969. #ifdef ZEND_MULTIBYTE
  970. /* TODO: make independent from mbstring.encoding_translation? */
  971. if (MBSTRG(encoding_translation)) {
  972. zend_multibyte_set_internal_encoding(name, name_len TSRMLS_CC);
  973. }
  974. #endif /* ZEND_MULTIBYTE */
  975. RETURN_TRUE;
  976. }
  977. }
  978. }
  979. /* }}} */
  980. /* {{{ proto mixed mb_http_input([string type])
  981. Returns the input encoding */
  982. PHP_FUNCTION(mb_http_input)
  983. {
  984. char *typ = NULL;
  985. int typ_len;
  986. int retname, n;
  987. char *name, *list, *temp;
  988. enum mbfl_no_encoding *entry;
  989. enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
  990. retname = 1;
  991. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  992. RETURN_FALSE;
  993. }
  994. if (typ == NULL) {
  995. result = MBSTRG(http_input_identify);
  996. } else {
  997. switch (*typ) {
  998. case 'G':
  999. case 'g':
  1000. result = MBSTRG(http_input_identify_get);
  1001. break;
  1002. case 'P':
  1003. case 'p':
  1004. result = MBSTRG(http_input_identify_post);
  1005. break;
  1006. case 'C':
  1007. case 'c':
  1008. result = MBSTRG(http_input_identify_cookie);
  1009. break;
  1010. case 'S':
  1011. case 's':
  1012. result = MBSTRG(http_input_identify_string);
  1013. break;
  1014. case 'I':
  1015. case 'i':
  1016. array_init(return_value);
  1017. entry = MBSTRG(http_input_list);
  1018. n = MBSTRG(http_input_list_size);
  1019. while (n > 0) {
  1020. name = (char *)mbfl_no_encoding2name(*entry);
  1021. if (name) {
  1022. add_next_index_string(return_value, name, 1);
  1023. }
  1024. entry++;
  1025. n--;
  1026. }
  1027. retname = 0;
  1028. break;
  1029. case 'L':
  1030. case 'l':
  1031. entry = MBSTRG(http_input_list);
  1032. n = MBSTRG(http_input_list_size);
  1033. list = NULL;
  1034. while (n > 0) {
  1035. name = (char *)mbfl_no_encoding2name(*entry);
  1036. if (name) {
  1037. if (list) {
  1038. temp = list;
  1039. spprintf(&list, 0, "%s,%s", temp, name);
  1040. efree(temp);
  1041. if (!list) {
  1042. break;
  1043. }
  1044. } else {
  1045. list = estrdup(name);
  1046. }
  1047. }
  1048. entry++;
  1049. n--;
  1050. }
  1051. if (!list) {
  1052. RETURN_FALSE;
  1053. }
  1054. RETVAL_STRING(list, 0);
  1055. retname = 0;
  1056. break;
  1057. default:
  1058. result = MBSTRG(http_input_identify);
  1059. break;
  1060. }
  1061. }
  1062. if (retname) {
  1063. if (result != mbfl_no_encoding_invalid &&
  1064. (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
  1065. RETVAL_STRING(name, 1);
  1066. } else {
  1067. RETVAL_FALSE;
  1068. }
  1069. }
  1070. }
  1071. /* }}} */
  1072. /* {{{ proto string mb_http_output([string encoding])
  1073. Sets the current output_encoding or returns the current output_encoding as a string */
  1074. PHP_FUNCTION(mb_http_output)
  1075. {
  1076. char *name = NULL;
  1077. int name_len;
  1078. enum mbfl_no_encoding no_encoding;
  1079. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
  1080. RETURN_FALSE;
  1081. }
  1082. if (name == NULL) {
  1083. name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
  1084. if (name != NULL) {
  1085. RETURN_STRING(name, 1);
  1086. } else {
  1087. RETURN_FALSE;
  1088. }
  1089. } else {
  1090. no_encoding = mbfl_name2no_encoding(name);
  1091. if (no_encoding == mbfl_no_encoding_invalid) {
  1092. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1093. RETURN_FALSE;
  1094. } else {
  1095. MBSTRG(current_http_output_encoding) = no_encoding;
  1096. RETURN_TRUE;
  1097. }
  1098. }
  1099. }
  1100. /* }}} */
  1101. /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
  1102. Sets the current detect_order or returns the current detect_order as an array */
  1103. PHP_FUNCTION(mb_detect_order)
  1104. {
  1105. zval **arg1;
  1106. int n, size;
  1107. enum mbfl_no_encoding *list, *entry;
  1108. char *name;
  1109. if (ZEND_NUM_ARGS() == 0) {
  1110. array_init(return_value);
  1111. entry = MBSTRG(current_detect_order_list);
  1112. n = MBSTRG(current_detect_order_list_size);
  1113. while (n > 0) {
  1114. name = (char *)mbfl_no_encoding2name(*entry);
  1115. if (name) {
  1116. add_next_index_string(return_value, name, 1);
  1117. }
  1118. entry++;
  1119. n--;
  1120. }
  1121. } else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) {
  1122. list = NULL;
  1123. size = 0;
  1124. switch (Z_TYPE_PP(arg1)) {
  1125. case IS_ARRAY:
  1126. if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
  1127. if (list) {
  1128. efree(list);
  1129. }
  1130. RETURN_FALSE;
  1131. }
  1132. break;
  1133. default:
  1134. convert_to_string_ex(arg1);
  1135. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
  1136. if (list) {
  1137. efree(list);
  1138. }
  1139. RETURN_FALSE;
  1140. }
  1141. break;
  1142. }
  1143. if (list == NULL) {
  1144. RETVAL_FALSE;
  1145. } else {
  1146. if (MBSTRG(current_detect_order_list)) {
  1147. efree(MBSTRG(current_detect_order_list));
  1148. }
  1149. MBSTRG(current_detect_order_list) = list;
  1150. MBSTRG(current_detect_order_list_size) = size;
  1151. RETVAL_TRUE;
  1152. }
  1153. } else {
  1154. WRONG_PARAM_COUNT;
  1155. }
  1156. }
  1157. /* }}} */
  1158. /* {{{ proto mixed mb_substitute_character([mixed substchar])
  1159. Sets the current substitute_character or returns the current substitute_character */
  1160. PHP_FUNCTION(mb_substitute_character)
  1161. {
  1162. zval **arg1;
  1163. if (ZEND_NUM_ARGS() == 0) {
  1164. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1165. RETVAL_STRING("none", 1);
  1166. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1167. RETVAL_STRING("long", 1);
  1168. } else {
  1169. RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
  1170. }
  1171. } else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) {
  1172. RETVAL_TRUE;
  1173. switch (Z_TYPE_PP(arg1)) {
  1174. case IS_STRING:
  1175. if (strcasecmp("none", Z_STRVAL_PP(arg1)) == 0) {
  1176. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1177. } else if (strcasecmp("long", Z_STRVAL_PP(arg1)) == 0) {
  1178. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1179. } else {
  1180. convert_to_long_ex(arg1);
  1181. if (Z_LVAL_PP(arg1)< 0xffff && Z_LVAL_PP(arg1)> 0x0) {
  1182. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1183. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1184. } else {
  1185. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1186. RETVAL_FALSE;
  1187. }
  1188. }
  1189. break;
  1190. default:
  1191. convert_to_long_ex(arg1);
  1192. if (Z_LVAL_PP(arg1)< 0xffff && Z_LVAL_PP(arg1)> 0x0) {
  1193. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1194. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1195. } else {
  1196. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1197. RETVAL_FALSE;
  1198. }
  1199. break;
  1200. }
  1201. } else {
  1202. WRONG_PARAM_COUNT;
  1203. }
  1204. }
  1205. /* }}} */
  1206. /* {{{ proto string mb_preferred_mime_name(string encoding)
  1207. Return the preferred MIME name (charset) as a string */
  1208. PHP_FUNCTION(mb_preferred_mime_name)
  1209. {
  1210. enum mbfl_no_encoding no_encoding;
  1211. char *name = NULL;
  1212. int name_len;
  1213. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  1214. return;
  1215. } else {
  1216. no_encoding = mbfl_name2no_encoding(name);
  1217. if (no_encoding == mbfl_no_encoding_invalid) {
  1218. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1219. RETVAL_FALSE;
  1220. } else {
  1221. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1222. if (preferred_name == NULL || *preferred_name == '\0') {
  1223. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
  1224. RETVAL_FALSE;
  1225. } else {
  1226. RETVAL_STRING((char *)preferred_name, 1);
  1227. }
  1228. }
  1229. }
  1230. }
  1231. /* }}} */
  /* Evaluates to 1 when c is in 0x81..0x9f or 0xe0..0xf5, otherwise 0. */
  #define IS_SJIS1(c) (((((c) < 0x81) || ((c) > 0x9f)) && (((c) < 0xe0) || ((c) > 0xf5))) ? 0 : 1)
  /* Evaluates to 1 when c is in 0x40..0x7e or 0x80..0xfc, otherwise 0. */
  #define IS_SJIS2(c) (((((c) < 0x40) || ((c) > 0x7e)) && (((c) < 0x80) || ((c) > 0xfc))) ? 0 : 1)
  1234. /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
  1235. Parses GET/POST/COOKIE data and sets global variables */
  1236. PHP_FUNCTION(mb_parse_str)
  1237. {
  1238. zval *track_vars_array;
  1239. char *encstr = NULL;
  1240. int encstr_len;
  1241. php_mb_encoding_handler_info_t info;
  1242. enum mbfl_no_encoding detected;
  1243. track_vars_array = NULL;
  1244. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
  1245. return;
  1246. }
  1247. /* Clear out the array */
  1248. if (track_vars_array != NULL) {
  1249. zval_dtor(track_vars_array);
  1250. array_init(track_vars_array);
  1251. }
  1252. encstr = estrndup(encstr, encstr_len);
  1253. info.data_type = PARSE_STRING;
  1254. info.separator = PG(arg_separator).input;
  1255. info.force_register_globals = (track_vars_array == NULL);
  1256. info.report_errors = 1;
  1257. info.to_encoding = MBSTRG(current_internal_encoding);
  1258. info.to_language = MBSTRG(current_language);
  1259. info.from_encodings = MBSTRG(http_input_list);
  1260. info.num_from_encodings = MBSTRG(http_input_list_size);
  1261. info.from_language = MBSTRG(current_language);
  1262. detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
  1263. MBSTRG(http_input_identify) = detected;
  1264. RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
  1265. if (encstr != NULL) efree(encstr);
  1266. }
  1267. /* }}} */
  1268. /* {{{ proto string mb_output_handler(string contents, int status)
  1269. Returns string in output buffer converted to the http_output encoding */
  1270. PHP_FUNCTION(mb_output_handler)
  1271. {
  1272. char *arg_string;
  1273. int arg_string_len;
  1274. long arg_status;
  1275. mbfl_string string, result;
  1276. const char *charset;
  1277. char *p;
  1278. enum mbfl_no_encoding encoding;
  1279. int last_feed, len;
  1280. unsigned char send_text_mimetype = 0;
  1281. char *s, *mimetype = NULL;
  1282. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
  1283. return;
  1284. }
  1285. encoding = MBSTRG(current_http_output_encoding);
  1286. /* start phase only */
  1287. if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
  1288. /* delete the converter just in case. */
  1289. if (MBSTRG(outconv)) {
  1290. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1291. MBSTRG(outconv) = NULL;
  1292. }
  1293. if (encoding == mbfl_no_encoding_pass) {
  1294. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1295. }
  1296. /* analyze mime type */
  1297. if (SG(sapi_headers).mimetype &&
  1298. strncmp(SG(sapi_headers).mimetype, "text/", 5) == 0) {
  1299. if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
  1300. mimetype = estrdup(SG(sapi_headers).mimetype);
  1301. } else {
  1302. mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
  1303. }
  1304. send_text_mimetype = 1;
  1305. } else if (SG(sapi_headers).send_default_content_type) {
  1306. mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
  1307. }
  1308. /* if content-type is not yet set, set it and activate the converter */
  1309. if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
  1310. charset = mbfl_no2preferred_mime_name(encoding);
  1311. if (charset) {
  1312. len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
  1313. if (sapi_add_header(p, len, 0) != FAILURE) {
  1314. SG(sapi_headers).send_default_content_type = 0;
  1315. }
  1316. }
  1317. /* activate the converter */
  1318. MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
  1319. if (send_text_mimetype){
  1320. efree(mimetype);
  1321. }
  1322. }
  1323. }
  1324. /* just return if the converter is not activated. */
  1325. if (MBSTRG(outconv) == NULL) {
  1326. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1327. }
  1328. /* flag */
  1329. last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
  1330. /* mode */
  1331. mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1332. mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1333. /* feed the string */
  1334. mbfl_string_init(&string);
  1335. string.no_language = MBSTRG(current_language);
  1336. string.no_encoding = MBSTRG(current_internal_encoding);
  1337. string.val = (unsigned char *)arg_string;
  1338. string.len = arg_string_len;
  1339. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1340. if (last_feed) {
  1341. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1342. }
  1343. /* get the converter output, and return it */
  1344. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1345. RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
  1346. /* delete the converter if it is the last feed. */
  1347. if (last_feed) {
  1348. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1349. MBSTRG(outconv) = NULL;
  1350. }
  1351. }
  1352. /* }}} */
  1353. /* {{{ proto int mb_strlen(string str [, string encoding])
  1354. Get character numbers of a string */
  1355. PHP_FUNCTION(mb_strlen)
  1356. {
  1357. int n;
  1358. mbfl_string string;
  1359. char *enc_name = NULL;
  1360. int enc_name_len;
  1361. mbfl_string_init(&string);
  1362. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1363. RETURN_FALSE;
  1364. }
  1365. string.no_language = MBSTRG(current_language);
  1366. if (enc_name == NULL) {
  1367. string.no_encoding = MBSTRG(current_internal_encoding);
  1368. } else {
  1369. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1370. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1371. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1372. RETURN_FALSE;
  1373. }
  1374. }
  1375. n = mbfl_strlen(&string);
  1376. if (n >= 0) {
  1377. RETVAL_LONG(n);
  1378. } else {
  1379. RETVAL_FALSE;
  1380. }
  1381. }
  1382. /* }}} */
  1383. /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
  1384. Find position of first occurrence of a string within another */
  1385. PHP_FUNCTION(mb_strpos)
  1386. {
  1387. int n, reverse = 0;
  1388. long offset;
  1389. mbfl_string haystack, needle;
  1390. char *enc_name = NULL;
  1391. int enc_name_len;
  1392. mbfl_string_init(&haystack);
  1393. mbfl_string_init(&needle);
  1394. haystack.no_language = MBSTRG(current_language);
  1395. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1396. needle.no_language = MBSTRG(current_language);
  1397. needle.no_encoding = MBSTRG(current_internal_encoding);
  1398. offset = 0;
  1399. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
  1400. RETURN_FALSE;
  1401. }
  1402. if (enc_name != NULL) {
  1403. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1404. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1405. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1406. RETURN_FALSE;
  1407. }
  1408. }
  1409. if (offset < 0 || (unsigned long)offset > haystack.len) {
  1410. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is out of range");
  1411. RETURN_FALSE;
  1412. }
  1413. if (needle.len == 0) {
  1414. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty needle");
  1415. RETURN_FALSE;
  1416. }
  1417. n = mbfl_strpos(&haystack, &needle, offset, reverse);
  1418. if (n >= 0) {
  1419. RETVAL_LONG(n);
  1420. } else {
  1421. switch (-n) {
  1422. case 1:
  1423. break;
  1424. case 2:
  1425. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length.");
  1426. break;
  1427. case 4:
  1428. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error.");
  1429. break;
  1430. case 8:
  1431. php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty.");
  1432. break;
  1433. default:
  1434. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos.");
  1435. break;
  1436. }
  1437. RETVAL_FALSE;
  1438. }
  1439. }
  1440. /* }}} */
  1441. /* {{{ proto int mb_strrpos(string haystack, string needle [, string encoding])
  1442. Find the last occurrence of a character in a string within another */
  1443. PHP_FUNCTION(mb_strrpos)
  1444. {
  1445. int n;
  1446. mbfl_string haystack, needle;
  1447. char *enc_name = NULL;
  1448. int enc_name_len;
  1449. mbfl_string_init(&haystack);
  1450. mbfl_string_init(&needle);
  1451. haystack.no_language = MBSTRG(current_language);
  1452. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1453. needle.no_language = MBSTRG(current_language);
  1454. needle.no_encoding = MBSTRG(current_internal_encoding);
  1455. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  1456. RETURN_FALSE;
  1457. }
  1458. if (enc_name != NULL) {
  1459. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1460. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1461. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1462. RETURN_FALSE;
  1463. }
  1464. }
  1465. if (haystack.len <= 0) {
  1466. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty haystack");
  1467. RETURN_FALSE;
  1468. }
  1469. if (needle.len <= 0) {
  1470. php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle");
  1471. RETURN_FALSE;
  1472. }
  1473. n = mbfl_strpos(&haystack, &needle, 0, 1);
  1474. if (n >= 0) {
  1475. RETVAL_LONG(n);
  1476. } else {
  1477. RETVAL_FALSE;
  1478. }
  1479. }
  1480. /* }}} */
  1481. /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
  1482. Count the number of substring occurrences */
  1483. PHP_FUNCTION(mb_substr_count)
  1484. {
  1485. int n;
  1486. mbfl_string haystack, needle;
  1487. char *enc_name = NULL;
  1488. int enc_name_len;
  1489. mbfl_string_init(&haystack);
  1490. mbfl_string_init(&needle);
  1491. haystack.no_language = MBSTRG(current_language);
  1492. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1493. needle.no_language = MBSTRG(current_language);
  1494. needle.no_encoding = MBSTRG(current_internal_encoding);
  1495. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  1496. return;
  1497. }
  1498. if (enc_name != NULL) {
  1499. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1500. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1501. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1502. RETURN_FALSE;
  1503. }
  1504. }
  1505. if (needle.len <= 0) {
  1506. php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle");
  1507. RETURN_FALSE;
  1508. }
  1509. n = mbfl_substr_count(&haystack, &needle);
  1510. if (n >= 0) {
  1511. RETVAL_LONG(n);
  1512. } else {
  1513. RETVAL_FALSE;
  1514. }
  1515. }
  1516. /* }}} */
  1517. /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
  1518. Returns part of a string */
  1519. PHP_FUNCTION(mb_substr)
  1520. {
  1521. zval **arg1, **arg2, **arg3, **arg4;
  1522. int argc, from, len, mblen;
  1523. mbfl_string string, result, *ret;
  1524. mbfl_string_init(&string);
  1525. string.no_language = MBSTRG(current_language);
  1526. string.no_encoding = MBSTRG(current_internal_encoding);
  1527. argc = ZEND_NUM_ARGS();
  1528. switch (argc) {
  1529. case 2:
  1530. if (zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) {
  1531. WRONG_PARAM_COUNT;
  1532. }
  1533. break;
  1534. case 3:
  1535. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1536. WRONG_PARAM_COUNT;
  1537. }
  1538. break;
  1539. case 4:
  1540. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1541. WRONG_PARAM_COUNT;
  1542. }
  1543. convert_to_string_ex(arg4);
  1544. string.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg4));
  1545. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1546. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg4));
  1547. RETURN_FALSE;
  1548. }
  1549. break;
  1550. default:
  1551. WRONG_PARAM_COUNT;
  1552. }
  1553. convert_to_string_ex(arg1);
  1554. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  1555. string.len = Z_STRLEN_PP(arg1);
  1556. convert_to_long_ex(arg2);
  1557. from = Z_LVAL_PP(arg2);
  1558. if (argc >= 3) {
  1559. convert_to_long_ex(arg3);
  1560. len = Z_LVAL_PP(arg3);
  1561. } else {
  1562. len = Z_STRLEN_PP(arg1);
  1563. }
  1564. /* measures length */
  1565. mblen = 0;
  1566. if (from < 0 || len < 0) {
  1567. mblen = mbfl_strlen(&string);
  1568. }
  1569. /* if "from" position is negative, count start position from the end
  1570. * of the string
  1571. */
  1572. if (from < 0) {
  1573. from = mblen + from;
  1574. if (from < 0) {
  1575. from = 0;
  1576. }
  1577. }
  1578. /* if "length" position is negative, set it to the length
  1579. * needed to stop that many chars from the end of the string
  1580. */
  1581. if (len < 0) {
  1582. len = (mblen - from) + len;
  1583. if (len < 0) {
  1584. len = 0;
  1585. }
  1586. }
  1587. ret = mbfl_substr(&string, &result, from, len);
  1588. if (ret != NULL) {
  1589. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1590. } else {
  1591. RETVAL_FALSE;
  1592. }
  1593. }
  1594. /* }}} */
  1595. /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
  1596. Returns part of a string */
  1597. PHP_FUNCTION(mb_strcut)
  1598. {
  1599. pval **arg1, **arg2, **arg3, **arg4;
  1600. int argc, from, len;
  1601. mbfl_string string, result, *ret;
  1602. mbfl_string_init(&string);
  1603. string.no_language = MBSTRG(current_language);
  1604. string.no_encoding = MBSTRG(current_internal_encoding);
  1605. argc = ZEND_NUM_ARGS();
  1606. switch (argc) {
  1607. case 2:
  1608. if (zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) {
  1609. WRONG_PARAM_COUNT;
  1610. }
  1611. break;
  1612. case 3:
  1613. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1614. WRONG_PARAM_COUNT;
  1615. }
  1616. break;
  1617. case 4:
  1618. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1619. WRONG_PARAM_COUNT;
  1620. }
  1621. convert_to_string_ex(arg4);
  1622. string.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg4));
  1623. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1624. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg4));
  1625. RETURN_FALSE;
  1626. }
  1627. break;
  1628. default:
  1629. WRONG_PARAM_COUNT;
  1630. }
  1631. convert_to_string_ex(arg1);
  1632. string.val = Z_STRVAL_PP(arg1);
  1633. string.len = Z_STRLEN_PP(arg1);
  1634. convert_to_long_ex(arg2);
  1635. from = Z_LVAL_PP(arg2);
  1636. if (argc >= 3) {
  1637. convert_to_long_ex(arg3);
  1638. len = Z_LVAL_PP(arg3);
  1639. } else {
  1640. len = Z_STRLEN_PP(arg1);
  1641. }
  1642. /* if "from" position is negative, count start position from the end
  1643. * of the string
  1644. */
  1645. if (from < 0) {
  1646. from = Z_STRLEN_PP(arg1) + from;
  1647. if (from < 0) {
  1648. from = 0;
  1649. }
  1650. }
  1651. /* if "length" position is negative, set it to the length
  1652. * needed to stop that many chars from the end of the string
  1653. */
  1654. if (len < 0) {
  1655. len = (Z_STRLEN_PP(arg1) - from) + len;
  1656. if (len < 0) {
  1657. len = 0;
  1658. }
  1659. }
  1660. ret = mbfl_strcut(&string, &result, from, len);
  1661. if (ret != NULL) {
  1662. RETVAL_STRINGL(ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1663. } else {
  1664. RETVAL_FALSE;
  1665. }
  1666. }
  1667. /* }}} */
  1668. /* {{{ proto int mb_strwidth(string str [, string encoding])
  1669. Gets terminal width of a string */
  1670. PHP_FUNCTION(mb_strwidth)
  1671. {
  1672. int n;
  1673. mbfl_string string;
  1674. char *enc_name = NULL;
  1675. int enc_name_len;
  1676. mbfl_string_init(&string);
  1677. string.no_language = MBSTRG(current_language);
  1678. string.no_encoding = MBSTRG(current_internal_encoding);
  1679. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1680. return;
  1681. }
  1682. if (enc_name != NULL) {
  1683. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1684. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1685. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1686. RETURN_FALSE;
  1687. }
  1688. }
  1689. n = mbfl_strwidth(&string);
  1690. if (n >= 0) {
  1691. RETVAL_LONG(n);
  1692. } else {
  1693. RETVAL_FALSE;
  1694. }
  1695. }
  1696. /* }}} */
  1697. /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
  1698. Trim the string in terminal width */
  1699. PHP_FUNCTION(mb_strimwidth)
  1700. {
  1701. pval **arg1, **arg2, **arg3, **arg4, **arg5;
  1702. int from, width;
  1703. mbfl_string string, result, marker, *ret;
  1704. mbfl_string_init(&string);
  1705. mbfl_string_init(&marker);
  1706. string.no_language = MBSTRG(current_language);
  1707. string.no_encoding = MBSTRG(current_internal_encoding);
  1708. marker.no_language = MBSTRG(current_language);
  1709. marker.no_encoding = MBSTRG(current_internal_encoding);
  1710. marker.val = NULL;
  1711. marker.len = 0;
  1712. switch (ZEND_NUM_ARGS()) {
  1713. case 3:
  1714. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1715. WRONG_PARAM_COUNT;
  1716. }
  1717. break;
  1718. case 4:
  1719. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1720. WRONG_PARAM_COUNT;
  1721. }
  1722. break;
  1723. case 5:
  1724. if (zend_get_parameters_ex(5, &arg1, &arg2, &arg3, &arg4, &arg5) == FAILURE) {
  1725. WRONG_PARAM_COUNT;
  1726. }
  1727. convert_to_string_ex(arg5);
  1728. string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg5));
  1729. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1730. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg5));
  1731. RETURN_FALSE;
  1732. }
  1733. break;
  1734. default:
  1735. WRONG_PARAM_COUNT;
  1736. }
  1737. convert_to_string_ex(arg1);
  1738. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  1739. string.len = Z_STRLEN_PP(arg1);
  1740. convert_to_long_ex(arg2);
  1741. from = Z_LVAL_PP(arg2);
  1742. if (from < 0 || from > Z_STRLEN_PP(arg1)) {
  1743. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of reange");
  1744. RETURN_FALSE;
  1745. }
  1746. convert_to_long_ex(arg3);
  1747. width = Z_LVAL_PP(arg3);
  1748. if (width < 0) {
  1749. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
  1750. RETURN_FALSE;
  1751. }
  1752. if (ZEND_NUM_ARGS() >= 4) {
  1753. convert_to_string_ex(arg4);
  1754. marker.val = (unsigned char *)Z_STRVAL_PP(arg4);
  1755. marker.len = Z_STRLEN_PP(arg4);
  1756. }
  1757. ret = mbfl_strimwidth(&string, &marker, &result, from, width);
  1758. if (ret != NULL) {
  1759. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1760. } else {
  1761. RETVAL_FALSE;
  1762. }
  1763. }
  1764. /* }}} */
  1765. /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
  1766. MBSTRING_API char * php_mb_convert_encoding(char *input, size_t length, char *_to_encoding, char *_from_encodings, size_t *output_len TSRMLS_DC)
  1767. {
  1768. mbfl_string string, result, *ret;
  1769. enum mbfl_no_encoding from_encoding, to_encoding;
  1770. mbfl_buffer_converter *convd;
  1771. int size, *list;
  1772. char *output=NULL;
  1773. if (output_len) {
  1774. *output_len = 0;
  1775. }
  1776. if (!input) {
  1777. return NULL;
  1778. }
  1779. /* new encoding */
  1780. if (_to_encoding && strlen(_to_encoding)) {
  1781. to_encoding = mbfl_name2no_encoding(_to_encoding);
  1782. if (to_encoding == mbfl_no_encoding_invalid) {
  1783. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
  1784. return NULL;
  1785. }
  1786. } else {
  1787. to_encoding = MBSTRG(current_internal_encoding);
  1788. }
  1789. /* initialize string */
  1790. mbfl_string_init(&string);
  1791. mbfl_string_init(&result);
  1792. from_encoding = MBSTRG(current_internal_encoding);
  1793. string.no_encoding = from_encoding;
  1794. string.no_language = MBSTRG(current_language);
  1795. string.val = (unsigned char *)input;
  1796. string.len = length;
  1797. /* pre-conversion encoding */
  1798. if (_from_encodings) {
  1799. list = NULL;
  1800. size = 0;
  1801. php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
  1802. if (size == 1) {
  1803. from_encoding = *list;
  1804. string.no_encoding = from_encoding;
  1805. } else if (size > 1) {
  1806. /* auto detect */
  1807. from_encoding = mbfl_identify_encoding_no(&string, list, size);
  1808. if (from_encoding != mbfl_no_encoding_invalid) {
  1809. string.no_encoding = from_encoding;
  1810. } else {
  1811. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
  1812. from_encoding = mbfl_no_encoding_pass;
  1813. to_encoding = from_encoding;
  1814. string.no_encoding = from_encoding;
  1815. }
  1816. } else {
  1817. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
  1818. }
  1819. if (list != NULL) {
  1820. efree((void *)list);
  1821. }
  1822. }
  1823. /* initialize converter */
  1824. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  1825. if (convd == NULL) {
  1826. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
  1827. return NULL;
  1828. }
  1829. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  1830. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  1831. /* do it */
  1832. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  1833. if (ret) {
  1834. if (output_len) {
  1835. *output_len = ret->len;
  1836. }
  1837. output = (char *)ret->val;
  1838. }
  1839. mbfl_buffer_converter_delete(convd);
  1840. return output;
  1841. }
  1842. /* }}} */
  1843. /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
  1844. Returns converted string in desired encoding */
  1845. PHP_FUNCTION(mb_convert_encoding)
  1846. {
  1847. pval **arg_str, **arg_new, **arg_old;
  1848. int i;
  1849. size_t size, l, n;
  1850. char *_from_encodings, *ret, *s_free = NULL;
  1851. zval **hash_entry;
  1852. HashTable *target_hash;
  1853. _from_encodings = NULL;
  1854. if (ZEND_NUM_ARGS() == 2) {
  1855. if (zend_get_parameters_ex(2, &arg_str, &arg_new) == FAILURE) {
  1856. WRONG_PARAM_COUNT;
  1857. }
  1858. } else if (ZEND_NUM_ARGS() == 3) {
  1859. if (zend_get_parameters_ex(3, &arg_str, &arg_new, &arg_old) == FAILURE) {
  1860. WRONG_PARAM_COUNT;
  1861. }
  1862. switch (Z_TYPE_PP(arg_old)) {
  1863. case IS_ARRAY:
  1864. target_hash = Z_ARRVAL_PP(arg_old);
  1865. zend_hash_internal_pointer_reset(target_hash);
  1866. i = zend_hash_num_elements(target_hash);
  1867. _from_encodings = NULL;
  1868. while (i > 0) {
  1869. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  1870. break;
  1871. }
  1872. convert_to_string_ex(hash_entry);
  1873. if ( _from_encodings) {
  1874. l = strlen(_from_encodings);
  1875. n = strlen(Z_STRVAL_PP(hash_entry));
  1876. _from_encodings = erealloc(_from_encodings, l+n+2);
  1877. strcpy(_from_encodings+l,",");
  1878. strcpy(_from_encodings+l+1,Z_STRVAL_PP(hash_entry));
  1879. } else {
  1880. _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
  1881. }
  1882. zend_hash_move_forward(target_hash);
  1883. i--;
  1884. }
  1885. if (_from_encodings != NULL && !strlen(_from_encodings)) {
  1886. efree(_from_encodings);
  1887. _from_encodings = NULL;
  1888. }
  1889. s_free = _from_encodings;
  1890. break;
  1891. default:
  1892. convert_to_string_ex(arg_old);
  1893. _from_encodings = Z_STRVAL_PP(arg_old);
  1894. break;
  1895. }
  1896. } else {
  1897. WRONG_PARAM_COUNT;
  1898. }
  1899. /* new encoding */
  1900. convert_to_string_ex(arg_str);
  1901. convert_to_string_ex(arg_new);
  1902. ret = php_mb_convert_encoding( Z_STRVAL_PP(arg_str), Z_STRLEN_PP(arg_str), Z_STRVAL_PP(arg_new), _from_encodings, &size TSRMLS_CC);
  1903. if (ret != NULL) {
  1904. RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
  1905. } else {
  1906. RETVAL_FALSE;
  1907. }
  1908. if ( s_free) {
  1909. efree(s_free);
  1910. }
  1911. }
  1912. /* }}} */
  1913. /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
  1914. Returns a case-folded version of sourcestring */
  1915. PHP_FUNCTION(mb_convert_case)
  1916. {
  1917. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1918. int str_len, from_encoding_len;
  1919. long case_mode = 0;
  1920. char *newstr;
  1921. size_t ret_len;
  1922. RETVAL_FALSE;
  1923. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
  1924. &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
  1925. RETURN_FALSE;
  1926. newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1927. if (newstr) {
  1928. RETVAL_STRINGL(newstr, ret_len, 0);
  1929. }
  1930. }
  1931. /* }}} */
  1932. /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
  1933. * Returns a uppercased version of sourcestring
  1934. */
  1935. PHP_FUNCTION(mb_strtoupper)
  1936. {
  1937. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1938. int str_len, from_encoding_len;
  1939. char *newstr;
  1940. size_t ret_len;
  1941. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  1942. &from_encoding, &from_encoding_len) == FAILURE) {
  1943. return;
  1944. }
  1945. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1946. if (newstr) {
  1947. RETURN_STRINGL(newstr, ret_len, 0);
  1948. }
  1949. RETURN_FALSE;
  1950. }
  1951. /* }}} */
  1952. /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
  1953. * Returns a lowercased version of sourcestring
  1954. */
  1955. PHP_FUNCTION(mb_strtolower)
  1956. {
  1957. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1958. int str_len, from_encoding_len;
  1959. char *newstr;
  1960. size_t ret_len;
  1961. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  1962. &from_encoding, &from_encoding_len) == FAILURE) {
  1963. return;
  1964. }
  1965. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1966. if (newstr) {
  1967. RETURN_STRINGL(newstr, ret_len, 0);
  1968. }
  1969. RETURN_FALSE;
  1970. }
  1971. /* }}} */
  1972. /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
  1973. Encodings of the given string is returned (as a string) */
  1974. PHP_FUNCTION(mb_detect_encoding)
  1975. {
  1976. pval **arg_str, **arg_list, **arg_strict;
  1977. mbfl_string string;
  1978. const char *ret;
  1979. enum mbfl_no_encoding *elist;
  1980. int size, *list, strict = 0;
  1981. if (ZEND_NUM_ARGS() == 1) {
  1982. if (zend_get_parameters_ex(1, &arg_str) == FAILURE) {
  1983. WRONG_PARAM_COUNT;
  1984. }
  1985. } else if (ZEND_NUM_ARGS() == 2) {
  1986. if (zend_get_parameters_ex(2, &arg_str, &arg_list) == FAILURE) {
  1987. WRONG_PARAM_COUNT;
  1988. }
  1989. } else if (ZEND_NUM_ARGS() == 3) {
  1990. if (zend_get_parameters_ex(3, &arg_str, &arg_list, &arg_strict) == FAILURE) {
  1991. WRONG_PARAM_COUNT;
  1992. }
  1993. } else {
  1994. WRONG_PARAM_COUNT;
  1995. }
  1996. /* make encoding list */
  1997. list = NULL;
  1998. size = 0;
  1999. if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_PP(arg_list)) {
  2000. switch (Z_TYPE_PP(arg_list)) {
  2001. case IS_ARRAY:
  2002. if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0 TSRMLS_CC)) {
  2003. if (list) {
  2004. efree(list);
  2005. size = 0;
  2006. }
  2007. }
  2008. break;
  2009. default:
  2010. convert_to_string_ex(arg_list);
  2011. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0 TSRMLS_CC)) {
  2012. if (list) {
  2013. efree(list);
  2014. size = 0;
  2015. }
  2016. }
  2017. break;
  2018. }
  2019. if (size <= 0) {
  2020. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
  2021. }
  2022. }
  2023. if (ZEND_NUM_ARGS() == 3) {
  2024. convert_to_long_ex(arg_strict);
  2025. strict = Z_LVAL_PP(arg_strict);
  2026. }
  2027. if (size > 0 && list != NULL) {
  2028. elist = list;
  2029. } else {
  2030. elist = MBSTRG(current_detect_order_list);
  2031. size = MBSTRG(current_detect_order_list_size);
  2032. }
  2033. convert_to_string_ex(arg_str);
  2034. mbfl_string_init(&string);
  2035. string.no_language = MBSTRG(current_language);
  2036. string.val = (unsigned char *)Z_STRVAL_PP(arg_str);
  2037. string.len = Z_STRLEN_PP(arg_str);
  2038. ret = mbfl_identify_encoding_name(&string, elist, size, strict);
  2039. if (list != NULL) {
  2040. efree((void *)list);
  2041. }
  2042. if (ret != NULL) {
  2043. RETVAL_STRING((char *)ret, 1);
  2044. } else {
  2045. RETVAL_FALSE;
  2046. }
  2047. }
  2048. /* }}} */
  2049. /* {{{ proto array mb_list_encodings()
  2050. Returns an array of all supported encodings */
  2051. PHP_FUNCTION(mb_list_encodings)
  2052. {
  2053. const mbfl_encoding **encodings;
  2054. const mbfl_encoding *encoding;
  2055. int i;
  2056. array_init(return_value);
  2057. i = 0;
  2058. encodings = mbfl_get_supported_encodings();
  2059. while ((encoding = encodings[i++]) != NULL) {
  2060. add_next_index_string(return_value, (char *) encoding->name, 1);
  2061. }
  2062. }
  2063. /* }}} */
  2064. /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed]]])
  2065. Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
  2066. PHP_FUNCTION(mb_encode_mimeheader)
  2067. {
  2068. enum mbfl_no_encoding charset, transenc;
  2069. mbfl_string string, result, *ret;
  2070. char *charset_name = NULL;
  2071. int charset_name_len;
  2072. char *trans_enc_name = NULL;
  2073. int trans_enc_name_len;
  2074. char *linefeed = "\r\n";
  2075. int linefeed_len;
  2076. mbfl_string_init(&string);
  2077. string.no_language = MBSTRG(current_language);
  2078. string.no_encoding = MBSTRG(current_internal_encoding);
  2079. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sss", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len) == FAILURE) {
  2080. return;
  2081. }
  2082. charset = mbfl_no_encoding_pass;
  2083. transenc = mbfl_no_encoding_base64;
  2084. if (charset_name != NULL) {
  2085. charset = mbfl_name2no_encoding(charset_name);
  2086. if (charset == mbfl_no_encoding_invalid) {
  2087. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
  2088. RETURN_FALSE;
  2089. }
  2090. } else {
  2091. const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
  2092. if (lang != NULL) {
  2093. charset = lang->mail_charset;
  2094. transenc = lang->mail_header_encoding;
  2095. }
  2096. }
  2097. if (trans_enc_name != NULL) {
  2098. if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
  2099. transenc = mbfl_no_encoding_base64;
  2100. } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
  2101. transenc = mbfl_no_encoding_qprint;
  2102. }
  2103. }
  2104. mbfl_string_init(&result);
  2105. ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, 0);
  2106. if (ret != NULL) {
  2107. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2108. } else {
  2109. RETVAL_FALSE;
  2110. }
  2111. }
  2112. /* }}} */
  2113. /* {{{ proto string mb_decode_mimeheader(string string)
  2114. Decodes the MIME "encoded-word" in the string */
  2115. PHP_FUNCTION(mb_decode_mimeheader)
  2116. {
  2117. mbfl_string string, result, *ret;
  2118. mbfl_string_init(&string);
  2119. string.no_language = MBSTRG(current_language);
  2120. string.no_encoding = MBSTRG(current_internal_encoding);
  2121. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
  2122. return;
  2123. }
  2124. mbfl_string_init(&result);
  2125. ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
  2126. if (ret != NULL) {
  2127. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2128. } else {
  2129. RETVAL_FALSE;
  2130. }
  2131. }
  2132. /* }}} */
  2133. /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
  2134. Conversion between full-width character and half-width character (Japanese) */
  2135. PHP_FUNCTION(mb_convert_kana)
  2136. {
  2137. int opt, i;
  2138. mbfl_string string, result, *ret;
  2139. char *optstr = NULL;
  2140. int optstr_len;
  2141. char *encname = NULL;
  2142. int encname_len;
  2143. mbfl_string_init(&string);
  2144. string.no_language = MBSTRG(current_language);
  2145. string.no_encoding = MBSTRG(current_internal_encoding);
  2146. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
  2147. return;
  2148. }
  2149. /* option */
  2150. if (optstr != NULL) {
  2151. char *p = optstr;
  2152. int n = optstr_len;
  2153. i = 0;
  2154. opt = 0;
  2155. while (i < n) {
  2156. i++;
  2157. switch (*p++) {
  2158. case 'A':
  2159. opt |= 0x1;
  2160. break;
  2161. case 'a':
  2162. opt |= 0x10;
  2163. break;
  2164. case 'R':
  2165. opt |= 0x2;
  2166. break;
  2167. case 'r':
  2168. opt |= 0x20;
  2169. break;
  2170. case 'N':
  2171. opt |= 0x4;
  2172. break;
  2173. case 'n':
  2174. opt |= 0x40;
  2175. break;
  2176. case 'S':
  2177. opt |= 0x8;
  2178. break;
  2179. case 's':
  2180. opt |= 0x80;
  2181. break;
  2182. case 'K':
  2183. opt |= 0x100;
  2184. break;
  2185. case 'k':
  2186. opt |= 0x1000;
  2187. break;
  2188. case 'H':
  2189. opt |= 0x200;
  2190. break;
  2191. case 'h':
  2192. opt |= 0x2000;
  2193. break;
  2194. case 'V':
  2195. opt |= 0x800;
  2196. break;
  2197. case 'C':
  2198. opt |= 0x10000;
  2199. break;
  2200. case 'c':
  2201. opt |= 0x20000;
  2202. break;
  2203. case 'M':
  2204. opt |= 0x100000;
  2205. break;
  2206. case 'm':
  2207. opt |= 0x200000;
  2208. break;
  2209. }
  2210. }
  2211. } else {
  2212. opt = 0x900;
  2213. }
  2214. /* encoding */
  2215. if (encname != NULL) {
  2216. string.no_encoding = mbfl_name2no_encoding(encname);
  2217. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2218. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
  2219. RETURN_FALSE;
  2220. }
  2221. }
  2222. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  2223. if (ret != NULL) {
  2224. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2225. } else {
  2226. RETVAL_FALSE;
  2227. }
  2228. }
  2229. /* }}} */
  2230. #define PHP_MBSTR_STACK_BLOCK_SIZE 32
  2231. /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding [, mixed ...])
  2232. Converts the string resource in variables to desired encoding */
  2233. PHP_FUNCTION(mb_convert_variables)
  2234. {
  2235. pval ***args, ***stack, **var, **hash_entry;
  2236. HashTable *target_hash;
  2237. mbfl_string string, result, *ret;
  2238. enum mbfl_no_encoding from_encoding, to_encoding;
  2239. mbfl_encoding_detector *identd;
  2240. mbfl_buffer_converter *convd;
  2241. int n, argc, stack_level, stack_max, elistsz;
  2242. enum mbfl_no_encoding *elist;
  2243. char *name;
  2244. void *ptmp;
  2245. argc = ZEND_NUM_ARGS();
  2246. if (argc < 3) {
  2247. WRONG_PARAM_COUNT;
  2248. }
  2249. args = (pval ***)ecalloc(argc, sizeof(pval **));
  2250. if (zend_get_parameters_array_ex(argc, args) == FAILURE) {
  2251. efree((void *)args);
  2252. WRONG_PARAM_COUNT;
  2253. }
  2254. /* new encoding */
  2255. convert_to_string_ex(args[0]);
  2256. to_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(args[0]));
  2257. if (to_encoding == mbfl_no_encoding_invalid) {
  2258. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(args[0]));
  2259. efree((void *)args);
  2260. RETURN_FALSE;
  2261. }
  2262. /* initialize string */
  2263. mbfl_string_init(&string);
  2264. mbfl_string_init(&result);
  2265. from_encoding = MBSTRG(current_internal_encoding);
  2266. string.no_encoding = from_encoding;
  2267. string.no_language = MBSTRG(current_language);
  2268. /* pre-conversion encoding */
  2269. elist = NULL;
  2270. elistsz = 0;
  2271. switch (Z_TYPE_PP(args[1])) {
  2272. case IS_ARRAY:
  2273. php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0 TSRMLS_CC);
  2274. break;
  2275. default:
  2276. convert_to_string_ex(args[1]);
  2277. php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), &elist, &elistsz, 0 TSRMLS_CC);
  2278. break;
  2279. }
  2280. if (elistsz <= 0) {
  2281. from_encoding = mbfl_no_encoding_pass;
  2282. } else if (elistsz == 1) {
  2283. from_encoding = *elist;
  2284. } else {
  2285. /* auto detect */
  2286. from_encoding = mbfl_no_encoding_invalid;
  2287. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2288. stack = (pval ***)safe_emalloc(stack_max, sizeof(pval **), 0);
  2289. stack_level = 0;
  2290. identd = mbfl_encoding_detector_new(elist, elistsz);
  2291. if (identd != NULL) {
  2292. n = 2;
  2293. while (n < argc || stack_level > 0) {
  2294. if (stack_level <= 0) {
  2295. var = args[n++];
  2296. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2297. target_hash = HASH_OF(*var);
  2298. if (target_hash != NULL) {
  2299. zend_hash_internal_pointer_reset(target_hash);
  2300. }
  2301. }
  2302. } else {
  2303. stack_level--;
  2304. var = stack[stack_level];
  2305. }
  2306. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2307. target_hash = HASH_OF(*var);
  2308. if (target_hash != NULL) {
  2309. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2310. zend_hash_move_forward(target_hash);
  2311. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2312. if (stack_level >= stack_max) {
  2313. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2314. ptmp = erealloc(stack, sizeof(pval **)*stack_max);
  2315. stack = (pval ***)ptmp;
  2316. }
  2317. stack[stack_level] = var;
  2318. stack_level++;
  2319. var = hash_entry;
  2320. target_hash = HASH_OF(*var);
  2321. if (target_hash != NULL) {
  2322. zend_hash_internal_pointer_reset(target_hash);
  2323. continue;
  2324. }
  2325. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  2326. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  2327. string.len = Z_STRLEN_PP(hash_entry);
  2328. if (mbfl_encoding_detector_feed(identd, &string)) {
  2329. goto detect_end; /* complete detecting */
  2330. }
  2331. }
  2332. }
  2333. }
  2334. } else if (Z_TYPE_PP(var) == IS_STRING) {
  2335. string.val = (unsigned char *)Z_STRVAL_PP(var);
  2336. string.len = Z_STRLEN_PP(var);
  2337. if (mbfl_encoding_detector_feed(identd, &string)) {
  2338. goto detect_end; /* complete detecting */
  2339. }
  2340. }
  2341. }
  2342. detect_end:
  2343. from_encoding = mbfl_encoding_detector_judge(identd);
  2344. mbfl_encoding_detector_delete(identd);
  2345. }
  2346. efree(stack);
  2347. if (from_encoding == mbfl_no_encoding_invalid) {
  2348. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
  2349. from_encoding = mbfl_no_encoding_pass;
  2350. }
  2351. }
  2352. if (elist != NULL) {
  2353. efree((void *)elist);
  2354. }
  2355. /* create converter */
  2356. convd = NULL;
  2357. if (from_encoding != mbfl_no_encoding_pass) {
  2358. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
  2359. if (convd == NULL) {
  2360. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  2361. RETURN_FALSE;
  2362. }
  2363. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2364. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2365. }
  2366. /* convert */
  2367. if (convd != NULL) {
  2368. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2369. stack = (pval ***)safe_emalloc(stack_max, sizeof(pval **), 0);
  2370. stack_level = 0;
  2371. n = 2;
  2372. while (n < argc || stack_level > 0) {
  2373. if (stack_level <= 0) {
  2374. var = args[n++];
  2375. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2376. target_hash = HASH_OF(*var);
  2377. if (target_hash != NULL) {
  2378. zend_hash_internal_pointer_reset(target_hash);
  2379. }
  2380. }
  2381. } else {
  2382. stack_level--;
  2383. var = stack[stack_level];
  2384. }
  2385. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2386. target_hash = HASH_OF(*var);
  2387. if (target_hash != NULL) {
  2388. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2389. zend_hash_move_forward(target_hash);
  2390. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2391. if (stack_level >= stack_max) {
  2392. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2393. ptmp = erealloc(stack, sizeof(pval **)*stack_max);
  2394. stack = (pval ***)ptmp;
  2395. }
  2396. stack[stack_level] = var;
  2397. stack_level++;
  2398. var = hash_entry;
  2399. SEPARATE_ZVAL(hash_entry);
  2400. target_hash = HASH_OF(*var);
  2401. if (target_hash != NULL) {
  2402. zend_hash_internal_pointer_reset(target_hash);
  2403. continue;
  2404. }
  2405. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  2406. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  2407. string.len = Z_STRLEN_PP(hash_entry);
  2408. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2409. if (ret != NULL) {
  2410. if ((*hash_entry)->refcount > 1) {
  2411. ZVAL_DELREF(*hash_entry);
  2412. MAKE_STD_ZVAL(*hash_entry);
  2413. } else {
  2414. zval_dtor(*hash_entry);
  2415. }
  2416. ZVAL_STRINGL(*hash_entry, ret->val, ret->len, 0);
  2417. }
  2418. }
  2419. }
  2420. }
  2421. } else if (Z_TYPE_PP(var) == IS_STRING) {
  2422. string.val = (unsigned char *)Z_STRVAL_PP(var);
  2423. string.len = Z_STRLEN_PP(var);
  2424. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2425. if (ret != NULL) {
  2426. zval_dtor(*var);
  2427. ZVAL_STRINGL(*var, ret->val, ret->len, 0);
  2428. }
  2429. }
  2430. }
  2431. efree(stack);
  2432. mbfl_buffer_converter_delete(convd);
  2433. }
  2434. efree((void *)args);
  2435. name = (char *)mbfl_no_encoding2name(from_encoding);
  2436. if (name != NULL) {
  2437. RETURN_STRING(name, 1);
  2438. } else {
  2439. RETURN_FALSE;
  2440. }
  2441. }
  2442. /* }}} */
  2443. /* {{{ HTML numeric entity */
  2444. /* {{{ static void php_mb_numericentity_exec() */
  2445. static void
  2446. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
  2447. {
  2448. pval **arg1, **arg2, **arg3, **hash_entry;
  2449. HashTable *target_hash;
  2450. int argc, i, *convmap, *mapelm, mapsize=0;
  2451. mbfl_string string, result, *ret;
  2452. enum mbfl_no_encoding no_encoding;
  2453. argc = ZEND_NUM_ARGS();
  2454. if ((argc == 2 && zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) ||
  2455. (argc == 3 && zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) ||
  2456. argc < 2 || argc > 3) {
  2457. WRONG_PARAM_COUNT;
  2458. }
  2459. convert_to_string_ex(arg1);
  2460. mbfl_string_init(&string);
  2461. string.no_language = MBSTRG(current_language);
  2462. string.no_encoding = MBSTRG(current_internal_encoding);
  2463. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  2464. string.len = Z_STRLEN_PP(arg1);
  2465. /* encoding */
  2466. if (argc == 3) {
  2467. convert_to_string_ex(arg3);
  2468. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg3));
  2469. if (no_encoding == mbfl_no_encoding_invalid) {
  2470. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg3));
  2471. RETURN_FALSE;
  2472. } else {
  2473. string.no_encoding = no_encoding;
  2474. }
  2475. }
  2476. /* conversion map */
  2477. convmap = NULL;
  2478. if (Z_TYPE_PP(arg2) == IS_ARRAY){
  2479. target_hash = Z_ARRVAL_PP(arg2);
  2480. zend_hash_internal_pointer_reset(target_hash);
  2481. i = zend_hash_num_elements(target_hash);
  2482. if (i > 0) {
  2483. convmap = (int *)safe_emalloc(i, sizeof(int), 0);
  2484. mapelm = convmap;
  2485. mapsize = 0;
  2486. while (i > 0) {
  2487. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  2488. break;
  2489. }
  2490. convert_to_long_ex(hash_entry);
  2491. *mapelm++ = Z_LVAL_PP(hash_entry);
  2492. mapsize++;
  2493. i--;
  2494. zend_hash_move_forward(target_hash);
  2495. }
  2496. }
  2497. }
  2498. if (convmap == NULL) {
  2499. RETURN_FALSE;
  2500. }
  2501. mapsize /= 4;
  2502. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
  2503. if (ret != NULL) {
  2504. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2505. } else {
  2506. RETVAL_FALSE;
  2507. }
  2508. efree((void *)convmap);
  2509. }
  2510. /* }}} */
  2511. /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
  2512. Converts specified characters to HTML numeric entities */
  2513. PHP_FUNCTION(mb_encode_numericentity)
  2514. {
  2515. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  2516. }
  2517. /* }}} */
  2518. /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
  2519. Converts HTML numeric entities to character code */
  2520. PHP_FUNCTION(mb_decode_numericentity)
  2521. {
  2522. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  2523. }
  2524. /* }}} */
  2525. /* }}} */
  2526. /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
  2527. * Sends an email message with MIME scheme
  2528. */
  2529. #if HAVE_SENDMAIL
  2530. #define APPEND_ONE_CHAR(ch) do { \
  2531. if (token.a > 0) { \
  2532. smart_str_appendc(&token, ch); \
  2533. } else {\
  2534. token.len++; \
  2535. } \
  2536. } while (0)
  2537. #define SEPARATE_SMART_STR(str) do {\
  2538. if ((str)->a == 0) { \
  2539. char *tmp_ptr; \
  2540. (str)->a = 1; \
  2541. while ((str)->a < (str)->len) { \
  2542. (str)->a <<= 1; \
  2543. } \
  2544. tmp_ptr = emalloc((str)->a + 1); \
  2545. memcpy(tmp_ptr, (str)->c, (str)->len); \
  2546. (str)->c = tmp_ptr; \
  2547. } \
  2548. } while (0)
  2549. static void my_smart_str_dtor(smart_str *s)
  2550. {
  2551. if (s->a > 0) {
  2552. smart_str_free(s);
  2553. }
  2554. }
  2555. static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
  2556. {
  2557. const char *ps;
  2558. size_t icnt;
  2559. int state = 0;
  2560. int crlf_state = -1;
  2561. smart_str token = { 0, 0, 0 };
  2562. smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
  2563. ps = str;
  2564. icnt = str_len;
  2565. /*
  2566. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2567. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  2568. * state 0 1 2 3
  2569. *
  2570. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2571. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  2572. * crlf_state -1 0 1 -1
  2573. *
  2574. */
  2575. while (icnt > 0) {
  2576. switch (*ps) {
  2577. case ':':
  2578. if (crlf_state == 1) {
  2579. APPEND_ONE_CHAR('\r');
  2580. }
  2581. if (state == 0 || state == 1) {
  2582. fld_name = token;
  2583. state = 2;
  2584. } else {
  2585. APPEND_ONE_CHAR(*ps);
  2586. }
  2587. crlf_state = 0;
  2588. break;
  2589. case '\n':
  2590. if (crlf_state == -1) {
  2591. goto out;
  2592. }
  2593. crlf_state = -1;
  2594. break;
  2595. case '\r':
  2596. if (crlf_state == 1) {
  2597. APPEND_ONE_CHAR('\r');
  2598. } else {
  2599. crlf_state = 1;
  2600. }
  2601. break;
  2602. case ' ': case '\t':
  2603. if (crlf_state == -1) {
  2604. if (state == 3) {
  2605. /* continuing from the previous line */
  2606. SEPARATE_SMART_STR(&token);
  2607. state = 4;
  2608. } else {
  2609. /* simply skipping this new line */
  2610. state = 5;
  2611. }
  2612. } else {
  2613. if (crlf_state == 1) {
  2614. APPEND_ONE_CHAR('\r');
  2615. }
  2616. if (state == 1 || state == 3) {
  2617. APPEND_ONE_CHAR(*ps);
  2618. }
  2619. }
  2620. crlf_state = 0;
  2621. break;
  2622. default:
  2623. switch (state) {
  2624. case 0:
  2625. token.c = (char *)ps;
  2626. token.len = 0;
  2627. token.a = 0;
  2628. state = 1;
  2629. break;
  2630. case 2:
  2631. if (crlf_state != -1) {
  2632. token.c = (char *)ps;
  2633. token.len = 0;
  2634. token.a = 0;
  2635. state = 3;
  2636. break;
  2637. }
  2638. /* break is missing intentionally */
  2639. case 3:
  2640. if (crlf_state == -1) {
  2641. fld_val = token;
  2642. if (fld_name.c != NULL && fld_val.c != NULL) {
  2643. char *dummy;
  2644. /* FIXME: some locale free implementation is
  2645. * really required here,,, */
  2646. SEPARATE_SMART_STR(&fld_name);
  2647. php_strtoupper(fld_name.c, fld_name.len);
  2648. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  2649. my_smart_str_dtor(&fld_name);
  2650. }
  2651. memset(&fld_name, 0, sizeof(smart_str));
  2652. memset(&fld_val, 0, sizeof(smart_str));
  2653. token.c = (char *)ps;
  2654. token.len = 0;
  2655. token.a = 0;
  2656. state = 1;
  2657. }
  2658. break;
  2659. case 4:
  2660. APPEND_ONE_CHAR(' ');
  2661. state = 3;
  2662. break;
  2663. }
  2664. if (crlf_state == 1) {
  2665. APPEND_ONE_CHAR('\r');
  2666. }
  2667. APPEND_ONE_CHAR(*ps);
  2668. crlf_state = 0;
  2669. break;
  2670. }
  2671. ps++, icnt--;
  2672. }
  2673. out:
  2674. if (state == 2) {
  2675. token.c = "";
  2676. token.len = 0;
  2677. token.a = 0;
  2678. state = 3;
  2679. }
  2680. if (state == 3) {
  2681. fld_val = token;
  2682. if (fld_name.c != NULL && fld_val.c != NULL) {
  2683. void *dummy;
  2684. /* FIXME: some locale free implementation is
  2685. * really required here,,, */
  2686. SEPARATE_SMART_STR(&fld_name);
  2687. php_strtoupper(fld_name.c, fld_name.len);
  2688. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  2689. my_smart_str_dtor(&fld_name);
  2690. }
  2691. }
  2692. return state;
  2693. }
  2694. PHP_FUNCTION(mb_send_mail)
  2695. {
  2696. int n;
  2697. char *to=NULL;
  2698. int to_len;
  2699. char *message=NULL;
  2700. int message_len;
  2701. char *headers=NULL;
  2702. int headers_len;
  2703. char *subject=NULL;
  2704. int subject_len;
  2705. char *extra_cmd=NULL;
  2706. int extra_cmd_len;
  2707. char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
  2708. struct {
  2709. int cnt_type:1;
  2710. int cnt_trans_enc:1;
  2711. } suppressed_hdrs = { 0, 0 };
  2712. char *message_buf=NULL, *subject_buf=NULL, *p;
  2713. mbfl_string orig_str, conv_str;
  2714. mbfl_string *pstr; /* pointer to mbfl string for return value */
  2715. enum mbfl_no_encoding
  2716. tran_cs, /* transfar text charset */
  2717. head_enc, /* header transfar encoding */
  2718. body_enc; /* body transfar encoding */
  2719. mbfl_memory_device device; /* automatic allocateable buffer for additional header */
  2720. const mbfl_language *lang;
  2721. int err = 0;
  2722. HashTable ht_headers;
  2723. smart_str *s;
  2724. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  2725. /* initialize */
  2726. mbfl_memory_device_init(&device, 0, 0);
  2727. mbfl_string_init(&orig_str);
  2728. mbfl_string_init(&conv_str);
  2729. /* character-set, transfer-encoding */
  2730. tran_cs = mbfl_no_encoding_utf8;
  2731. head_enc = mbfl_no_encoding_base64;
  2732. body_enc = mbfl_no_encoding_base64;
  2733. lang = mbfl_no2language(MBSTRG(current_language));
  2734. if (lang != NULL) {
  2735. tran_cs = lang->mail_charset;
  2736. head_enc = lang->mail_header_encoding;
  2737. body_enc = lang->mail_body_encoding;
  2738. }
  2739. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
  2740. return;
  2741. }
  2742. zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
  2743. if (headers != NULL) {
  2744. _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
  2745. }
  2746. if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
  2747. char *tmp;
  2748. char *param_name;
  2749. char *charset = NULL;
  2750. SEPARATE_SMART_STR(s);
  2751. smart_str_0(s);
  2752. p = strchr(s->c, ';');
  2753. if (p != NULL) {
  2754. /* skipping the padded spaces */
  2755. do {
  2756. ++p;
  2757. } while (*p == ' ' || *p == '\t');
  2758. if (*p != '\0') {
  2759. if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
  2760. if (strcasecmp(param_name, "charset") == 0) {
  2761. enum mbfl_no_encoding _tran_cs = tran_cs;
  2762. charset = php_strtok_r(NULL, "= ", &tmp);
  2763. if (charset != NULL) {
  2764. _tran_cs = mbfl_name2no_encoding(charset);
  2765. }
  2766. if (_tran_cs == mbfl_no_encoding_invalid) {
  2767. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
  2768. _tran_cs = mbfl_no_encoding_ascii;
  2769. }
  2770. tran_cs = _tran_cs;
  2771. }
  2772. }
  2773. }
  2774. }
  2775. suppressed_hdrs.cnt_type = 1;
  2776. }
  2777. if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
  2778. enum mbfl_no_encoding _body_enc;
  2779. SEPARATE_SMART_STR(s);
  2780. smart_str_0(s);
  2781. _body_enc = mbfl_name2no_encoding(s->c);
  2782. switch (_body_enc) {
  2783. case mbfl_no_encoding_base64:
  2784. case mbfl_no_encoding_7bit:
  2785. case mbfl_no_encoding_8bit:
  2786. body_enc = _body_enc;
  2787. break;
  2788. default:
  2789. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
  2790. body_enc = mbfl_no_encoding_8bit;
  2791. break;
  2792. }
  2793. suppressed_hdrs.cnt_trans_enc = 1;
  2794. }
  2795. /* To: */
  2796. if (to == NULL || to_len <= 0) {
  2797. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
  2798. err = 1;
  2799. }
  2800. /* Subject: */
  2801. if (subject != NULL && subject_len >= 0) {
  2802. orig_str.no_language = MBSTRG(current_language);
  2803. orig_str.val = (unsigned char *)subject;
  2804. orig_str.len = subject_len;
  2805. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  2806. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  2807. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  2808. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
  2809. }
  2810. pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
  2811. if (pstr != NULL) {
  2812. subject_buf = subject = (char *)pstr->val;
  2813. }
  2814. } else {
  2815. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
  2816. err = 1;
  2817. }
  2818. /* message body */
  2819. if (message != NULL) {
  2820. orig_str.no_language = MBSTRG(current_language);
  2821. orig_str.val = message;
  2822. orig_str.len = message_len;
  2823. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  2824. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  2825. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  2826. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
  2827. }
  2828. pstr = NULL;
  2829. {
  2830. mbfl_string tmpstr;
  2831. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  2832. tmpstr.no_encoding=mbfl_no_encoding_8bit;
  2833. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  2834. efree(tmpstr.val);
  2835. }
  2836. }
  2837. if (pstr != NULL) {
  2838. message_buf = message = (char *)pstr->val;
  2839. }
  2840. } else {
  2841. /* this is not really an error, so it is allowed. */
  2842. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
  2843. message = NULL;
  2844. }
  2845. /* other headers */
  2846. #define PHP_MBSTR_MAIL_MIME_HEADER1 "Mime-Version: 1.0"
  2847. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  2848. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  2849. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  2850. if (headers != NULL) {
  2851. p = headers;
  2852. n = headers_len;
  2853. mbfl_memory_device_strncat(&device, p, n);
  2854. if (n > 0 && p[n - 1] != '\n') {
  2855. mbfl_memory_device_strncat(&device, "\n", 1);
  2856. }
  2857. }
  2858. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  2859. mbfl_memory_device_strncat(&device, "\n", 1);
  2860. if (!suppressed_hdrs.cnt_type) {
  2861. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  2862. p = (char *)mbfl_no2preferred_mime_name(tran_cs);
  2863. if (p != NULL) {
  2864. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  2865. mbfl_memory_device_strcat(&device, p);
  2866. }
  2867. mbfl_memory_device_strncat(&device, "\n", 1);
  2868. }
  2869. if (!suppressed_hdrs.cnt_trans_enc) {
  2870. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  2871. p = (char *)mbfl_no2preferred_mime_name(body_enc);
  2872. if (p == NULL) {
  2873. p = "7bit";
  2874. }
  2875. mbfl_memory_device_strcat(&device, p);
  2876. mbfl_memory_device_strncat(&device, "\n", 1);
  2877. }
  2878. mbfl_memory_device_unput(&device);
  2879. mbfl_memory_device_output('\0', &device);
  2880. headers = (char *)device.buffer;
  2881. if (force_extra_parameters) {
  2882. extra_cmd = estrdup(force_extra_parameters);
  2883. } else if (extra_cmd) {
  2884. extra_cmd = php_escape_shell_cmd(extra_cmd);
  2885. }
  2886. if (!err && php_mail(to, subject, message, headers, extra_cmd TSRMLS_CC)) {
  2887. RETVAL_TRUE;
  2888. } else {
  2889. RETVAL_FALSE;
  2890. }
  2891. if (extra_cmd) {
  2892. efree(extra_cmd);
  2893. }
  2894. if (subject_buf) {
  2895. efree((void *)subject_buf);
  2896. }
  2897. if (message_buf) {
  2898. efree((void *)message_buf);
  2899. }
  2900. mbfl_memory_device_clear(&device);
  2901. zend_hash_destroy(&ht_headers);
  2902. }
  2903. #undef APPEND_ONE_CHAR
  2904. #undef SEPARATE_SMART_STR
  2905. #undef PHP_MBSTR_MAIL_MIME_HEADER1
  2906. #undef PHP_MBSTR_MAIL_MIME_HEADER2
  2907. #undef PHP_MBSTR_MAIL_MIME_HEADER3
  2908. #undef PHP_MBSTR_MAIL_MIME_HEADER4
  2909. #else /* HAVE_SENDMAIL */
  2910. PHP_FUNCTION(mb_send_mail)
  2911. {
  2912. RETURN_FALSE;
  2913. }
  2914. #endif /* HAVE_SENDMAIL */
  2915. /* }}} */
  2916. /* {{{ proto string mb_get_info([string type])
  2917. Returns the current settings of mbstring */
  2918. PHP_FUNCTION(mb_get_info)
  2919. {
  2920. char *typ = NULL;
  2921. int typ_len;
  2922. char *name;
  2923. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  2924. RETURN_FALSE;
  2925. }
  2926. if (!typ || !strcasecmp("all", typ)) {
  2927. array_init(return_value);
  2928. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  2929. add_assoc_string(return_value, "internal_encoding", name, 1);
  2930. }
  2931. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  2932. add_assoc_string(return_value, "http_input", name, 1);
  2933. }
  2934. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  2935. add_assoc_string(return_value, "http_output", name, 1);
  2936. }
  2937. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) {
  2938. add_assoc_string(return_value, "func_overload", name, 1);
  2939. }
  2940. } else if (!strcasecmp("internal_encoding", typ)) {
  2941. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  2942. RETVAL_STRING(name, 1);
  2943. }
  2944. } else if (!strcasecmp("http_input", typ)) {
  2945. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  2946. RETVAL_STRING(name, 1);
  2947. }
  2948. } else if (!strcasecmp("http_output", typ)) {
  2949. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  2950. RETVAL_STRING(name, 1);
  2951. }
  2952. } else if (!strcasecmp("func_overload", typ)) {
  2953. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) {
  2954. RETVAL_STRING(name, 1);
  2955. }
  2956. } else {
  2957. RETURN_FALSE;
  2958. }
  2959. }
  2960. /* }}} */
  2961. /* {{{ MBSTRING_API int php_mb_encoding_translation() */
  2962. MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
  2963. {
  2964. return MBSTRG(encoding_translation);
  2965. }
  2966. /* }}} */
  2967. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
  2968. MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
  2969. {
  2970. if (enc != NULL) {
  2971. if (enc->flag & MBFL_ENCTYPE_MBCS) {
  2972. if (enc->mblen_table != NULL) {
  2973. if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
  2974. }
  2975. } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  2976. return 2;
  2977. } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  2978. return 4;
  2979. }
  2980. }
  2981. return 1;
  2982. }
  2983. /* }}} */
  2984. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
  2985. MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
  2986. {
  2987. return php_mb_mbchar_bytes_ex(s,
  2988. mbfl_no2encoding(MBSTRG(internal_encoding)));
  2989. }
  2990. /* }}} */
  2991. /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
  2992. MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
  2993. {
  2994. register const char *p = s;
  2995. char *last=NULL;
  2996. if (nbytes == (size_t)-1) {
  2997. size_t nb = 0;
  2998. while (*p != '\0') {
  2999. if (nb == 0) {
  3000. if ((unsigned char)*p == (unsigned char)c) {
  3001. last = (char *)p;
  3002. }
  3003. nb = php_mb_mbchar_bytes_ex(p, enc);
  3004. if (nb == 0) {
  3005. return NULL; /* something is going wrong! */
  3006. }
  3007. }
  3008. --nb;
  3009. ++p;
  3010. }
  3011. } else {
  3012. register size_t bcnt = nbytes;
  3013. register size_t nbytes_char;
  3014. while (bcnt > 0) {
  3015. if ((unsigned char)*p == (unsigned char)c) {
  3016. last = (char *)p;
  3017. }
  3018. nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
  3019. if (bcnt < nbytes_char) {
  3020. return NULL;
  3021. }
  3022. p += nbytes_char;
  3023. bcnt -= nbytes_char;
  3024. }
  3025. }
  3026. return last;
  3027. }
  3028. /* }}} */
  3029. /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
  3030. MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
  3031. {
  3032. return php_mb_safe_strrchr_ex(s, c, nbytes,
  3033. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3034. }
  3035. /* }}} */
  3036. /* {{{ MBSTRING_API char *php_mb_strrchr() */
  3037. MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
  3038. {
  3039. return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
  3040. }
  3041. /* }}} */
  3042. /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
  3043. MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
  3044. {
  3045. if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
  3046. return php_mb_mbchar_bytes_ex(s,
  3047. mbfl_no2encoding(MBSTRG(http_input_identify)));
  3048. } else {
  3049. return php_mb_mbchar_bytes_ex(s,
  3050. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3051. }
  3052. }
  3053. /* }}} */
  3054. /* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
  3055. MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from
  3056. TSRMLS_DC)
  3057. {
  3058. int i;
  3059. mbfl_string string, result, *ret = NULL;
  3060. enum mbfl_no_encoding from_encoding, to_encoding;
  3061. mbfl_buffer_converter *convd;
  3062. if (encoding_to) {
  3063. /* new encoding */
  3064. to_encoding = mbfl_name2no_encoding(encoding_to);
  3065. if (to_encoding == mbfl_no_encoding_invalid) {
  3066. return -1;
  3067. }
  3068. } else {
  3069. to_encoding = MBSTRG(current_internal_encoding);
  3070. }
  3071. if (encoding_from) {
  3072. /* old encoding */
  3073. from_encoding = mbfl_name2no_encoding(encoding_from);
  3074. if (from_encoding == mbfl_no_encoding_invalid) {
  3075. return -1;
  3076. }
  3077. } else {
  3078. from_encoding = MBSTRG(http_input_identify);
  3079. }
  3080. if (from_encoding == mbfl_no_encoding_pass) {
  3081. return 0;
  3082. }
  3083. /* initialize string */
  3084. mbfl_string_init(&string);
  3085. mbfl_string_init(&result);
  3086. string.no_encoding = from_encoding;
  3087. string.no_language = MBSTRG(current_language);
  3088. for (i=0; i<num; i++){
  3089. string.val = (char*)str[i];
  3090. string.len = len[i];
  3091. /* initialize converter */
  3092. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  3093. if (convd == NULL) {
  3094. return -1;
  3095. }
  3096. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3097. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3098. /* do it */
  3099. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3100. if (ret != NULL) {
  3101. efree(str[i]);
  3102. str[i] = ret->val;
  3103. len[i] = ret->len;
  3104. }
  3105. mbfl_buffer_converter_delete(convd);
  3106. }
  3107. return ret ? 0 : -1;
  3108. }
  3109. /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
  3110. */
  3111. MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
  3112. {
  3113. mbfl_string string;
  3114. enum mbfl_no_encoding *elist;
  3115. enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
  3116. mbfl_encoding_detector *identd = NULL;
  3117. int size;
  3118. enum mbfl_no_encoding *list;
  3119. if (MBSTRG(http_input_list_size) == 1 &&
  3120. MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
  3121. MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
  3122. return SUCCESS;
  3123. }
  3124. if (MBSTRG(http_input_list_size) == 1 &&
  3125. MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
  3126. mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
  3127. MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
  3128. return SUCCESS;
  3129. }
  3130. if (arg_list && strlen(arg_list)>0) {
  3131. /* make encoding list */
  3132. list = NULL;
  3133. size = 0;
  3134. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  3135. if (size > 0 && list != NULL) {
  3136. elist = list;
  3137. } else {
  3138. elist = MBSTRG(current_detect_order_list);
  3139. size = MBSTRG(current_detect_order_list_size);
  3140. if (size <= 0){
  3141. elist = MBSTRG(default_detect_order_list);
  3142. size = MBSTRG(default_detect_order_list_size);
  3143. }
  3144. }
  3145. } else {
  3146. elist = MBSTRG(current_detect_order_list);
  3147. size = MBSTRG(current_detect_order_list_size);
  3148. if (size <= 0){
  3149. elist = MBSTRG(default_detect_order_list);
  3150. size = MBSTRG(default_detect_order_list_size);
  3151. }
  3152. }
  3153. mbfl_string_init(&string);
  3154. string.no_language = MBSTRG(current_language);
  3155. identd = mbfl_encoding_detector_new(elist, size);
  3156. if (identd) {
  3157. int n = 0;
  3158. while(n < num){
  3159. string.val = (unsigned char *)arg_string[n];
  3160. string.len = arg_length[n];
  3161. if (mbfl_encoding_detector_feed(identd, &string)) {
  3162. break;
  3163. }
  3164. n++;
  3165. }
  3166. encoding = mbfl_encoding_detector_judge(identd);
  3167. mbfl_encoding_detector_delete(identd);
  3168. }
  3169. if (encoding != mbfl_no_encoding_invalid) {
  3170. MBSTRG(http_input_identify) = encoding;
  3171. return SUCCESS;
  3172. } else {
  3173. return FAILURE;
  3174. }
  3175. }
  3176. /* }}} */
  3177. #ifdef ZEND_MULTIBYTE
  3178. /* {{{ MBSTRING_API int php_mb_set_zend_encoding() */
  3179. MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
  3180. {
  3181. /* 'd better use mbfl_memory_device? */
  3182. char *name, *list = NULL;
  3183. int n, *entry, list_size = 0;
  3184. zend_encoding_detector encoding_detector;
  3185. zend_encoding_converter encoding_converter;
  3186. zend_encoding_oddlen encoding_oddlen;
  3187. /* notify script encoding to Zend Engine */
  3188. entry = MBSTRG(script_encoding_list);
  3189. n = MBSTRG(script_encoding_list_size);
  3190. while (n > 0) {
  3191. name = (char *)mbfl_no_encoding2name(*entry);
  3192. if (name) {
  3193. list_size += strlen(name) + 1;
  3194. if (!list) {
  3195. list = (char*)emalloc(list_size);
  3196. *list = (char)NULL;
  3197. } else {
  3198. list = (char*)erealloc(list, list_size);
  3199. strcat(list, ",");
  3200. }
  3201. strcat(list, name);
  3202. }
  3203. entry++;
  3204. n--;
  3205. }
  3206. zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
  3207. if (list) {
  3208. efree(list);
  3209. }
  3210. encoding_detector = php_mb_encoding_detector;
  3211. encoding_converter = php_mb_encoding_converter;
  3212. encoding_oddlen = php_mb_oddlen;
  3213. /* TODO: make independent from mbstring.encoding_translation? */
  3214. if (MBSTRG(encoding_translation)) {
  3215. /* notify internal encoding to Zend Engine */
  3216. name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  3217. zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
  3218. }
  3219. zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
  3220. return 0;
  3221. }
  3222. /* }}} */
  3223. /* {{{ char *php_mb_encoding_detector()
  3224. * Interface for Zend Engine
  3225. */
  3226. char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
  3227. {
  3228. mbfl_string string;
  3229. const char *ret;
  3230. enum mbfl_no_encoding *elist;
  3231. int size, *list;
  3232. /* make encoding list */
  3233. list = NULL;
  3234. size = 0;
  3235. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  3236. if (size <= 0) {
  3237. return NULL;
  3238. }
  3239. if (size > 0 && list != NULL) {
  3240. elist = list;
  3241. } else {
  3242. elist = MBSTRG(current_detect_order_list);
  3243. size = MBSTRG(current_detect_order_list_size);
  3244. }
  3245. mbfl_string_init(&string);
  3246. string.no_language = MBSTRG(current_language);
  3247. string.val = (char*)arg_string;
  3248. string.len = arg_length;
  3249. ret = mbfl_identify_encoding_name(&string, elist, size, 0);
  3250. if (list != NULL) {
  3251. efree((void *)list);
  3252. }
  3253. if (ret != NULL) {
  3254. return estrdup(ret);
  3255. } else {
  3256. return NULL;
  3257. }
  3258. }
  3259. /* }}} */
  3260. /* {{{ int php_mb_encoding_converter() */
  3261. int php_mb_encoding_converter(char **to, int *to_length, const char *from,
  3262. int from_length, const char *encoding_to, const char *encoding_from
  3263. TSRMLS_DC)
  3264. {
  3265. mbfl_string string, result, *ret;
  3266. enum mbfl_no_encoding from_encoding, to_encoding;
  3267. mbfl_buffer_converter *convd;
  3268. /* new encoding */
  3269. to_encoding = mbfl_name2no_encoding(encoding_to);
  3270. if (to_encoding == mbfl_no_encoding_invalid) {
  3271. return -1;
  3272. }
  3273. /* old encoding */
  3274. from_encoding = mbfl_name2no_encoding(encoding_from);
  3275. if (from_encoding == mbfl_no_encoding_invalid) {
  3276. return -1;
  3277. }
  3278. /* initialize string */
  3279. mbfl_string_init(&string);
  3280. mbfl_string_init(&result);
  3281. string.no_encoding = from_encoding;
  3282. string.no_language = MBSTRG(current_language);
  3283. string.val = (char*)from;
  3284. string.len = from_length;
  3285. /* initialize converter */
  3286. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  3287. if (convd == NULL) {
  3288. return -1;
  3289. }
  3290. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3291. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3292. /* do it */
  3293. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3294. if (ret != NULL) {
  3295. *to = ret->val;
  3296. *to_length = ret->len;
  3297. }
  3298. mbfl_buffer_converter_delete(convd);
  3299. return ret ? 0 : -1;
  3300. }
  3301. /* }}} */
  3302. /* {{{ int php_mb_oddlen()
  3303. * returns number of odd (e.g. appears only first byte of multibyte
  3304. * character) chars
  3305. */
  3306. int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC)
  3307. {
  3308. mbfl_string mb_string;
  3309. mbfl_string_init(&mb_string);
  3310. mb_string.no_language = MBSTRG(current_language);
  3311. mb_string.no_encoding = mbfl_name2no_encoding(encoding);
  3312. mb_string.val = (char*)string;
  3313. mb_string.len = length;
  3314. if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
  3315. return 0;
  3316. }
  3317. return mbfl_oddlen(&mb_string);
  3318. }
  3319. /* }}} */
  3320. #endif /* ZEND_MULTIBYTE */
  3321. #endif /* HAVE_MBSTRING */
  3322. /*
  3323. * Local variables:
  3324. * tab-width: 4
  3325. * c-basic-offset: 4
  3326. * End:
  3327. * vim600: fdm=marker
  3328. * vim: noet sw=4 ts=4
  3329. */