You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3717 lines
101 KiB

21 years ago
25 years ago
25 years ago
22 years ago
23 years ago
23 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
23 years ago
23 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
25 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
25 years ago
24 years ago
22 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
24 years ago
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2005 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.0 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_0.txt. |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. | Rui Hirokawa <hirokawa@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* $Id$ */
  20. /*
  21. * PHP 4 Multibyte String module "mbstring"
  22. *
  23. * History:
  24. * 2000.5.19 Release php-4.0RC2_jstring-1.0
  25. * 2001.4.1 Release php4_jstring-1.0.91
  26. * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
  27. * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
  28. */
  29. /*
  30. * PHP3 Internationalization support program.
  31. *
  32. * Copyright (c) 1999,2000 by the PHP3 internationalization team.
  33. * All rights reserved.
  34. *
  35. * See README_PHP3-i18n-ja for more detail.
  36. *
  37. * Authors:
  38. * Hironori Sato <satoh@jpnnet.com>
  39. * Shigeru Kanemoto <sgk@happysize.co.jp>
  40. * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
  41. * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
  42. */
  43. /* {{{ includes */
  44. #ifdef HAVE_CONFIG_H
  45. #include "config.h"
  46. #endif
  47. #include "php.h"
  48. #include "php_ini.h"
  49. #include "php_variables.h"
  50. #include "mbstring.h"
  51. #include "ext/standard/php_string.h"
  52. #include "ext/standard/php_mail.h"
  53. #include "ext/standard/exec.h"
  54. #include "ext/standard/php_smart_str.h"
  55. #include "ext/standard/url.h"
  56. #include "main/php_output.h"
  57. #include "ext/standard/info.h"
  58. #include "libmbfl/mbfl/mbfl_allocators.h"
  59. #include "php_variables.h"
  60. #include "php_globals.h"
  61. #include "rfc1867.h"
  62. #include "php_content_types.h"
  63. #include "SAPI.h"
  64. #include "php_unicode.h"
  65. #include "TSRM.h"
  66. #include "mb_gpc.h"
  67. #ifdef ZEND_MULTIBYTE
  68. #include "zend_multibyte.h"
  69. #endif /* ZEND_MULTIBYTE */
  70. #if HAVE_MBSTRING
  71. /* }}} */
  72. /* {{{ prototypes */
  73. static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC);
  74. static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC);
  75. /* }}} */
  76. /* {{{ php_mb_default_identify_list */
  77. typedef struct _php_mb_nls_ident_list {
  78. enum mbfl_no_language lang;
  79. enum mbfl_no_encoding* list;
  80. int list_size;
  81. } php_mb_nls_ident_list;
  82. static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
  83. mbfl_no_encoding_ascii,
  84. mbfl_no_encoding_jis,
  85. mbfl_no_encoding_utf8,
  86. mbfl_no_encoding_euc_jp,
  87. mbfl_no_encoding_sjis
  88. };
  89. static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
  90. mbfl_no_encoding_ascii,
  91. mbfl_no_encoding_utf8,
  92. mbfl_no_encoding_euc_cn,
  93. mbfl_no_encoding_cp936
  94. };
  95. static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
  96. mbfl_no_encoding_ascii,
  97. mbfl_no_encoding_utf8,
  98. mbfl_no_encoding_euc_tw,
  99. mbfl_no_encoding_big5
  100. };
  101. static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
  102. mbfl_no_encoding_ascii,
  103. mbfl_no_encoding_utf8,
  104. mbfl_no_encoding_euc_kr,
  105. mbfl_no_encoding_uhc
  106. };
  107. static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
  108. mbfl_no_encoding_ascii,
  109. mbfl_no_encoding_utf8,
  110. mbfl_no_encoding_koi8r,
  111. mbfl_no_encoding_cp1251,
  112. mbfl_no_encoding_cp866
  113. };
  114. static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
  115. mbfl_no_encoding_ascii,
  116. mbfl_no_encoding_utf8,
  117. mbfl_no_encoding_armscii8
  118. };
  119. static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
  120. mbfl_no_encoding_ascii,
  121. mbfl_no_encoding_utf8
  122. };
  123. static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
  124. { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  125. { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  126. { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  127. { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  128. { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  129. { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  130. { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  131. };
  132. /* }}} */
  133. static
  134. ZEND_BEGIN_ARG_INFO(third_and_rest_force_ref, 1)
  135. ZEND_ARG_PASS_INFO(0)
  136. ZEND_ARG_PASS_INFO(0)
  137. ZEND_END_ARG_INFO();
  138. /* {{{ mb_overload_def mb_ovld[] */
  139. static const struct mb_overload_def mb_ovld[] = {
  140. {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
  141. {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
  142. {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
  143. {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
  144. {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
  145. {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
  146. {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
  147. {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
  148. #if HAVE_MBREGEX
  149. {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
  150. {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
  151. {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
  152. {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
  153. {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
  154. #endif
  155. {0, NULL, NULL, NULL}
  156. };
  157. /* }}} */
  158. /* {{{ function_entry mbstring_functions[] */
  159. function_entry mbstring_functions[] = {
  160. PHP_FE(mb_convert_case, NULL)
  161. PHP_FE(mb_strtoupper, NULL)
  162. PHP_FE(mb_strtolower, NULL)
  163. PHP_FE(mb_language, NULL)
  164. PHP_FE(mb_internal_encoding, NULL)
  165. PHP_FE(mb_http_input, NULL)
  166. PHP_FE(mb_http_output, NULL)
  167. PHP_FE(mb_detect_order, NULL)
  168. PHP_FE(mb_substitute_character, NULL)
  169. PHP_FE(mb_parse_str, second_arg_force_ref)
  170. PHP_FE(mb_output_handler, NULL)
  171. PHP_FE(mb_preferred_mime_name, NULL)
  172. PHP_FE(mb_strlen, NULL)
  173. PHP_FE(mb_strpos, NULL)
  174. PHP_FE(mb_strrpos, NULL)
  175. PHP_FE(mb_substr_count, NULL)
  176. PHP_FE(mb_substr, NULL)
  177. PHP_FE(mb_strcut, NULL)
  178. PHP_FE(mb_strwidth, NULL)
  179. PHP_FE(mb_strimwidth, NULL)
  180. PHP_FE(mb_convert_encoding, NULL)
  181. PHP_FE(mb_detect_encoding, NULL)
  182. PHP_FE(mb_list_encodings, NULL)
  183. PHP_FE(mb_convert_kana, NULL)
  184. PHP_FE(mb_encode_mimeheader, NULL)
  185. PHP_FE(mb_decode_mimeheader, NULL)
  186. PHP_FE(mb_convert_variables, third_and_rest_force_ref)
  187. PHP_FE(mb_encode_numericentity, NULL)
  188. PHP_FE(mb_decode_numericentity, NULL)
  189. PHP_FE(mb_send_mail, NULL)
  190. PHP_FE(mb_get_info, NULL)
  191. #if HAVE_MBREGEX
  192. PHP_MBREGEX_FUNCTION_ENTRIES
  193. #endif
  194. { NULL, NULL, NULL }
  195. };
  196. /* }}} */
  197. /* {{{ zend_module_entry mbstring_module_entry */
  198. zend_module_entry mbstring_module_entry = {
  199. STANDARD_MODULE_HEADER,
  200. "mbstring",
  201. mbstring_functions,
  202. PHP_MINIT(mbstring),
  203. PHP_MSHUTDOWN(mbstring),
  204. PHP_RINIT(mbstring),
  205. PHP_RSHUTDOWN(mbstring),
  206. PHP_MINFO(mbstring),
  207. NO_VERSION_YET,
  208. STANDARD_MODULE_PROPERTIES
  209. };
  210. /* }}} */
  211. /* {{{ static sapi_post_entry php_post_entries[] */
  212. static sapi_post_entry php_post_entries[] = {
  213. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  214. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  215. { NULL, 0, NULL, NULL }
  216. };
  217. /* }}} */
  218. ZEND_DECLARE_MODULE_GLOBALS(mbstring)
  219. #ifdef COMPILE_DL_MBSTRING
  220. ZEND_GET_MODULE(mbstring)
  221. # ifdef PHP_WIN32
  222. # include "zend_arg_defs.c"
  223. # endif
  224. #endif
  225. /* {{{ allocators */
  226. static void *_php_mb_allocators_malloc(unsigned int sz)
  227. {
  228. return emalloc(sz);
  229. }
  230. static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
  231. {
  232. return erealloc(ptr, sz);
  233. }
  234. static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
  235. {
  236. return ecalloc(nelems, szelem);
  237. }
  238. static void _php_mb_allocators_free(void *ptr)
  239. {
  240. efree(ptr);
  241. }
  242. static void *_php_mb_allocators_pmalloc(unsigned int sz)
  243. {
  244. return pemalloc(sz, 1);
  245. }
  246. static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
  247. {
  248. return perealloc(ptr, sz, 1);
  249. }
  250. static void _php_mb_allocators_pfree(void *ptr)
  251. {
  252. pefree(ptr, 1);
  253. }
  254. static mbfl_allocators _php_mb_allocators = {
  255. _php_mb_allocators_malloc,
  256. _php_mb_allocators_realloc,
  257. _php_mb_allocators_calloc,
  258. _php_mb_allocators_free,
  259. _php_mb_allocators_pmalloc,
  260. _php_mb_allocators_prealloc,
  261. _php_mb_allocators_pfree
  262. };
  263. /* }}} */
  264. /* {{{ static sapi_post_entry mbstr_post_entries[] */
  265. static sapi_post_entry mbstr_post_entries[] = {
  266. { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  267. { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
  268. { NULL, 0, NULL, NULL }
  269. };
  270. /* }}} */
  271. /* {{{ static int php_mb_parse_encoding_list()
  272. * Return 0 if input contains any illegal encoding, otherwise 1.
  273. * Even if any illegal encoding is detected the result may contain a list
  274. * of parsed encodings.
  275. */
  276. static int
  277. php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  278. {
  279. int n, l, size, bauto, ret = 1;
  280. char *p, *p1, *p2, *endp, *tmpstr;
  281. enum mbfl_no_encoding no_encoding;
  282. enum mbfl_no_encoding *src, *entry, *list;
  283. list = NULL;
  284. if (value == NULL || value_length <= 0) {
  285. if (return_list) {
  286. *return_list = NULL;
  287. }
  288. if (return_size) {
  289. *return_size = 0;
  290. }
  291. return 0;
  292. } else {
  293. enum mbfl_no_encoding *identify_list;
  294. int identify_list_size;
  295. identify_list = MBSTRG(default_detect_order_list);
  296. identify_list_size = MBSTRG(default_detect_order_list_size);
  297. /* copy the value string for work */
  298. if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
  299. tmpstr = (char *)estrndup(value+1, value_length-2);
  300. value_length -= 2;
  301. }
  302. else
  303. tmpstr = (char *)estrndup(value, value_length);
  304. if (tmpstr == NULL) {
  305. return 0;
  306. }
  307. /* count the number of listed encoding names */
  308. endp = tmpstr + value_length;
  309. n = 1;
  310. p1 = tmpstr;
  311. while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
  312. p1 = p2 + 1;
  313. n++;
  314. }
  315. size = n + identify_list_size;
  316. /* make list */
  317. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  318. if (list != NULL) {
  319. entry = list;
  320. n = 0;
  321. bauto = 0;
  322. p1 = tmpstr;
  323. do {
  324. p2 = p = php_memnstr(p1, ",", 1, endp);
  325. if (p == NULL) {
  326. p = endp;
  327. }
  328. *p = '\0';
  329. /* trim spaces */
  330. while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
  331. p1++;
  332. }
  333. p--;
  334. while (p > p1 && (*p == ' ' || *p == '\t')) {
  335. *p = '\0';
  336. p--;
  337. }
  338. /* convert to the encoding number and check encoding */
  339. if (strcasecmp(p1, "auto") == 0) {
  340. if (!bauto) {
  341. bauto = 1;
  342. l = identify_list_size;
  343. src = identify_list;
  344. while (l > 0) {
  345. *entry++ = *src++;
  346. l--;
  347. n++;
  348. }
  349. }
  350. } else {
  351. no_encoding = mbfl_name2no_encoding(p1);
  352. if (no_encoding != mbfl_no_encoding_invalid) {
  353. *entry++ = no_encoding;
  354. n++;
  355. } else {
  356. ret = 0;
  357. }
  358. }
  359. p1 = p2 + 1;
  360. } while (n < size && p2 != NULL);
  361. if (n > 0) {
  362. if (return_list) {
  363. *return_list = list;
  364. } else {
  365. pefree(list, persistent);
  366. }
  367. } else {
  368. pefree(list, persistent);
  369. if (return_list) {
  370. *return_list = NULL;
  371. }
  372. ret = 0;
  373. }
  374. if (return_size) {
  375. *return_size = n;
  376. }
  377. } else {
  378. if (return_list) {
  379. *return_list = NULL;
  380. }
  381. if (return_size) {
  382. *return_size = 0;
  383. }
  384. ret = 0;
  385. }
  386. efree(tmpstr);
  387. }
  388. return ret;
  389. }
  390. /* }}} */
  391. /* {{{ MBSTRING_API php_mb_check_encoding_list */
  392. MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
  393. return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
  394. }
  395. /* }}} */
  396. /* {{{ static int php_mb_parse_encoding_array()
  397. * Return 0 if input contains any illegal encoding, otherwise 1.
  398. * Even if any illegal encoding is detected the result may contain a list
  399. * of parsed encodings.
  400. */
  401. static int
  402. php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
  403. {
  404. zval **hash_entry;
  405. HashTable *target_hash;
  406. int i, n, l, size, bauto,ret = 1;
  407. enum mbfl_no_encoding no_encoding;
  408. enum mbfl_no_encoding *src, *list, *entry;
  409. list = NULL;
  410. if (Z_TYPE_P(array) == IS_ARRAY) {
  411. enum mbfl_no_encoding *identify_list;
  412. int identify_list_size;
  413. identify_list = MBSTRG(default_detect_order_list);
  414. identify_list_size = MBSTRG(default_detect_order_list_size);
  415. target_hash = Z_ARRVAL_P(array);
  416. zend_hash_internal_pointer_reset(target_hash);
  417. i = zend_hash_num_elements(target_hash);
  418. size = i + identify_list_size;
  419. list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
  420. if (list != NULL) {
  421. entry = list;
  422. bauto = 0;
  423. n = 0;
  424. while (i > 0) {
  425. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  426. break;
  427. }
  428. convert_to_string_ex(hash_entry);
  429. if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
  430. if (!bauto) {
  431. bauto = 1;
  432. l = identify_list_size;
  433. src = identify_list;
  434. while (l > 0) {
  435. *entry++ = *src++;
  436. l--;
  437. n++;
  438. }
  439. }
  440. } else {
  441. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
  442. if (no_encoding != mbfl_no_encoding_invalid) {
  443. *entry++ = no_encoding;
  444. n++;
  445. } else {
  446. ret = 0;
  447. }
  448. }
  449. zend_hash_move_forward(target_hash);
  450. i--;
  451. }
  452. if (n > 0) {
  453. if (return_list) {
  454. *return_list = list;
  455. } else {
  456. pefree(list, persistent);
  457. }
  458. } else {
  459. pefree(list, persistent);
  460. if (return_list) {
  461. *return_list = NULL;
  462. }
  463. ret = 0;
  464. }
  465. if (return_size) {
  466. *return_size = n;
  467. }
  468. } else {
  469. if (return_list) {
  470. *return_list = NULL;
  471. }
  472. if (return_size) {
  473. *return_size = 0;
  474. }
  475. ret = 0;
  476. }
  477. }
  478. return ret;
  479. }
  480. /* }}} */
  481. /* {{{ php_mb_nls_get_default_detect_order_list */
  482. static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
  483. {
  484. size_t i;
  485. *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  486. *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  487. for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
  488. if (php_mb_default_identify_list[i].lang == lang) {
  489. *plist = php_mb_default_identify_list[i].list;
  490. *plist_size = php_mb_default_identify_list[i].list_size;
  491. return 1;
  492. }
  493. }
  494. return 0;
  495. }
  496. /* }}} */
  497. /* {{{ php.ini directive handler */
  498. static PHP_INI_MH(OnUpdate_mbstring_language)
  499. {
  500. enum mbfl_no_language no_language;
  501. no_language = mbfl_name2no_language(new_value);
  502. if (no_language == mbfl_no_language_invalid) {
  503. return FAILURE;
  504. }
  505. MBSTRG(language) = no_language;
  506. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  507. return SUCCESS;
  508. }
  509. /* }}} */
  510. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
  511. static PHP_INI_MH(OnUpdate_mbstring_detect_order)
  512. {
  513. enum mbfl_no_encoding *list;
  514. int size;
  515. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  516. if (MBSTRG(detect_order_list) != NULL) {
  517. free(MBSTRG(detect_order_list));
  518. }
  519. MBSTRG(detect_order_list) = list;
  520. MBSTRG(detect_order_list_size) = size;
  521. } else {
  522. return FAILURE;
  523. }
  524. return SUCCESS;
  525. }
  526. /* }}} */
  527. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
  528. static PHP_INI_MH(OnUpdate_mbstring_http_input)
  529. {
  530. enum mbfl_no_encoding *list;
  531. int size;
  532. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  533. if (MBSTRG(http_input_list) != NULL) {
  534. free(MBSTRG(http_input_list));
  535. }
  536. MBSTRG(http_input_list) = list;
  537. MBSTRG(http_input_list_size) = size;
  538. } else {
  539. return FAILURE;
  540. }
  541. return SUCCESS;
  542. }
  543. /* }}} */
  544. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
  545. static PHP_INI_MH(OnUpdate_mbstring_http_output)
  546. {
  547. enum mbfl_no_encoding no_encoding;
  548. no_encoding = mbfl_name2no_encoding(new_value);
  549. if (no_encoding != mbfl_no_encoding_invalid) {
  550. MBSTRG(http_output_encoding) = no_encoding;
  551. MBSTRG(current_http_output_encoding) = no_encoding;
  552. } else {
  553. if (new_value != NULL && new_value_length > 0) {
  554. return FAILURE;
  555. }
  556. }
  557. return SUCCESS;
  558. }
  559. /* }}} */
  560. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
  561. static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
  562. {
  563. enum mbfl_no_encoding no_encoding;
  564. if (new_value == NULL) {
  565. return SUCCESS;
  566. }
  567. no_encoding = mbfl_name2no_encoding(new_value);
  568. if (no_encoding != mbfl_no_encoding_invalid) {
  569. MBSTRG(internal_encoding) = no_encoding;
  570. MBSTRG(current_internal_encoding) = no_encoding;
  571. #if HAVE_MBREGEX
  572. {
  573. OnigEncoding mbctype;
  574. mbctype = php_mb_regex_name2mbctype(new_value);
  575. if (mbctype == ONIG_ENCODING_UNDEF) {
  576. mbctype = ONIG_ENCODING_EUC_JP;
  577. }
  578. MBSTRG(current_mbctype) = MBSTRG(default_mbctype) = mbctype;
  579. }
  580. #endif
  581. #ifdef ZEND_MULTIBYTE
  582. zend_multibyte_set_internal_encoding(new_value, new_value_length TSRMLS_CC);
  583. #endif /* ZEND_MULTIBYTE */
  584. } else {
  585. if (new_value != NULL && new_value_length > 0) {
  586. return FAILURE;
  587. }
  588. }
  589. return SUCCESS;
  590. }
  591. /* }}} */
  592. #ifdef ZEND_MULTIBYTE
  593. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
  594. static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
  595. {
  596. int *list, size;
  597. if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
  598. if (MBSTRG(script_encoding_list) != NULL) {
  599. free(MBSTRG(script_encoding_list));
  600. }
  601. MBSTRG(script_encoding_list) = list;
  602. MBSTRG(script_encoding_list_size) = size;
  603. } else {
  604. return FAILURE;
  605. }
  606. return SUCCESS;
  607. }
  608. /* }}} */
  609. #endif /* ZEND_MULTIBYTE */
  610. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
  611. static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
  612. {
  613. if (new_value != NULL) {
  614. if (strcasecmp("none", new_value) == 0) {
  615. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  616. } else if (strcasecmp("long", new_value) == 0) {
  617. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  618. } else {
  619. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  620. MBSTRG(filter_illegal_substchar) = zend_atoi(new_value, new_value_length);
  621. }
  622. }
  623. return SUCCESS;
  624. }
  625. /* }}} */
  626. /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
  627. static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
  628. {
  629. if (new_value == NULL) {
  630. return FAILURE;
  631. }
  632. OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
  633. if (MBSTRG(encoding_translation)) {
  634. sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
  635. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  636. sapi_register_treat_data(mbstr_treat_data);
  637. } else {
  638. sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
  639. sapi_register_post_entries(php_post_entries TSRMLS_CC);
  640. }
  641. return SUCCESS;
  642. }
  643. /* }}} */
  644. /* {{{ php.ini directive registration */
  645. PHP_INI_BEGIN()
  646. PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_language)
  647. PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
  648. PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
  649. PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
  650. PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
  651. #ifdef ZEND_MULTIBYTE
  652. PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
  653. #endif /* ZEND_MULTIBYTE */
  654. PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
  655. STD_PHP_INI_ENTRY("mbstring.func_overload", "0", PHP_INI_SYSTEM |
  656. PHP_INI_PERDIR, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
  657. STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
  658. PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_encoding_translation,
  659. encoding_translation, zend_mbstring_globals, mbstring_globals)
  660. PHP_INI_END()
  661. /* }}} */
  662. /* {{{ module global initialize handler */
  663. static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC)
  664. {
  665. MBSTRG(language) = mbfl_no_language_uni;
  666. MBSTRG(current_language) = MBSTRG(language);
  667. MBSTRG(internal_encoding) = mbfl_no_encoding_invalid;
  668. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  669. #ifdef ZEND_MULTIBYTE
  670. MBSTRG(script_encoding_list) = NULL;
  671. MBSTRG(script_encoding_list_size) = 0;
  672. #endif /* ZEND_MULTIBYTE */
  673. MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
  674. MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
  675. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  676. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  677. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  678. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  679. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  680. MBSTRG(http_input_list) = NULL;
  681. MBSTRG(http_input_list_size) = 0;
  682. MBSTRG(detect_order_list) = NULL;
  683. MBSTRG(detect_order_list_size) = 0;
  684. MBSTRG(current_detect_order_list) = NULL;
  685. MBSTRG(current_detect_order_list_size) = 0;
  686. MBSTRG(default_detect_order_list) = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
  687. MBSTRG(default_detect_order_list_size) = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
  688. MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  689. MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
  690. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  691. MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
  692. MBSTRG(func_overload) = 0;
  693. MBSTRG(encoding_translation) = 0;
  694. pglobals->outconv = NULL;
  695. #if HAVE_MBREGEX
  696. _php_mb_regex_globals_ctor(pglobals TSRMLS_CC);
  697. #endif
  698. }
  699. /* }}} */
  700. /* {{{ static void _php_mb_globals_dtor() */
  701. static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC)
  702. {
  703. #if HAVE_MBREGEX
  704. _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
  705. #endif
  706. }
  707. /* }}} */
  708. /* {{{ PHP_MINIT_FUNCTION(mbstring) */
  709. PHP_MINIT_FUNCTION(mbstring)
  710. {
  711. __mbfl_allocators = &_php_mb_allocators;
  712. #ifdef ZTS
  713. ts_allocate_id(&mbstring_globals_id, sizeof(zend_mbstring_globals),
  714. (ts_allocate_ctor) _php_mb_globals_ctor,
  715. (ts_allocate_dtor) _php_mb_globals_dtor);
  716. #else
  717. _php_mb_globals_ctor(&mbstring_globals TSRMLS_CC);
  718. #endif
  719. REGISTER_INI_ENTRIES();
  720. if (MBSTRG(encoding_translation)) {
  721. sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
  722. sapi_register_treat_data(mbstr_treat_data);
  723. }
  724. REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
  725. REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
  726. REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
  727. REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
  728. REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
  729. REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
  730. #if HAVE_MBREGEX
  731. PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  732. #endif
  733. return SUCCESS;
  734. }
  735. /* }}} */
  736. /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
  737. PHP_MSHUTDOWN_FUNCTION(mbstring)
  738. {
  739. UNREGISTER_INI_ENTRIES();
  740. if (MBSTRG(http_input_list)) {
  741. free(MBSTRG(http_input_list));
  742. }
  743. #ifdef ZEND_MULTIBYTE
  744. if (MBSTRG(script_encoding_list)) {
  745. free(MBSTRG(script_encoding_list));
  746. }
  747. #endif /* ZEND_MULTIBYTE */
  748. if (MBSTRG(detect_order_list)) {
  749. free(MBSTRG(detect_order_list));
  750. }
  751. #if HAVE_MBREGEX
  752. PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  753. #endif
  754. #ifdef ZTS
  755. ts_free_id(mbstring_globals_id);
  756. #else
  757. _php_mb_globals_dtor(&mbstring_globals TSRMLS_CC);
  758. #endif
  759. return SUCCESS;
  760. }
  761. /* }}} */
  762. /* {{{ PHP_RINIT_FUNCTION(mbstring) */
  763. PHP_RINIT_FUNCTION(mbstring)
  764. {
  765. int n;
  766. enum mbfl_no_encoding *list=NULL, *entry;
  767. zend_function *func, *orig;
  768. const struct mb_overload_def *p;
  769. MBSTRG(current_language) = MBSTRG(language);
  770. if (MBSTRG(internal_encoding) == mbfl_no_encoding_invalid) {
  771. char *default_enc = NULL;
  772. switch (MBSTRG(current_language)) {
  773. case mbfl_no_language_uni:
  774. default_enc = "UTF-8";
  775. break;
  776. case mbfl_no_language_japanese:
  777. default_enc = "EUC-JP";
  778. break;
  779. case mbfl_no_language_korean:
  780. default_enc = "EUC-KR";
  781. break;
  782. case mbfl_no_language_simplified_chinese:
  783. default_enc = "EUC-CN";
  784. break;
  785. case mbfl_no_language_traditional_chinese:
  786. default_enc = "EUC-TW";
  787. break;
  788. case mbfl_no_language_russian:
  789. default_enc = "KOI8-R";
  790. break;
  791. case mbfl_no_language_german:
  792. default_enc = "ISO-8859-15";
  793. break;
  794. case mbfl_no_language_armenian:
  795. default_enc = "ArmSCII-8";
  796. break;
  797. case mbfl_no_language_english:
  798. default:
  799. default_enc = "ISO-8859-1";
  800. break;
  801. }
  802. if (default_enc) {
  803. zend_alter_ini_entry("mbstring.internal_encoding",
  804. sizeof("mbstring.internal_encoding"),
  805. default_enc, strlen(default_enc),
  806. PHP_INI_PERDIR, PHP_INI_STAGE_RUNTIME);
  807. }
  808. }
  809. MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
  810. MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
  811. MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
  812. MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
  813. n = 0;
  814. if (MBSTRG(detect_order_list)) {
  815. list = MBSTRG(detect_order_list);
  816. n = MBSTRG(detect_order_list_size);
  817. }
  818. if (n <= 0) {
  819. list = MBSTRG(default_detect_order_list);
  820. n = MBSTRG(default_detect_order_list_size);
  821. }
  822. entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
  823. MBSTRG(current_detect_order_list) = entry;
  824. MBSTRG(current_detect_order_list_size) = n;
  825. while (n > 0) {
  826. *entry++ = *list++;
  827. n--;
  828. }
  829. /* override original function. */
  830. if (MBSTRG(func_overload)){
  831. p = &(mb_ovld[0]);
  832. while (p->type > 0) {
  833. if ((MBSTRG(func_overload) & p->type) == p->type &&
  834. zend_hash_find(EG(function_table), p->save_func,
  835. strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
  836. zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
  837. if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
  838. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
  839. return FAILURE;
  840. } else {
  841. zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
  842. if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
  843. NULL) == FAILURE) {
  844. php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
  845. return FAILURE;
  846. }
  847. }
  848. }
  849. p++;
  850. }
  851. }
  852. #if HAVE_MBREGEX
  853. PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  854. #endif
  855. #ifdef ZEND_MULTIBYTE
  856. php_mb_set_zend_encoding(TSRMLS_C);
  857. #endif /* ZEND_MULTIBYTE */
  858. return SUCCESS;
  859. }
  860. /* }}} */
  861. /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
  862. PHP_RSHUTDOWN_FUNCTION(mbstring)
  863. {
  864. const struct mb_overload_def *p;
  865. zend_function *orig;
  866. if (MBSTRG(current_detect_order_list) != NULL) {
  867. efree(MBSTRG(current_detect_order_list));
  868. MBSTRG(current_detect_order_list) = NULL;
  869. MBSTRG(current_detect_order_list_size) = 0;
  870. }
  871. if (MBSTRG(outconv) != NULL) {
  872. mbfl_buffer_converter_delete(MBSTRG(outconv));
  873. MBSTRG(outconv) = NULL;
  874. }
  875. /* clear http input identification. */
  876. MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
  877. MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
  878. MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
  879. MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
  880. MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
  881. /* clear overloaded function. */
  882. if (MBSTRG(func_overload)){
  883. p = &(mb_ovld[0]);
  884. while (p->type > 0 && zend_hash_find(EG(function_table), p->save_func, strlen(p->save_func)+1 , (void **)&orig) == SUCCESS) {
  885. zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
  886. zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
  887. p++;
  888. }
  889. }
  890. #if HAVE_MBREGEX
  891. PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
  892. #endif
  893. return SUCCESS;
  894. }
  895. /* }}} */
  896. /* {{{ PHP_MINFO_FUNCTION(mbstring) */
  897. PHP_MINFO_FUNCTION(mbstring)
  898. {
  899. php_info_print_table_start();
  900. php_info_print_table_row(2, "Multibyte Support", "enabled");
  901. php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
  902. if (MBSTRG(encoding_translation)) {
  903. php_info_print_table_row(2, "HTTP input encoding translation", "enabled");
  904. }
  905. #if defined(HAVE_MBREGEX)
  906. {
  907. char buf[32];
  908. php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
  909. sprintf(buf, "%d.%d.%d",
  910. ONIGURUMA_VERSION_MAJOR,ONIGURUMA_VERSION_MINOR,ONIGURUMA_VERSION_TEENY);
  911. php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
  912. }
  913. #endif
  914. php_info_print_table_end();
  915. php_info_print_table_start();
  916. php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
  917. php_info_print_table_end();
  918. DISPLAY_INI_ENTRIES();
  919. }
  920. /* }}} */
  921. /* {{{ proto string mb_language([string language])
  922. Sets the current language or Returns the current language as a string */
  923. PHP_FUNCTION(mb_language)
  924. {
  925. char *name = NULL;
  926. int name_len = 0;
  927. enum mbfl_no_language no_language;
  928. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  929. return;
  930. }
  931. if (name == NULL) {
  932. RETURN_STRING((char *)mbfl_no_language2name(MBSTRG(current_language)), 1);
  933. } else {
  934. no_language = mbfl_name2no_language(name);
  935. if (no_language == mbfl_no_language_invalid) {
  936. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
  937. RETURN_FALSE;
  938. } else {
  939. php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
  940. MBSTRG(current_language) = no_language;
  941. RETURN_TRUE;
  942. }
  943. }
  944. }
  945. /* }}} */
  946. /* {{{ proto string mb_internal_encoding([string encoding])
  947. Sets the current internal encoding or Returns the current internal encoding as a string */
  948. PHP_FUNCTION(mb_internal_encoding)
  949. {
  950. char *name = NULL;
  951. int name_len;
  952. enum mbfl_no_encoding no_encoding;
  953. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
  954. RETURN_FALSE;
  955. }
  956. if (name == NULL) {
  957. name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  958. if (name != NULL) {
  959. RETURN_STRING(name, 1);
  960. } else {
  961. RETURN_FALSE;
  962. }
  963. } else {
  964. no_encoding = mbfl_name2no_encoding(name);
  965. if (no_encoding == mbfl_no_encoding_invalid) {
  966. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  967. RETURN_FALSE;
  968. } else {
  969. MBSTRG(current_internal_encoding) = no_encoding;
  970. #ifdef ZEND_MULTIBYTE
  971. /* TODO: make independent from mbstring.encoding_translation? */
  972. if (MBSTRG(encoding_translation)) {
  973. zend_multibyte_set_internal_encoding(name, name_len TSRMLS_CC);
  974. }
  975. #endif /* ZEND_MULTIBYTE */
  976. RETURN_TRUE;
  977. }
  978. }
  979. }
  980. /* }}} */
  981. /* {{{ proto mixed mb_http_input([string type])
  982. Returns the input encoding */
  983. PHP_FUNCTION(mb_http_input)
  984. {
  985. char *typ = NULL;
  986. int typ_len;
  987. int retname, n;
  988. char *name, *list, *temp;
  989. enum mbfl_no_encoding *entry;
  990. enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
  991. retname = 1;
  992. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  993. RETURN_FALSE;
  994. }
  995. if (typ == NULL) {
  996. result = MBSTRG(http_input_identify);
  997. } else {
  998. switch (*typ) {
  999. case 'G':
  1000. case 'g':
  1001. result = MBSTRG(http_input_identify_get);
  1002. break;
  1003. case 'P':
  1004. case 'p':
  1005. result = MBSTRG(http_input_identify_post);
  1006. break;
  1007. case 'C':
  1008. case 'c':
  1009. result = MBSTRG(http_input_identify_cookie);
  1010. break;
  1011. case 'S':
  1012. case 's':
  1013. result = MBSTRG(http_input_identify_string);
  1014. break;
  1015. case 'I':
  1016. case 'i':
  1017. array_init(return_value);
  1018. entry = MBSTRG(http_input_list);
  1019. n = MBSTRG(http_input_list_size);
  1020. while (n > 0) {
  1021. name = (char *)mbfl_no_encoding2name(*entry);
  1022. if (name) {
  1023. add_next_index_string(return_value, name, 1);
  1024. }
  1025. entry++;
  1026. n--;
  1027. }
  1028. retname = 0;
  1029. break;
  1030. case 'L':
  1031. case 'l':
  1032. entry = MBSTRG(http_input_list);
  1033. n = MBSTRG(http_input_list_size);
  1034. list = NULL;
  1035. while (n > 0) {
  1036. name = (char *)mbfl_no_encoding2name(*entry);
  1037. if (name) {
  1038. if (list) {
  1039. temp = list;
  1040. spprintf(&list, 0, "%s,%s", temp, name);
  1041. efree(temp);
  1042. if (!list) {
  1043. break;
  1044. }
  1045. } else {
  1046. list = estrdup(name);
  1047. }
  1048. }
  1049. entry++;
  1050. n--;
  1051. }
  1052. if (!list) {
  1053. RETURN_FALSE;
  1054. }
  1055. RETVAL_STRING(list, 0);
  1056. retname = 0;
  1057. break;
  1058. default:
  1059. result = MBSTRG(http_input_identify);
  1060. break;
  1061. }
  1062. }
  1063. if (retname) {
  1064. if (result != mbfl_no_encoding_invalid &&
  1065. (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
  1066. RETVAL_STRING(name, 1);
  1067. } else {
  1068. RETVAL_FALSE;
  1069. }
  1070. }
  1071. }
  1072. /* }}} */
  1073. /* {{{ proto string mb_http_output([string encoding])
  1074. Sets the current output_encoding or returns the current output_encoding as a string */
  1075. PHP_FUNCTION(mb_http_output)
  1076. {
  1077. char *name = NULL;
  1078. int name_len;
  1079. enum mbfl_no_encoding no_encoding;
  1080. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
  1081. RETURN_FALSE;
  1082. }
  1083. if (name == NULL) {
  1084. name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
  1085. if (name != NULL) {
  1086. RETURN_STRING(name, 1);
  1087. } else {
  1088. RETURN_FALSE;
  1089. }
  1090. } else {
  1091. no_encoding = mbfl_name2no_encoding(name);
  1092. if (no_encoding == mbfl_no_encoding_invalid) {
  1093. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1094. RETURN_FALSE;
  1095. } else {
  1096. MBSTRG(current_http_output_encoding) = no_encoding;
  1097. RETURN_TRUE;
  1098. }
  1099. }
  1100. }
  1101. /* }}} */
  1102. /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
  1103. Sets the current detect_order or Return the current detect_order as a array */
  1104. PHP_FUNCTION(mb_detect_order)
  1105. {
  1106. zval **arg1;
  1107. int n, size;
  1108. enum mbfl_no_encoding *list, *entry;
  1109. char *name;
  1110. if (ZEND_NUM_ARGS() == 0) {
  1111. array_init(return_value);
  1112. entry = MBSTRG(current_detect_order_list);
  1113. n = MBSTRG(current_detect_order_list_size);
  1114. while (n > 0) {
  1115. name = (char *)mbfl_no_encoding2name(*entry);
  1116. if (name) {
  1117. add_next_index_string(return_value, name, 1);
  1118. }
  1119. entry++;
  1120. n--;
  1121. }
  1122. } else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) {
  1123. list = NULL;
  1124. size = 0;
  1125. switch (Z_TYPE_PP(arg1)) {
  1126. case IS_ARRAY:
  1127. if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
  1128. if (list) {
  1129. efree(list);
  1130. }
  1131. RETURN_FALSE;
  1132. }
  1133. break;
  1134. default:
  1135. convert_to_string_ex(arg1);
  1136. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
  1137. if (list) {
  1138. efree(list);
  1139. }
  1140. RETURN_FALSE;
  1141. }
  1142. break;
  1143. }
  1144. if (list == NULL) {
  1145. RETVAL_FALSE;
  1146. } else {
  1147. if (MBSTRG(current_detect_order_list)) {
  1148. efree(MBSTRG(current_detect_order_list));
  1149. }
  1150. MBSTRG(current_detect_order_list) = list;
  1151. MBSTRG(current_detect_order_list_size) = size;
  1152. RETVAL_TRUE;
  1153. }
  1154. } else {
  1155. WRONG_PARAM_COUNT;
  1156. }
  1157. }
  1158. /* }}} */
  1159. /* {{{ proto mixed mb_substitute_character([mixed substchar])
  1160. Sets the current substitute_character or returns the current substitute_character */
  1161. PHP_FUNCTION(mb_substitute_character)
  1162. {
  1163. zval **arg1;
  1164. if (ZEND_NUM_ARGS() == 0) {
  1165. if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
  1166. RETVAL_STRING("none", 1);
  1167. } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
  1168. RETVAL_STRING("long", 1);
  1169. } else {
  1170. RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
  1171. }
  1172. } else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) {
  1173. RETVAL_TRUE;
  1174. switch (Z_TYPE_PP(arg1)) {
  1175. case IS_STRING:
  1176. if (strcasecmp("none", Z_STRVAL_PP(arg1)) == 0) {
  1177. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
  1178. } else if (strcasecmp("long", Z_STRVAL_PP(arg1)) == 0) {
  1179. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
  1180. } else {
  1181. convert_to_long_ex(arg1);
  1182. if (Z_LVAL_PP(arg1)< 0xffff && Z_LVAL_PP(arg1)> 0x0) {
  1183. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1184. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1185. } else {
  1186. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1187. RETVAL_FALSE;
  1188. }
  1189. }
  1190. break;
  1191. default:
  1192. convert_to_long_ex(arg1);
  1193. if (Z_LVAL_PP(arg1)< 0xffff && Z_LVAL_PP(arg1)> 0x0) {
  1194. MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
  1195. MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
  1196. } else {
  1197. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
  1198. RETVAL_FALSE;
  1199. }
  1200. break;
  1201. }
  1202. } else {
  1203. WRONG_PARAM_COUNT;
  1204. }
  1205. }
  1206. /* }}} */
  1207. /* {{{ proto string mb_preferred_mime_name(string encoding)
  1208. Return the preferred MIME name (charset) as a string */
  1209. PHP_FUNCTION(mb_preferred_mime_name)
  1210. {
  1211. enum mbfl_no_encoding no_encoding;
  1212. char *name = NULL;
  1213. int name_len;
  1214. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
  1215. return;
  1216. } else {
  1217. no_encoding = mbfl_name2no_encoding(name);
  1218. if (no_encoding == mbfl_no_encoding_invalid) {
  1219. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
  1220. RETVAL_FALSE;
  1221. } else {
  1222. const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
  1223. if (preferred_name == NULL || *preferred_name == '\0') {
  1224. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
  1225. RETVAL_FALSE;
  1226. } else {
  1227. RETVAL_STRING((char *)preferred_name, 1);
  1228. }
  1229. }
  1230. }
  1231. }
  1232. /* }}} */
  /*
 * Byte-range predicates; each evaluates to 1 or 0.
 *   IS_SJIS1(c): c is in 0x81..0x9f or 0xe0..0xf5
 *   IS_SJIS2(c): c is in 0x40..0x7e or 0x80..0xfc
 * NOTE(review): the names suggest Shift_JIS first/second byte checks - confirm.
 * The argument is evaluated more than once, so pass side-effect-free values.
 */
#define IS_SJIS1(c) \
	(((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) \
	(((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
  1235. /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
  1236. Parses GET/POST/COOKIE data and sets global variables */
  1237. PHP_FUNCTION(mb_parse_str)
  1238. {
  1239. zval *track_vars_array;
  1240. char *encstr = NULL;
  1241. int encstr_len;
  1242. php_mb_encoding_handler_info_t info;
  1243. enum mbfl_no_encoding detected;
  1244. track_vars_array = NULL;
  1245. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
  1246. return;
  1247. }
  1248. /* Clear out the array */
  1249. if (track_vars_array != NULL) {
  1250. zval_dtor(track_vars_array);
  1251. array_init(track_vars_array);
  1252. }
  1253. encstr = estrndup(encstr, encstr_len);
  1254. info.data_type = PARSE_STRING;
  1255. info.separator = PG(arg_separator).input;
  1256. info.force_register_globals = (track_vars_array == NULL);
  1257. info.report_errors = 1;
  1258. info.to_encoding = MBSTRG(current_internal_encoding);
  1259. info.to_language = MBSTRG(current_language);
  1260. info.from_encodings = MBSTRG(http_input_list);
  1261. info.num_from_encodings = MBSTRG(http_input_list_size);
  1262. info.from_language = MBSTRG(current_language);
  1263. detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
  1264. MBSTRG(http_input_identify) = detected;
  1265. RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
  1266. if (encstr != NULL) efree(encstr);
  1267. }
  1268. /* }}} */
  1269. /* {{{ proto string mb_output_handler(string contents, int status)
  1270. Returns string in output buffer converted to the http_output encoding */
  1271. PHP_FUNCTION(mb_output_handler)
  1272. {
  1273. char *arg_string;
  1274. int arg_string_len;
  1275. long arg_status;
  1276. mbfl_string string, result;
  1277. const char *charset;
  1278. char *p;
  1279. enum mbfl_no_encoding encoding;
  1280. int last_feed, len;
  1281. unsigned char send_text_mimetype = 0;
  1282. char *s, *mimetype = NULL;
  1283. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
  1284. return;
  1285. }
  1286. encoding = MBSTRG(current_http_output_encoding);
  1287. /* start phase only */
  1288. if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
  1289. /* delete the converter just in case. */
  1290. if (MBSTRG(outconv)) {
  1291. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1292. MBSTRG(outconv) = NULL;
  1293. }
  1294. if (encoding == mbfl_no_encoding_pass) {
  1295. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1296. }
  1297. /* analyze mime type */
  1298. if (SG(sapi_headers).mimetype &&
  1299. strncmp(SG(sapi_headers).mimetype, "text/", 5) == 0) {
  1300. if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
  1301. mimetype = estrdup(SG(sapi_headers).mimetype);
  1302. } else {
  1303. mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
  1304. }
  1305. send_text_mimetype = 1;
  1306. } else if (SG(sapi_headers).send_default_content_type) {
  1307. mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
  1308. }
  1309. /* if content-type is not yet set, set it and activate the converter */
  1310. if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
  1311. charset = mbfl_no2preferred_mime_name(encoding);
  1312. if (charset) {
  1313. len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
  1314. if (sapi_add_header(p, len, 0) != FAILURE) {
  1315. SG(sapi_headers).send_default_content_type = 0;
  1316. }
  1317. }
  1318. /* activate the converter */
  1319. MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
  1320. if (send_text_mimetype){
  1321. efree(mimetype);
  1322. }
  1323. }
  1324. }
  1325. /* just return if the converter is not activated. */
  1326. if (MBSTRG(outconv) == NULL) {
  1327. RETURN_STRINGL(arg_string, arg_string_len, 1);
  1328. }
  1329. /* flag */
  1330. last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
  1331. /* mode */
  1332. mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
  1333. mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
  1334. /* feed the string */
  1335. mbfl_string_init(&string);
  1336. string.no_language = MBSTRG(current_language);
  1337. string.no_encoding = MBSTRG(current_internal_encoding);
  1338. string.val = (unsigned char *)arg_string;
  1339. string.len = arg_string_len;
  1340. mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
  1341. if (last_feed) {
  1342. mbfl_buffer_converter_flush(MBSTRG(outconv));
  1343. }
  1344. /* get the converter output, and return it */
  1345. mbfl_buffer_converter_result(MBSTRG(outconv), &result);
  1346. RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
  1347. /* delete the converter if it is the last feed. */
  1348. if (last_feed) {
  1349. mbfl_buffer_converter_delete(MBSTRG(outconv));
  1350. MBSTRG(outconv) = NULL;
  1351. }
  1352. }
  1353. /* }}} */
  1354. /* {{{ proto int mb_strlen(string str [, string encoding])
  1355. Get character numbers of a string */
  1356. PHP_FUNCTION(mb_strlen)
  1357. {
  1358. int n;
  1359. mbfl_string string;
  1360. char *enc_name = NULL;
  1361. int enc_name_len;
  1362. mbfl_string_init(&string);
  1363. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1364. RETURN_FALSE;
  1365. }
  1366. string.no_language = MBSTRG(current_language);
  1367. if (enc_name == NULL) {
  1368. string.no_encoding = MBSTRG(current_internal_encoding);
  1369. } else {
  1370. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1371. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1372. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1373. RETURN_FALSE;
  1374. }
  1375. }
  1376. n = mbfl_strlen(&string);
  1377. if (n >= 0) {
  1378. RETVAL_LONG(n);
  1379. } else {
  1380. RETVAL_FALSE;
  1381. }
  1382. }
  1383. /* }}} */
  1384. /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
  1385. Find position of first occurrence of a string within another */
  1386. PHP_FUNCTION(mb_strpos)
  1387. {
  1388. int n, reverse = 0;
  1389. long offset;
  1390. mbfl_string haystack, needle;
  1391. char *enc_name = NULL;
  1392. int enc_name_len;
  1393. mbfl_string_init(&haystack);
  1394. mbfl_string_init(&needle);
  1395. haystack.no_language = MBSTRG(current_language);
  1396. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1397. needle.no_language = MBSTRG(current_language);
  1398. needle.no_encoding = MBSTRG(current_internal_encoding);
  1399. offset = 0;
  1400. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
  1401. RETURN_FALSE;
  1402. }
  1403. if (enc_name != NULL) {
  1404. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1405. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1406. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1407. RETURN_FALSE;
  1408. }
  1409. }
  1410. if (offset < 0 || (unsigned long)offset > haystack.len) {
  1411. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is out of range");
  1412. RETURN_FALSE;
  1413. }
  1414. if (needle.len == 0) {
  1415. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty needle");
  1416. RETURN_FALSE;
  1417. }
  1418. n = mbfl_strpos(&haystack, &needle, offset, reverse);
  1419. if (n >= 0) {
  1420. RETVAL_LONG(n);
  1421. } else {
  1422. switch (-n) {
  1423. case 1:
  1424. break;
  1425. case 2:
  1426. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length.");
  1427. break;
  1428. case 4:
  1429. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error.");
  1430. break;
  1431. case 8:
  1432. php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty.");
  1433. break;
  1434. default:
  1435. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos.");
  1436. break;
  1437. }
  1438. RETVAL_FALSE;
  1439. }
  1440. }
  1441. /* }}} */
  1442. /* {{{ proto int mb_strrpos(string haystack, string needle [, string encoding])
  1443. Find the last occurrence of a character in a string within another */
  1444. PHP_FUNCTION(mb_strrpos)
  1445. {
  1446. int n;
  1447. mbfl_string haystack, needle;
  1448. char *enc_name = NULL;
  1449. int enc_name_len;
  1450. mbfl_string_init(&haystack);
  1451. mbfl_string_init(&needle);
  1452. haystack.no_language = MBSTRG(current_language);
  1453. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1454. needle.no_language = MBSTRG(current_language);
  1455. needle.no_encoding = MBSTRG(current_internal_encoding);
  1456. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  1457. RETURN_FALSE;
  1458. }
  1459. if (enc_name != NULL) {
  1460. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1461. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1462. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1463. RETURN_FALSE;
  1464. }
  1465. }
  1466. if (haystack.len <= 0) {
  1467. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty haystack");
  1468. RETURN_FALSE;
  1469. }
  1470. if (needle.len <= 0) {
  1471. php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle");
  1472. RETURN_FALSE;
  1473. }
  1474. n = mbfl_strpos(&haystack, &needle, 0, 1);
  1475. if (n >= 0) {
  1476. RETVAL_LONG(n);
  1477. } else {
  1478. RETVAL_FALSE;
  1479. }
  1480. }
  1481. /* }}} */
  1482. /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
  1483. Count the number of substring occurrences */
  1484. PHP_FUNCTION(mb_substr_count)
  1485. {
  1486. int n;
  1487. mbfl_string haystack, needle;
  1488. char *enc_name = NULL;
  1489. int enc_name_len;
  1490. mbfl_string_init(&haystack);
  1491. mbfl_string_init(&needle);
  1492. haystack.no_language = MBSTRG(current_language);
  1493. haystack.no_encoding = MBSTRG(current_internal_encoding);
  1494. needle.no_language = MBSTRG(current_language);
  1495. needle.no_encoding = MBSTRG(current_internal_encoding);
  1496. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
  1497. return;
  1498. }
  1499. if (enc_name != NULL) {
  1500. haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
  1501. if (haystack.no_encoding == mbfl_no_encoding_invalid) {
  1502. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1503. RETURN_FALSE;
  1504. }
  1505. }
  1506. if (needle.len <= 0) {
  1507. php_error_docref(NULL TSRMLS_CC, E_WARNING,"Empty needle");
  1508. RETURN_FALSE;
  1509. }
  1510. n = mbfl_substr_count(&haystack, &needle);
  1511. if (n >= 0) {
  1512. RETVAL_LONG(n);
  1513. } else {
  1514. RETVAL_FALSE;
  1515. }
  1516. }
  1517. /* }}} */
  1518. /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
  1519. Returns part of a string */
  1520. PHP_FUNCTION(mb_substr)
  1521. {
  1522. zval **arg1, **arg2, **arg3, **arg4;
  1523. int argc, from, len, mblen;
  1524. mbfl_string string, result, *ret;
  1525. mbfl_string_init(&string);
  1526. string.no_language = MBSTRG(current_language);
  1527. string.no_encoding = MBSTRG(current_internal_encoding);
  1528. argc = ZEND_NUM_ARGS();
  1529. switch (argc) {
  1530. case 2:
  1531. if (zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) {
  1532. WRONG_PARAM_COUNT;
  1533. }
  1534. break;
  1535. case 3:
  1536. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1537. WRONG_PARAM_COUNT;
  1538. }
  1539. break;
  1540. case 4:
  1541. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1542. WRONG_PARAM_COUNT;
  1543. }
  1544. convert_to_string_ex(arg4);
  1545. string.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg4));
  1546. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1547. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg4));
  1548. RETURN_FALSE;
  1549. }
  1550. break;
  1551. default:
  1552. WRONG_PARAM_COUNT;
  1553. }
  1554. convert_to_string_ex(arg1);
  1555. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  1556. string.len = Z_STRLEN_PP(arg1);
  1557. convert_to_long_ex(arg2);
  1558. from = Z_LVAL_PP(arg2);
  1559. if (argc >= 3) {
  1560. convert_to_long_ex(arg3);
  1561. len = Z_LVAL_PP(arg3);
  1562. } else {
  1563. len = Z_STRLEN_PP(arg1);
  1564. }
  1565. /* measures length */
  1566. mblen = 0;
  1567. if (from < 0 || len < 0) {
  1568. mblen = mbfl_strlen(&string);
  1569. }
  1570. /* if "from" position is negative, count start position from the end
  1571. * of the string
  1572. */
  1573. if (from < 0) {
  1574. from = mblen + from;
  1575. if (from < 0) {
  1576. from = 0;
  1577. }
  1578. }
  1579. /* if "length" position is negative, set it to the length
  1580. * needed to stop that many chars from the end of the string
  1581. */
  1582. if (len < 0) {
  1583. len = (mblen - from) + len;
  1584. if (len < 0) {
  1585. len = 0;
  1586. }
  1587. }
  1588. ret = mbfl_substr(&string, &result, from, len);
  1589. if (ret != NULL) {
  1590. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1591. } else {
  1592. RETVAL_FALSE;
  1593. }
  1594. }
  1595. /* }}} */
  1596. /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
  1597. Returns part of a string */
  1598. PHP_FUNCTION(mb_strcut)
  1599. {
  1600. pval **arg1, **arg2, **arg3, **arg4;
  1601. int argc, from, len;
  1602. mbfl_string string, result, *ret;
  1603. mbfl_string_init(&string);
  1604. string.no_language = MBSTRG(current_language);
  1605. string.no_encoding = MBSTRG(current_internal_encoding);
  1606. argc = ZEND_NUM_ARGS();
  1607. switch (argc) {
  1608. case 2:
  1609. if (zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) {
  1610. WRONG_PARAM_COUNT;
  1611. }
  1612. break;
  1613. case 3:
  1614. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1615. WRONG_PARAM_COUNT;
  1616. }
  1617. break;
  1618. case 4:
  1619. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1620. WRONG_PARAM_COUNT;
  1621. }
  1622. convert_to_string_ex(arg4);
  1623. string.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg4));
  1624. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1625. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg4));
  1626. RETURN_FALSE;
  1627. }
  1628. break;
  1629. default:
  1630. WRONG_PARAM_COUNT;
  1631. }
  1632. convert_to_string_ex(arg1);
  1633. string.val = Z_STRVAL_PP(arg1);
  1634. string.len = Z_STRLEN_PP(arg1);
  1635. convert_to_long_ex(arg2);
  1636. from = Z_LVAL_PP(arg2);
  1637. if (argc >= 3) {
  1638. convert_to_long_ex(arg3);
  1639. len = Z_LVAL_PP(arg3);
  1640. } else {
  1641. len = Z_STRLEN_PP(arg1);
  1642. }
  1643. /* if "from" position is negative, count start position from the end
  1644. * of the string
  1645. */
  1646. if (from < 0) {
  1647. from = Z_STRLEN_PP(arg1) + from;
  1648. if (from < 0) {
  1649. from = 0;
  1650. }
  1651. }
  1652. /* if "length" position is negative, set it to the length
  1653. * needed to stop that many chars from the end of the string
  1654. */
  1655. if (len < 0) {
  1656. len = (Z_STRLEN_PP(arg1) - from) + len;
  1657. if (len < 0) {
  1658. len = 0;
  1659. }
  1660. }
  1661. ret = mbfl_strcut(&string, &result, from, len);
  1662. if (ret != NULL) {
  1663. RETVAL_STRINGL(ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1664. } else {
  1665. RETVAL_FALSE;
  1666. }
  1667. }
  1668. /* }}} */
  1669. /* {{{ proto int mb_strwidth(string str [, string encoding])
  1670. Gets terminal width of a string */
  1671. PHP_FUNCTION(mb_strwidth)
  1672. {
  1673. int n;
  1674. mbfl_string string;
  1675. char *enc_name = NULL;
  1676. int enc_name_len;
  1677. mbfl_string_init(&string);
  1678. string.no_language = MBSTRG(current_language);
  1679. string.no_encoding = MBSTRG(current_internal_encoding);
  1680. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
  1681. return;
  1682. }
  1683. if (enc_name != NULL) {
  1684. string.no_encoding = mbfl_name2no_encoding(enc_name);
  1685. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1686. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
  1687. RETURN_FALSE;
  1688. }
  1689. }
  1690. n = mbfl_strwidth(&string);
  1691. if (n >= 0) {
  1692. RETVAL_LONG(n);
  1693. } else {
  1694. RETVAL_FALSE;
  1695. }
  1696. }
  1697. /* }}} */
  1698. /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
  1699. Trim the string in terminal width */
  1700. PHP_FUNCTION(mb_strimwidth)
  1701. {
  1702. pval **arg1, **arg2, **arg3, **arg4, **arg5;
  1703. int from, width;
  1704. mbfl_string string, result, marker, *ret;
  1705. mbfl_string_init(&string);
  1706. mbfl_string_init(&marker);
  1707. string.no_language = MBSTRG(current_language);
  1708. string.no_encoding = MBSTRG(current_internal_encoding);
  1709. marker.no_language = MBSTRG(current_language);
  1710. marker.no_encoding = MBSTRG(current_internal_encoding);
  1711. marker.val = NULL;
  1712. marker.len = 0;
  1713. switch (ZEND_NUM_ARGS()) {
  1714. case 3:
  1715. if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) {
  1716. WRONG_PARAM_COUNT;
  1717. }
  1718. break;
  1719. case 4:
  1720. if (zend_get_parameters_ex(4, &arg1, &arg2, &arg3, &arg4) == FAILURE) {
  1721. WRONG_PARAM_COUNT;
  1722. }
  1723. break;
  1724. case 5:
  1725. if (zend_get_parameters_ex(5, &arg1, &arg2, &arg3, &arg4, &arg5) == FAILURE) {
  1726. WRONG_PARAM_COUNT;
  1727. }
  1728. convert_to_string_ex(arg5);
  1729. string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg5));
  1730. if (string.no_encoding == mbfl_no_encoding_invalid) {
  1731. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg5));
  1732. RETURN_FALSE;
  1733. }
  1734. break;
  1735. default:
  1736. WRONG_PARAM_COUNT;
  1737. }
  1738. convert_to_string_ex(arg1);
  1739. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  1740. string.len = Z_STRLEN_PP(arg1);
  1741. convert_to_long_ex(arg2);
  1742. from = Z_LVAL_PP(arg2);
  1743. if (from < 0 || from > Z_STRLEN_PP(arg1)) {
  1744. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of reange");
  1745. RETURN_FALSE;
  1746. }
  1747. convert_to_long_ex(arg3);
  1748. width = Z_LVAL_PP(arg3);
  1749. if (width < 0) {
  1750. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
  1751. RETURN_FALSE;
  1752. }
  1753. if (ZEND_NUM_ARGS() >= 4) {
  1754. convert_to_string_ex(arg4);
  1755. marker.val = (unsigned char *)Z_STRVAL_PP(arg4);
  1756. marker.len = Z_STRLEN_PP(arg4);
  1757. }
  1758. ret = mbfl_strimwidth(&string, &marker, &result, from, width);
  1759. if (ret != NULL) {
  1760. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  1761. } else {
  1762. RETVAL_FALSE;
  1763. }
  1764. }
  1765. /* }}} */
  1766. /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
  1767. MBSTRING_API char * php_mb_convert_encoding(char *input, size_t length, char *_to_encoding, char *_from_encodings, size_t *output_len TSRMLS_DC)
  1768. {
  1769. mbfl_string string, result, *ret;
  1770. enum mbfl_no_encoding from_encoding, to_encoding;
  1771. mbfl_buffer_converter *convd;
  1772. int size, *list;
  1773. char *output=NULL;
  1774. if (output_len) {
  1775. *output_len = 0;
  1776. }
  1777. if (!input) {
  1778. return NULL;
  1779. }
  1780. /* new encoding */
  1781. if (_to_encoding && strlen(_to_encoding)) {
  1782. to_encoding = mbfl_name2no_encoding(_to_encoding);
  1783. if (to_encoding == mbfl_no_encoding_invalid) {
  1784. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
  1785. return NULL;
  1786. }
  1787. } else {
  1788. to_encoding = MBSTRG(current_internal_encoding);
  1789. }
  1790. /* initialize string */
  1791. mbfl_string_init(&string);
  1792. mbfl_string_init(&result);
  1793. from_encoding = MBSTRG(current_internal_encoding);
  1794. string.no_encoding = from_encoding;
  1795. string.no_language = MBSTRG(current_language);
  1796. string.val = (unsigned char *)input;
  1797. string.len = length;
  1798. /* pre-conversion encoding */
  1799. if (_from_encodings) {
  1800. list = NULL;
  1801. size = 0;
  1802. php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
  1803. if (size == 1) {
  1804. from_encoding = *list;
  1805. string.no_encoding = from_encoding;
  1806. } else if (size > 1) {
  1807. /* auto detect */
  1808. from_encoding = mbfl_identify_encoding_no(&string, list, size);
  1809. if (from_encoding != mbfl_no_encoding_invalid) {
  1810. string.no_encoding = from_encoding;
  1811. } else {
  1812. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
  1813. from_encoding = mbfl_no_encoding_pass;
  1814. to_encoding = from_encoding;
  1815. string.no_encoding = from_encoding;
  1816. }
  1817. } else {
  1818. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
  1819. }
  1820. if (list != NULL) {
  1821. efree((void *)list);
  1822. }
  1823. }
  1824. /* initialize converter */
  1825. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  1826. if (convd == NULL) {
  1827. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
  1828. return NULL;
  1829. }
  1830. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  1831. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  1832. /* do it */
  1833. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  1834. if (ret) {
  1835. if (output_len) {
  1836. *output_len = ret->len;
  1837. }
  1838. output = (char *)ret->val;
  1839. }
  1840. mbfl_buffer_converter_delete(convd);
  1841. return output;
  1842. }
  1843. /* }}} */
  1844. /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
  1845. Returns converted string in desired encoding */
  1846. PHP_FUNCTION(mb_convert_encoding)
  1847. {
  1848. pval **arg_str, **arg_new, **arg_old;
  1849. int i;
  1850. size_t size, l, n;
  1851. char *_from_encodings, *ret, *s_free = NULL;
  1852. zval **hash_entry;
  1853. HashTable *target_hash;
  1854. _from_encodings = NULL;
  1855. if (ZEND_NUM_ARGS() == 2) {
  1856. if (zend_get_parameters_ex(2, &arg_str, &arg_new) == FAILURE) {
  1857. WRONG_PARAM_COUNT;
  1858. }
  1859. } else if (ZEND_NUM_ARGS() == 3) {
  1860. if (zend_get_parameters_ex(3, &arg_str, &arg_new, &arg_old) == FAILURE) {
  1861. WRONG_PARAM_COUNT;
  1862. }
  1863. switch (Z_TYPE_PP(arg_old)) {
  1864. case IS_ARRAY:
  1865. target_hash = Z_ARRVAL_PP(arg_old);
  1866. zend_hash_internal_pointer_reset(target_hash);
  1867. i = zend_hash_num_elements(target_hash);
  1868. _from_encodings = NULL;
  1869. while (i > 0) {
  1870. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  1871. break;
  1872. }
  1873. convert_to_string_ex(hash_entry);
  1874. if ( _from_encodings) {
  1875. l = strlen(_from_encodings);
  1876. n = strlen(Z_STRVAL_PP(hash_entry));
  1877. _from_encodings = erealloc(_from_encodings, l+n+2);
  1878. strcpy(_from_encodings+l,",");
  1879. strcpy(_from_encodings+l+1,Z_STRVAL_PP(hash_entry));
  1880. } else {
  1881. _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
  1882. }
  1883. zend_hash_move_forward(target_hash);
  1884. i--;
  1885. }
  1886. if (_from_encodings != NULL && !strlen(_from_encodings)) {
  1887. efree(_from_encodings);
  1888. _from_encodings = NULL;
  1889. }
  1890. s_free = _from_encodings;
  1891. break;
  1892. default:
  1893. convert_to_string_ex(arg_old);
  1894. _from_encodings = Z_STRVAL_PP(arg_old);
  1895. break;
  1896. }
  1897. } else {
  1898. WRONG_PARAM_COUNT;
  1899. }
  1900. /* new encoding */
  1901. convert_to_string_ex(arg_str);
  1902. convert_to_string_ex(arg_new);
  1903. ret = php_mb_convert_encoding( Z_STRVAL_PP(arg_str), Z_STRLEN_PP(arg_str), Z_STRVAL_PP(arg_new), _from_encodings, &size TSRMLS_CC);
  1904. if (ret != NULL) {
  1905. RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
  1906. } else {
  1907. RETVAL_FALSE;
  1908. }
  1909. if ( s_free) {
  1910. efree(s_free);
  1911. }
  1912. }
  1913. /* }}} */
  1914. /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
  1915. Returns a case-folded version of sourcestring */
  1916. PHP_FUNCTION(mb_convert_case)
  1917. {
  1918. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1919. int str_len, from_encoding_len;
  1920. long case_mode = 0;
  1921. char *newstr;
  1922. size_t ret_len;
  1923. RETVAL_FALSE;
  1924. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
  1925. &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
  1926. RETURN_FALSE;
  1927. newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1928. if (newstr) {
  1929. RETVAL_STRINGL(newstr, ret_len, 0);
  1930. }
  1931. }
  1932. /* }}} */
  1933. /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
  1934. * Returns a uppercased version of sourcestring
  1935. */
  1936. PHP_FUNCTION(mb_strtoupper)
  1937. {
  1938. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1939. int str_len, from_encoding_len;
  1940. char *newstr;
  1941. size_t ret_len;
  1942. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  1943. &from_encoding, &from_encoding_len) == FAILURE) {
  1944. return;
  1945. }
  1946. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1947. if (newstr) {
  1948. RETURN_STRINGL(newstr, ret_len, 0);
  1949. }
  1950. RETURN_FALSE;
  1951. }
  1952. /* }}} */
  1953. /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
  1954. * Returns a lowercased version of sourcestring
  1955. */
  1956. PHP_FUNCTION(mb_strtolower)
  1957. {
  1958. char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
  1959. int str_len, from_encoding_len;
  1960. char *newstr;
  1961. size_t ret_len;
  1962. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
  1963. &from_encoding, &from_encoding_len) == FAILURE) {
  1964. return;
  1965. }
  1966. newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
  1967. if (newstr) {
  1968. RETURN_STRINGL(newstr, ret_len, 0);
  1969. }
  1970. RETURN_FALSE;
  1971. }
  1972. /* }}} */
  1973. /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
  1974. Encodings of the given string is returned (as a string) */
  1975. PHP_FUNCTION(mb_detect_encoding)
  1976. {
  1977. pval **arg_str, **arg_list, **arg_strict;
  1978. mbfl_string string;
  1979. const char *ret;
  1980. enum mbfl_no_encoding *elist;
  1981. int size, *list, strict = 0;
  1982. if (ZEND_NUM_ARGS() == 1) {
  1983. if (zend_get_parameters_ex(1, &arg_str) == FAILURE) {
  1984. WRONG_PARAM_COUNT;
  1985. }
  1986. } else if (ZEND_NUM_ARGS() == 2) {
  1987. if (zend_get_parameters_ex(2, &arg_str, &arg_list) == FAILURE) {
  1988. WRONG_PARAM_COUNT;
  1989. }
  1990. } else if (ZEND_NUM_ARGS() == 3) {
  1991. if (zend_get_parameters_ex(3, &arg_str, &arg_list, &arg_strict) == FAILURE) {
  1992. WRONG_PARAM_COUNT;
  1993. }
  1994. } else {
  1995. WRONG_PARAM_COUNT;
  1996. }
  1997. /* make encoding list */
  1998. list = NULL;
  1999. size = 0;
  2000. if (ZEND_NUM_ARGS() >= 2 && Z_STRVAL_PP(arg_list)) {
  2001. switch (Z_TYPE_PP(arg_list)) {
  2002. case IS_ARRAY:
  2003. if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0 TSRMLS_CC)) {
  2004. if (list) {
  2005. efree(list);
  2006. size = 0;
  2007. }
  2008. }
  2009. break;
  2010. default:
  2011. convert_to_string_ex(arg_list);
  2012. if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), Z_STRLEN_PP(arg_list), &list, &size, 0 TSRMLS_CC)) {
  2013. if (list) {
  2014. efree(list);
  2015. size = 0;
  2016. }
  2017. }
  2018. break;
  2019. }
  2020. if (size <= 0) {
  2021. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
  2022. }
  2023. }
  2024. if (ZEND_NUM_ARGS() == 3) {
  2025. convert_to_long_ex(arg_strict);
  2026. strict = Z_LVAL_PP(arg_strict);
  2027. }
  2028. if (size > 0 && list != NULL) {
  2029. elist = list;
  2030. } else {
  2031. elist = MBSTRG(current_detect_order_list);
  2032. size = MBSTRG(current_detect_order_list_size);
  2033. }
  2034. convert_to_string_ex(arg_str);
  2035. mbfl_string_init(&string);
  2036. string.no_language = MBSTRG(current_language);
  2037. string.val = (unsigned char *)Z_STRVAL_PP(arg_str);
  2038. string.len = Z_STRLEN_PP(arg_str);
  2039. ret = mbfl_identify_encoding_name(&string, elist, size, strict);
  2040. if (list != NULL) {
  2041. efree((void *)list);
  2042. }
  2043. if (ret != NULL) {
  2044. RETVAL_STRING((char *)ret, 1);
  2045. } else {
  2046. RETVAL_FALSE;
  2047. }
  2048. }
  2049. /* }}} */
  2050. /* {{{ proto array mb_list_encodings()
  2051. Returns an array of all supported encodings */
  2052. PHP_FUNCTION(mb_list_encodings)
  2053. {
  2054. const mbfl_encoding **encodings;
  2055. const mbfl_encoding *encoding;
  2056. int i;
  2057. array_init(return_value);
  2058. i = 0;
  2059. encodings = mbfl_get_supported_encodings();
  2060. while ((encoding = encodings[i++]) != NULL) {
  2061. add_next_index_string(return_value, (char *) encoding->name, 1);
  2062. }
  2063. }
  2064. /* }}} */
  2065. /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed]]])
  2066. Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
  2067. PHP_FUNCTION(mb_encode_mimeheader)
  2068. {
  2069. enum mbfl_no_encoding charset, transenc;
  2070. mbfl_string string, result, *ret;
  2071. char *charset_name = NULL;
  2072. int charset_name_len;
  2073. char *trans_enc_name = NULL;
  2074. int trans_enc_name_len;
  2075. char *linefeed = "\r\n";
  2076. int linefeed_len;
  2077. mbfl_string_init(&string);
  2078. string.no_language = MBSTRG(current_language);
  2079. string.no_encoding = MBSTRG(current_internal_encoding);
  2080. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sss", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len) == FAILURE) {
  2081. return;
  2082. }
  2083. charset = mbfl_no_encoding_pass;
  2084. transenc = mbfl_no_encoding_base64;
  2085. if (charset_name != NULL) {
  2086. charset = mbfl_name2no_encoding(charset_name);
  2087. if (charset == mbfl_no_encoding_invalid) {
  2088. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
  2089. RETURN_FALSE;
  2090. }
  2091. } else {
  2092. const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
  2093. if (lang != NULL) {
  2094. charset = lang->mail_charset;
  2095. transenc = lang->mail_header_encoding;
  2096. }
  2097. }
  2098. if (trans_enc_name != NULL) {
  2099. if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
  2100. transenc = mbfl_no_encoding_base64;
  2101. } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
  2102. transenc = mbfl_no_encoding_qprint;
  2103. }
  2104. }
  2105. mbfl_string_init(&result);
  2106. ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, 0);
  2107. if (ret != NULL) {
  2108. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2109. } else {
  2110. RETVAL_FALSE;
  2111. }
  2112. }
  2113. /* }}} */
  2114. /* {{{ proto string mb_decode_mimeheader(string string)
  2115. Decodes the MIME "encoded-word" in the string */
  2116. PHP_FUNCTION(mb_decode_mimeheader)
  2117. {
  2118. mbfl_string string, result, *ret;
  2119. mbfl_string_init(&string);
  2120. string.no_language = MBSTRG(current_language);
  2121. string.no_encoding = MBSTRG(current_internal_encoding);
  2122. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
  2123. return;
  2124. }
  2125. mbfl_string_init(&result);
  2126. ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
  2127. if (ret != NULL) {
  2128. RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
  2129. } else {
  2130. RETVAL_FALSE;
  2131. }
  2132. }
  2133. /* }}} */
  2134. /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
  2135. Conversion between full-width character and half-width character (Japanese) */
  2136. PHP_FUNCTION(mb_convert_kana)
  2137. {
  2138. int opt, i;
  2139. mbfl_string string, result, *ret;
  2140. char *optstr = NULL;
  2141. int optstr_len;
  2142. char *encname = NULL;
  2143. int encname_len;
  2144. mbfl_string_init(&string);
  2145. string.no_language = MBSTRG(current_language);
  2146. string.no_encoding = MBSTRG(current_internal_encoding);
  2147. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
  2148. return;
  2149. }
  2150. /* option */
  2151. if (optstr != NULL) {
  2152. char *p = optstr;
  2153. int n = optstr_len;
  2154. i = 0;
  2155. opt = 0;
  2156. while (i < n) {
  2157. i++;
  2158. switch (*p++) {
  2159. case 'A':
  2160. opt |= 0x1;
  2161. break;
  2162. case 'a':
  2163. opt |= 0x10;
  2164. break;
  2165. case 'R':
  2166. opt |= 0x2;
  2167. break;
  2168. case 'r':
  2169. opt |= 0x20;
  2170. break;
  2171. case 'N':
  2172. opt |= 0x4;
  2173. break;
  2174. case 'n':
  2175. opt |= 0x40;
  2176. break;
  2177. case 'S':
  2178. opt |= 0x8;
  2179. break;
  2180. case 's':
  2181. opt |= 0x80;
  2182. break;
  2183. case 'K':
  2184. opt |= 0x100;
  2185. break;
  2186. case 'k':
  2187. opt |= 0x1000;
  2188. break;
  2189. case 'H':
  2190. opt |= 0x200;
  2191. break;
  2192. case 'h':
  2193. opt |= 0x2000;
  2194. break;
  2195. case 'V':
  2196. opt |= 0x800;
  2197. break;
  2198. case 'C':
  2199. opt |= 0x10000;
  2200. break;
  2201. case 'c':
  2202. opt |= 0x20000;
  2203. break;
  2204. case 'M':
  2205. opt |= 0x100000;
  2206. break;
  2207. case 'm':
  2208. opt |= 0x200000;
  2209. break;
  2210. }
  2211. }
  2212. } else {
  2213. opt = 0x900;
  2214. }
  2215. /* encoding */
  2216. if (encname != NULL) {
  2217. string.no_encoding = mbfl_name2no_encoding(encname);
  2218. if (string.no_encoding == mbfl_no_encoding_invalid) {
  2219. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
  2220. RETURN_FALSE;
  2221. }
  2222. }
  2223. ret = mbfl_ja_jp_hantozen(&string, &result, opt);
  2224. if (ret != NULL) {
  2225. RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
  2226. } else {
  2227. RETVAL_FALSE;
  2228. }
  2229. }
  2230. /* }}} */
  2231. #define PHP_MBSTR_STACK_BLOCK_SIZE 32
  2232. /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding [, mixed ...])
  2233. Converts the string resource in variables to desired encoding */
  2234. PHP_FUNCTION(mb_convert_variables)
  2235. {
  2236. pval ***args, ***stack, **var, **hash_entry;
  2237. HashTable *target_hash;
  2238. mbfl_string string, result, *ret;
  2239. enum mbfl_no_encoding from_encoding, to_encoding;
  2240. mbfl_encoding_detector *identd;
  2241. mbfl_buffer_converter *convd;
  2242. int n, argc, stack_level, stack_max, elistsz;
  2243. enum mbfl_no_encoding *elist;
  2244. char *name;
  2245. void *ptmp;
  2246. argc = ZEND_NUM_ARGS();
  2247. if (argc < 3) {
  2248. WRONG_PARAM_COUNT;
  2249. }
  2250. args = (pval ***)ecalloc(argc, sizeof(pval **));
  2251. if (zend_get_parameters_array_ex(argc, args) == FAILURE) {
  2252. efree((void *)args);
  2253. WRONG_PARAM_COUNT;
  2254. }
  2255. /* new encoding */
  2256. convert_to_string_ex(args[0]);
  2257. to_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(args[0]));
  2258. if (to_encoding == mbfl_no_encoding_invalid) {
  2259. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(args[0]));
  2260. efree((void *)args);
  2261. RETURN_FALSE;
  2262. }
  2263. /* initialize string */
  2264. mbfl_string_init(&string);
  2265. mbfl_string_init(&result);
  2266. from_encoding = MBSTRG(current_internal_encoding);
  2267. string.no_encoding = from_encoding;
  2268. string.no_language = MBSTRG(current_language);
  2269. /* pre-conversion encoding */
  2270. elist = NULL;
  2271. elistsz = 0;
  2272. switch (Z_TYPE_PP(args[1])) {
  2273. case IS_ARRAY:
  2274. php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0 TSRMLS_CC);
  2275. break;
  2276. default:
  2277. convert_to_string_ex(args[1]);
  2278. php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), &elist, &elistsz, 0 TSRMLS_CC);
  2279. break;
  2280. }
  2281. if (elistsz <= 0) {
  2282. from_encoding = mbfl_no_encoding_pass;
  2283. } else if (elistsz == 1) {
  2284. from_encoding = *elist;
  2285. } else {
  2286. /* auto detect */
  2287. from_encoding = mbfl_no_encoding_invalid;
  2288. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2289. stack = (pval ***)safe_emalloc(stack_max, sizeof(pval **), 0);
  2290. stack_level = 0;
  2291. identd = mbfl_encoding_detector_new(elist, elistsz);
  2292. if (identd != NULL) {
  2293. n = 2;
  2294. while (n < argc || stack_level > 0) {
  2295. if (stack_level <= 0) {
  2296. var = args[n++];
  2297. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2298. target_hash = HASH_OF(*var);
  2299. if (target_hash != NULL) {
  2300. zend_hash_internal_pointer_reset(target_hash);
  2301. }
  2302. }
  2303. } else {
  2304. stack_level--;
  2305. var = stack[stack_level];
  2306. }
  2307. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2308. target_hash = HASH_OF(*var);
  2309. if (target_hash != NULL) {
  2310. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2311. zend_hash_move_forward(target_hash);
  2312. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2313. if (stack_level >= stack_max) {
  2314. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2315. ptmp = erealloc(stack, sizeof(pval **)*stack_max);
  2316. stack = (pval ***)ptmp;
  2317. }
  2318. stack[stack_level] = var;
  2319. stack_level++;
  2320. var = hash_entry;
  2321. target_hash = HASH_OF(*var);
  2322. if (target_hash != NULL) {
  2323. zend_hash_internal_pointer_reset(target_hash);
  2324. continue;
  2325. }
  2326. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  2327. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  2328. string.len = Z_STRLEN_PP(hash_entry);
  2329. if (mbfl_encoding_detector_feed(identd, &string)) {
  2330. goto detect_end; /* complete detecting */
  2331. }
  2332. }
  2333. }
  2334. }
  2335. } else if (Z_TYPE_PP(var) == IS_STRING) {
  2336. string.val = (unsigned char *)Z_STRVAL_PP(var);
  2337. string.len = Z_STRLEN_PP(var);
  2338. if (mbfl_encoding_detector_feed(identd, &string)) {
  2339. goto detect_end; /* complete detecting */
  2340. }
  2341. }
  2342. }
  2343. detect_end:
  2344. from_encoding = mbfl_encoding_detector_judge(identd);
  2345. mbfl_encoding_detector_delete(identd);
  2346. }
  2347. efree(stack);
  2348. if (from_encoding == mbfl_no_encoding_invalid) {
  2349. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
  2350. from_encoding = mbfl_no_encoding_pass;
  2351. }
  2352. }
  2353. if (elist != NULL) {
  2354. efree((void *)elist);
  2355. }
  2356. /* create converter */
  2357. convd = NULL;
  2358. if (from_encoding != mbfl_no_encoding_pass) {
  2359. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
  2360. if (convd == NULL) {
  2361. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
  2362. RETURN_FALSE;
  2363. }
  2364. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  2365. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  2366. }
  2367. /* convert */
  2368. if (convd != NULL) {
  2369. stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
  2370. stack = (pval ***)safe_emalloc(stack_max, sizeof(pval **), 0);
  2371. stack_level = 0;
  2372. n = 2;
  2373. while (n < argc || stack_level > 0) {
  2374. if (stack_level <= 0) {
  2375. var = args[n++];
  2376. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2377. target_hash = HASH_OF(*var);
  2378. if (target_hash != NULL) {
  2379. zend_hash_internal_pointer_reset(target_hash);
  2380. }
  2381. }
  2382. } else {
  2383. stack_level--;
  2384. var = stack[stack_level];
  2385. }
  2386. if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
  2387. target_hash = HASH_OF(*var);
  2388. if (target_hash != NULL) {
  2389. while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
  2390. zend_hash_move_forward(target_hash);
  2391. if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
  2392. if (stack_level >= stack_max) {
  2393. stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
  2394. ptmp = erealloc(stack, sizeof(pval **)*stack_max);
  2395. stack = (pval ***)ptmp;
  2396. }
  2397. stack[stack_level] = var;
  2398. stack_level++;
  2399. var = hash_entry;
  2400. SEPARATE_ZVAL(hash_entry);
  2401. target_hash = HASH_OF(*var);
  2402. if (target_hash != NULL) {
  2403. zend_hash_internal_pointer_reset(target_hash);
  2404. continue;
  2405. }
  2406. } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
  2407. string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
  2408. string.len = Z_STRLEN_PP(hash_entry);
  2409. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2410. if (ret != NULL) {
  2411. if ((*hash_entry)->refcount > 1) {
  2412. ZVAL_DELREF(*hash_entry);
  2413. MAKE_STD_ZVAL(*hash_entry);
  2414. } else {
  2415. zval_dtor(*hash_entry);
  2416. }
  2417. ZVAL_STRINGL(*hash_entry, ret->val, ret->len, 0);
  2418. }
  2419. }
  2420. }
  2421. }
  2422. } else if (Z_TYPE_PP(var) == IS_STRING) {
  2423. string.val = (unsigned char *)Z_STRVAL_PP(var);
  2424. string.len = Z_STRLEN_PP(var);
  2425. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  2426. if (ret != NULL) {
  2427. zval_dtor(*var);
  2428. ZVAL_STRINGL(*var, ret->val, ret->len, 0);
  2429. }
  2430. }
  2431. }
  2432. efree(stack);
  2433. mbfl_buffer_converter_delete(convd);
  2434. }
  2435. efree((void *)args);
  2436. name = (char *)mbfl_no_encoding2name(from_encoding);
  2437. if (name != NULL) {
  2438. RETURN_STRING(name, 1);
  2439. } else {
  2440. RETURN_FALSE;
  2441. }
  2442. }
  2443. /* }}} */
  2444. /* {{{ HTML numeric entity */
  2445. /* {{{ static void php_mb_numericentity_exec() */
  2446. static void
  2447. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
  2448. {
  2449. pval **arg1, **arg2, **arg3, **hash_entry;
  2450. HashTable *target_hash;
  2451. int argc, i, *convmap, *mapelm, mapsize=0;
  2452. mbfl_string string, result, *ret;
  2453. enum mbfl_no_encoding no_encoding;
  2454. argc = ZEND_NUM_ARGS();
  2455. if ((argc == 2 && zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) ||
  2456. (argc == 3 && zend_get_parameters_ex(3, &arg1, &arg2, &arg3) == FAILURE) ||
  2457. argc < 2 || argc > 3) {
  2458. WRONG_PARAM_COUNT;
  2459. }
  2460. convert_to_string_ex(arg1);
  2461. mbfl_string_init(&string);
  2462. string.no_language = MBSTRG(current_language);
  2463. string.no_encoding = MBSTRG(current_internal_encoding);
  2464. string.val = (unsigned char *)Z_STRVAL_PP(arg1);
  2465. string.len = Z_STRLEN_PP(arg1);
  2466. /* encoding */
  2467. if (argc == 3) {
  2468. convert_to_string_ex(arg3);
  2469. no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(arg3));
  2470. if (no_encoding == mbfl_no_encoding_invalid) {
  2471. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg3));
  2472. RETURN_FALSE;
  2473. } else {
  2474. string.no_encoding = no_encoding;
  2475. }
  2476. }
  2477. /* conversion map */
  2478. convmap = NULL;
  2479. if (Z_TYPE_PP(arg2) == IS_ARRAY){
  2480. target_hash = Z_ARRVAL_PP(arg2);
  2481. zend_hash_internal_pointer_reset(target_hash);
  2482. i = zend_hash_num_elements(target_hash);
  2483. if (i > 0) {
  2484. convmap = (int *)safe_emalloc(i, sizeof(int), 0);
  2485. mapelm = convmap;
  2486. mapsize = 0;
  2487. while (i > 0) {
  2488. if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
  2489. break;
  2490. }
  2491. convert_to_long_ex(hash_entry);
  2492. *mapelm++ = Z_LVAL_PP(hash_entry);
  2493. mapsize++;
  2494. i--;
  2495. zend_hash_move_forward(target_hash);
  2496. }
  2497. }
  2498. }
  2499. if (convmap == NULL) {
  2500. RETURN_FALSE;
  2501. }
  2502. mapsize /= 4;
  2503. ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
  2504. if (ret != NULL) {
  2505. RETVAL_STRINGL((char *)ret->val, ret->len, 0);
  2506. } else {
  2507. RETVAL_FALSE;
  2508. }
  2509. efree((void *)convmap);
  2510. }
  2511. /* }}} */
  2512. /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
  2513. Converts specified characters to HTML numeric entities */
  2514. PHP_FUNCTION(mb_encode_numericentity)
  2515. {
  2516. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  2517. }
  2518. /* }}} */
  2519. /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
  2520. Converts HTML numeric entities to character code */
  2521. PHP_FUNCTION(mb_decode_numericentity)
  2522. {
  2523. php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  2524. }
  2525. /* }}} */
  2526. /* }}} */
  2527. /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
  2528. * Sends an email message with MIME scheme
  2529. */
  2530. #if HAVE_SENDMAIL
  /* Appends one character to the smart_str named `token` at the expansion
   * site. When token.a is 0 only token.len is incremented and no byte is
   * written; otherwise the character is appended with smart_str_appendc().
   * NOTE(review): a == 0 appears to mark a token that still points into the
   * buffer being parsed, so growing len extends the view - confirm. */
  #define APPEND_ONE_CHAR(ch) do { \
      if (token.a <= 0) { \
          token.len++; \
      } else { \
          smart_str_appendc(&token, ch); \
      } \
  } while (0)
  /* Gives a smart_str whose `a` field is 0 its own heap buffer: `a` becomes
   * the smallest power of two that is >= len (at least 1), a buffer of a + 1
   * bytes is allocated with emalloc(), and the current len bytes are copied
   * into it. A smart_str with a non-zero `a` is left untouched. */
  #define SEPARATE_SMART_STR(str) do { \
      if ((str)->a == 0) { \
          char *tmp_ptr; \
          for ((str)->a = 1; (str)->a < (str)->len; (str)->a <<= 1) { \
              /* keep doubling the capacity */ \
          } \
          tmp_ptr = emalloc((str)->a + 1); \
          memcpy(tmp_ptr, (str)->c, (str)->len); \
          (str)->c = tmp_ptr; \
      } \
  } while (0)
  2550. static void my_smart_str_dtor(smart_str *s)
  2551. {
  2552. if (s->a > 0) {
  2553. smart_str_free(s);
  2554. }
  2555. }
  2556. static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
  2557. {
  2558. const char *ps;
  2559. size_t icnt;
  2560. int state = 0;
  2561. int crlf_state = -1;
  2562. smart_str token = { 0, 0, 0 };
  2563. smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
  2564. ps = str;
  2565. icnt = str_len;
  2566. /*
  2567. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2568. * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
  2569. * state 0 1 2 3
  2570. *
  2571. * C o n t e n t - T y p e : t e x t / h t m l \r\n
  2572. * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
  2573. * crlf_state -1 0 1 -1
  2574. *
  2575. */
  2576. while (icnt > 0) {
  2577. switch (*ps) {
  2578. case ':':
  2579. if (crlf_state == 1) {
  2580. APPEND_ONE_CHAR('\r');
  2581. }
  2582. if (state == 0 || state == 1) {
  2583. fld_name = token;
  2584. state = 2;
  2585. } else {
  2586. APPEND_ONE_CHAR(*ps);
  2587. }
  2588. crlf_state = 0;
  2589. break;
  2590. case '\n':
  2591. if (crlf_state == -1) {
  2592. goto out;
  2593. }
  2594. crlf_state = -1;
  2595. break;
  2596. case '\r':
  2597. if (crlf_state == 1) {
  2598. APPEND_ONE_CHAR('\r');
  2599. } else {
  2600. crlf_state = 1;
  2601. }
  2602. break;
  2603. case ' ': case '\t':
  2604. if (crlf_state == -1) {
  2605. if (state == 3) {
  2606. /* continuing from the previous line */
  2607. SEPARATE_SMART_STR(&token);
  2608. state = 4;
  2609. } else {
  2610. /* simply skipping this new line */
  2611. state = 5;
  2612. }
  2613. } else {
  2614. if (crlf_state == 1) {
  2615. APPEND_ONE_CHAR('\r');
  2616. }
  2617. if (state == 1 || state == 3) {
  2618. APPEND_ONE_CHAR(*ps);
  2619. }
  2620. }
  2621. crlf_state = 0;
  2622. break;
  2623. default:
  2624. switch (state) {
  2625. case 0:
  2626. token.c = (char *)ps;
  2627. token.len = 0;
  2628. token.a = 0;
  2629. state = 1;
  2630. break;
  2631. case 2:
  2632. if (crlf_state != -1) {
  2633. token.c = (char *)ps;
  2634. token.len = 0;
  2635. token.a = 0;
  2636. state = 3;
  2637. break;
  2638. }
  2639. /* break is missing intentionally */
  2640. case 3:
  2641. if (crlf_state == -1) {
  2642. fld_val = token;
  2643. if (fld_name.c != NULL && fld_val.c != NULL) {
  2644. char *dummy;
  2645. /* FIXME: some locale free implementation is
  2646. * really required here,,, */
  2647. SEPARATE_SMART_STR(&fld_name);
  2648. php_strtoupper(fld_name.c, fld_name.len);
  2649. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  2650. my_smart_str_dtor(&fld_name);
  2651. }
  2652. memset(&fld_name, 0, sizeof(smart_str));
  2653. memset(&fld_val, 0, sizeof(smart_str));
  2654. token.c = (char *)ps;
  2655. token.len = 0;
  2656. token.a = 0;
  2657. state = 1;
  2658. }
  2659. break;
  2660. case 4:
  2661. APPEND_ONE_CHAR(' ');
  2662. state = 3;
  2663. break;
  2664. }
  2665. if (crlf_state == 1) {
  2666. APPEND_ONE_CHAR('\r');
  2667. }
  2668. APPEND_ONE_CHAR(*ps);
  2669. crlf_state = 0;
  2670. break;
  2671. }
  2672. ps++, icnt--;
  2673. }
  2674. out:
  2675. if (state == 2) {
  2676. token.c = "";
  2677. token.len = 0;
  2678. token.a = 0;
  2679. state = 3;
  2680. }
  2681. if (state == 3) {
  2682. fld_val = token;
  2683. if (fld_name.c != NULL && fld_val.c != NULL) {
  2684. void *dummy;
  2685. /* FIXME: some locale free implementation is
  2686. * really required here,,, */
  2687. SEPARATE_SMART_STR(&fld_name);
  2688. php_strtoupper(fld_name.c, fld_name.len);
  2689. zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
  2690. my_smart_str_dtor(&fld_name);
  2691. }
  2692. }
  2693. return state;
  2694. }
  2695. PHP_FUNCTION(mb_send_mail)
  2696. {
  2697. int n;
  2698. char *to=NULL;
  2699. int to_len;
  2700. char *message=NULL;
  2701. int message_len;
  2702. char *headers=NULL;
  2703. int headers_len;
  2704. char *subject=NULL;
  2705. int subject_len;
  2706. char *extra_cmd=NULL;
  2707. int extra_cmd_len;
  2708. char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
  2709. struct {
  2710. int cnt_type:1;
  2711. int cnt_trans_enc:1;
  2712. } suppressed_hdrs = { 0, 0 };
  2713. char *message_buf=NULL, *subject_buf=NULL, *p;
  2714. mbfl_string orig_str, conv_str;
  2715. mbfl_string *pstr; /* pointer to mbfl string for return value */
  2716. enum mbfl_no_encoding
  2717. tran_cs, /* transfar text charset */
  2718. head_enc, /* header transfar encoding */
  2719. body_enc; /* body transfar encoding */
  2720. mbfl_memory_device device; /* automatic allocateable buffer for additional header */
  2721. const mbfl_language *lang;
  2722. int err = 0;
  2723. HashTable ht_headers;
  2724. smart_str *s;
  2725. extern void mbfl_memory_device_unput(mbfl_memory_device *device);
  2726. /* initialize */
  2727. mbfl_memory_device_init(&device, 0, 0);
  2728. mbfl_string_init(&orig_str);
  2729. mbfl_string_init(&conv_str);
  2730. /* character-set, transfer-encoding */
  2731. tran_cs = mbfl_no_encoding_utf8;
  2732. head_enc = mbfl_no_encoding_base64;
  2733. body_enc = mbfl_no_encoding_base64;
  2734. lang = mbfl_no2language(MBSTRG(current_language));
  2735. if (lang != NULL) {
  2736. tran_cs = lang->mail_charset;
  2737. head_enc = lang->mail_header_encoding;
  2738. body_enc = lang->mail_body_encoding;
  2739. }
  2740. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
  2741. return;
  2742. }
  2743. zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
  2744. if (headers != NULL) {
  2745. _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
  2746. }
  2747. if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
  2748. char *tmp;
  2749. char *param_name;
  2750. char *charset = NULL;
  2751. SEPARATE_SMART_STR(s);
  2752. smart_str_0(s);
  2753. p = strchr(s->c, ';');
  2754. if (p != NULL) {
  2755. /* skipping the padded spaces */
  2756. do {
  2757. ++p;
  2758. } while (*p == ' ' || *p == '\t');
  2759. if (*p != '\0') {
  2760. if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
  2761. if (strcasecmp(param_name, "charset") == 0) {
  2762. enum mbfl_no_encoding _tran_cs = tran_cs;
  2763. charset = php_strtok_r(NULL, "= ", &tmp);
  2764. if (charset != NULL) {
  2765. _tran_cs = mbfl_name2no_encoding(charset);
  2766. }
  2767. if (_tran_cs == mbfl_no_encoding_invalid) {
  2768. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
  2769. _tran_cs = mbfl_no_encoding_ascii;
  2770. }
  2771. tran_cs = _tran_cs;
  2772. }
  2773. }
  2774. }
  2775. }
  2776. suppressed_hdrs.cnt_type = 1;
  2777. }
  2778. if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
  2779. enum mbfl_no_encoding _body_enc;
  2780. SEPARATE_SMART_STR(s);
  2781. smart_str_0(s);
  2782. _body_enc = mbfl_name2no_encoding(s->c);
  2783. switch (_body_enc) {
  2784. case mbfl_no_encoding_base64:
  2785. case mbfl_no_encoding_7bit:
  2786. case mbfl_no_encoding_8bit:
  2787. body_enc = _body_enc;
  2788. break;
  2789. default:
  2790. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
  2791. body_enc = mbfl_no_encoding_8bit;
  2792. break;
  2793. }
  2794. suppressed_hdrs.cnt_trans_enc = 1;
  2795. }
  2796. /* To: */
  2797. if (to == NULL || to_len <= 0) {
  2798. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
  2799. err = 1;
  2800. }
  2801. /* Subject: */
  2802. if (subject != NULL && subject_len >= 0) {
  2803. orig_str.no_language = MBSTRG(current_language);
  2804. orig_str.val = (unsigned char *)subject;
  2805. orig_str.len = subject_len;
  2806. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  2807. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  2808. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  2809. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
  2810. }
  2811. pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
  2812. if (pstr != NULL) {
  2813. subject_buf = subject = (char *)pstr->val;
  2814. }
  2815. } else {
  2816. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
  2817. err = 1;
  2818. }
  2819. /* message body */
  2820. if (message != NULL) {
  2821. orig_str.no_language = MBSTRG(current_language);
  2822. orig_str.val = message;
  2823. orig_str.len = message_len;
  2824. orig_str.no_encoding = MBSTRG(current_internal_encoding);
  2825. if (orig_str.no_encoding == mbfl_no_encoding_invalid
  2826. || orig_str.no_encoding == mbfl_no_encoding_pass) {
  2827. orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
  2828. }
  2829. pstr = NULL;
  2830. {
  2831. mbfl_string tmpstr;
  2832. if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
  2833. tmpstr.no_encoding=mbfl_no_encoding_8bit;
  2834. pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
  2835. efree(tmpstr.val);
  2836. }
  2837. }
  2838. if (pstr != NULL) {
  2839. message_buf = message = (char *)pstr->val;
  2840. }
  2841. } else {
  2842. /* this is not really an error, so it is allowed. */
  2843. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
  2844. message = NULL;
  2845. }
  2846. /* other headers */
  2847. #define PHP_MBSTR_MAIL_MIME_HEADER1 "Mime-Version: 1.0"
  2848. #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
  2849. #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
  2850. #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
  2851. if (headers != NULL) {
  2852. p = headers;
  2853. n = headers_len;
  2854. mbfl_memory_device_strncat(&device, p, n);
  2855. if (n > 0 && p[n - 1] != '\n') {
  2856. mbfl_memory_device_strncat(&device, "\n", 1);
  2857. }
  2858. }
  2859. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
  2860. mbfl_memory_device_strncat(&device, "\n", 1);
  2861. if (!suppressed_hdrs.cnt_type) {
  2862. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
  2863. p = (char *)mbfl_no2preferred_mime_name(tran_cs);
  2864. if (p != NULL) {
  2865. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
  2866. mbfl_memory_device_strcat(&device, p);
  2867. }
  2868. mbfl_memory_device_strncat(&device, "\n", 1);
  2869. }
  2870. if (!suppressed_hdrs.cnt_trans_enc) {
  2871. mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
  2872. p = (char *)mbfl_no2preferred_mime_name(body_enc);
  2873. if (p == NULL) {
  2874. p = "7bit";
  2875. }
  2876. mbfl_memory_device_strcat(&device, p);
  2877. mbfl_memory_device_strncat(&device, "\n", 1);
  2878. }
  2879. mbfl_memory_device_unput(&device);
  2880. mbfl_memory_device_output('\0', &device);
  2881. headers = (char *)device.buffer;
  2882. if (force_extra_parameters) {
  2883. extra_cmd = estrdup(force_extra_parameters);
  2884. } else if (extra_cmd) {
  2885. extra_cmd = php_escape_shell_cmd(extra_cmd);
  2886. }
  2887. if (!err && php_mail(to, subject, message, headers, extra_cmd TSRMLS_CC)) {
  2888. RETVAL_TRUE;
  2889. } else {
  2890. RETVAL_FALSE;
  2891. }
  2892. if (extra_cmd) {
  2893. efree(extra_cmd);
  2894. }
  2895. if (subject_buf) {
  2896. efree((void *)subject_buf);
  2897. }
  2898. if (message_buf) {
  2899. efree((void *)message_buf);
  2900. }
  2901. mbfl_memory_device_clear(&device);
  2902. zend_hash_destroy(&ht_headers);
  2903. }
  2904. #undef APPEND_ONE_CHAR
  2905. #undef SEPARATE_SMART_STR
  2906. #undef PHP_MBSTR_MAIL_MIME_HEADER1
  2907. #undef PHP_MBSTR_MAIL_MIME_HEADER2
  2908. #undef PHP_MBSTR_MAIL_MIME_HEADER3
  2909. #undef PHP_MBSTR_MAIL_MIME_HEADER4
  2910. #else /* HAVE_SENDMAIL */
  2911. PHP_FUNCTION(mb_send_mail)
  2912. {
  2913. RETURN_FALSE;
  2914. }
  2915. #endif /* HAVE_SENDMAIL */
  2916. /* }}} */
  2917. /* {{{ proto string mb_get_info([string type])
  2918. Returns the current settings of mbstring */
  2919. PHP_FUNCTION(mb_get_info)
  2920. {
  2921. char *typ = NULL;
  2922. int typ_len;
  2923. char *name;
  2924. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
  2925. RETURN_FALSE;
  2926. }
  2927. if (!typ || !strcasecmp("all", typ)) {
  2928. array_init(return_value);
  2929. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  2930. add_assoc_string(return_value, "internal_encoding", name, 1);
  2931. }
  2932. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  2933. add_assoc_string(return_value, "http_input", name, 1);
  2934. }
  2935. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  2936. add_assoc_string(return_value, "http_output", name, 1);
  2937. }
  2938. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) {
  2939. add_assoc_string(return_value, "func_overload", name, 1);
  2940. }
  2941. } else if (!strcasecmp("internal_encoding", typ)) {
  2942. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
  2943. RETVAL_STRING(name, 1);
  2944. }
  2945. } else if (!strcasecmp("http_input", typ)) {
  2946. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
  2947. RETVAL_STRING(name, 1);
  2948. }
  2949. } else if (!strcasecmp("http_output", typ)) {
  2950. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
  2951. RETVAL_STRING(name, 1);
  2952. }
  2953. } else if (!strcasecmp("func_overload", typ)) {
  2954. if ((name = (char *)mbfl_no_encoding2name(MBSTRG(func_overload))) != NULL) {
  2955. RETVAL_STRING(name, 1);
  2956. }
  2957. } else {
  2958. RETURN_FALSE;
  2959. }
  2960. }
  2961. /* }}} */
  2962. /* {{{ MBSTRING_API int php_mb_encoding_translation() */
  2963. MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
  2964. {
  2965. return MBSTRG(encoding_translation);
  2966. }
  2967. /* }}} */
  2968. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
  2969. MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
  2970. {
  2971. if (enc != NULL) {
  2972. if (enc->flag & MBFL_ENCTYPE_MBCS) {
  2973. if (enc->mblen_table != NULL) {
  2974. if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
  2975. }
  2976. } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
  2977. return 2;
  2978. } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
  2979. return 4;
  2980. }
  2981. }
  2982. return 1;
  2983. }
  2984. /* }}} */
  2985. /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
  2986. MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
  2987. {
  2988. return php_mb_mbchar_bytes_ex(s,
  2989. mbfl_no2encoding(MBSTRG(internal_encoding)));
  2990. }
  2991. /* }}} */
  2992. /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
  2993. MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
  2994. {
  2995. register const char *p = s;
  2996. char *last=NULL;
  2997. if (nbytes == (size_t)-1) {
  2998. size_t nb = 0;
  2999. while (*p != '\0') {
  3000. if (nb == 0) {
  3001. if ((unsigned char)*p == (unsigned char)c) {
  3002. last = (char *)p;
  3003. }
  3004. nb = php_mb_mbchar_bytes_ex(p, enc);
  3005. if (nb == 0) {
  3006. return NULL; /* something is going wrong! */
  3007. }
  3008. }
  3009. --nb;
  3010. ++p;
  3011. }
  3012. } else {
  3013. register size_t bcnt = nbytes;
  3014. register size_t nbytes_char;
  3015. while (bcnt > 0) {
  3016. if ((unsigned char)*p == (unsigned char)c) {
  3017. last = (char *)p;
  3018. }
  3019. nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
  3020. if (bcnt < nbytes_char) {
  3021. return NULL;
  3022. }
  3023. p += nbytes_char;
  3024. bcnt -= nbytes_char;
  3025. }
  3026. }
  3027. return last;
  3028. }
  3029. /* }}} */
  3030. /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
  3031. MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
  3032. {
  3033. return php_mb_safe_strrchr_ex(s, c, nbytes,
  3034. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3035. }
  3036. /* }}} */
  3037. /* {{{ MBSTRING_API char *php_mb_strrchr() */
  3038. MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
  3039. {
  3040. return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
  3041. }
  3042. /* }}} */
  3043. /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
  3044. MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
  3045. {
  3046. if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
  3047. return php_mb_mbchar_bytes_ex(s,
  3048. mbfl_no2encoding(MBSTRG(http_input_identify)));
  3049. } else {
  3050. return php_mb_mbchar_bytes_ex(s,
  3051. mbfl_no2encoding(MBSTRG(internal_encoding)));
  3052. }
  3053. }
  3054. /* }}} */
  3055. /* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
  3056. MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from
  3057. TSRMLS_DC)
  3058. {
  3059. int i;
  3060. mbfl_string string, result, *ret = NULL;
  3061. enum mbfl_no_encoding from_encoding, to_encoding;
  3062. mbfl_buffer_converter *convd;
  3063. if (encoding_to) {
  3064. /* new encoding */
  3065. to_encoding = mbfl_name2no_encoding(encoding_to);
  3066. if (to_encoding == mbfl_no_encoding_invalid) {
  3067. return -1;
  3068. }
  3069. } else {
  3070. to_encoding = MBSTRG(current_internal_encoding);
  3071. }
  3072. if (encoding_from) {
  3073. /* old encoding */
  3074. from_encoding = mbfl_name2no_encoding(encoding_from);
  3075. if (from_encoding == mbfl_no_encoding_invalid) {
  3076. return -1;
  3077. }
  3078. } else {
  3079. from_encoding = MBSTRG(http_input_identify);
  3080. }
  3081. if (from_encoding == mbfl_no_encoding_pass) {
  3082. return 0;
  3083. }
  3084. /* initialize string */
  3085. mbfl_string_init(&string);
  3086. mbfl_string_init(&result);
  3087. string.no_encoding = from_encoding;
  3088. string.no_language = MBSTRG(current_language);
  3089. for (i=0; i<num; i++){
  3090. string.val = (char*)str[i];
  3091. string.len = len[i];
  3092. /* initialize converter */
  3093. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  3094. if (convd == NULL) {
  3095. return -1;
  3096. }
  3097. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3098. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3099. /* do it */
  3100. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3101. if (ret != NULL) {
  3102. efree(str[i]);
  3103. str[i] = ret->val;
  3104. len[i] = ret->len;
  3105. }
  3106. mbfl_buffer_converter_delete(convd);
  3107. }
  3108. return ret ? 0 : -1;
  3109. }
  3110. /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
  3111. */
  3112. MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
  3113. {
  3114. mbfl_string string;
  3115. enum mbfl_no_encoding *elist;
  3116. enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
  3117. mbfl_encoding_detector *identd = NULL;
  3118. int size;
  3119. enum mbfl_no_encoding *list;
  3120. if (MBSTRG(http_input_list_size) == 1 &&
  3121. MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
  3122. MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
  3123. return SUCCESS;
  3124. }
  3125. if (MBSTRG(http_input_list_size) == 1 &&
  3126. MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
  3127. mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
  3128. MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
  3129. return SUCCESS;
  3130. }
  3131. if (arg_list && strlen(arg_list)>0) {
  3132. /* make encoding list */
  3133. list = NULL;
  3134. size = 0;
  3135. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  3136. if (size > 0 && list != NULL) {
  3137. elist = list;
  3138. } else {
  3139. elist = MBSTRG(current_detect_order_list);
  3140. size = MBSTRG(current_detect_order_list_size);
  3141. if (size <= 0){
  3142. elist = MBSTRG(default_detect_order_list);
  3143. size = MBSTRG(default_detect_order_list_size);
  3144. }
  3145. }
  3146. } else {
  3147. elist = MBSTRG(current_detect_order_list);
  3148. size = MBSTRG(current_detect_order_list_size);
  3149. if (size <= 0){
  3150. elist = MBSTRG(default_detect_order_list);
  3151. size = MBSTRG(default_detect_order_list_size);
  3152. }
  3153. }
  3154. mbfl_string_init(&string);
  3155. string.no_language = MBSTRG(current_language);
  3156. identd = mbfl_encoding_detector_new(elist, size);
  3157. if (identd) {
  3158. int n = 0;
  3159. while(n < num){
  3160. string.val = (unsigned char *)arg_string[n];
  3161. string.len = arg_length[n];
  3162. if (mbfl_encoding_detector_feed(identd, &string)) {
  3163. break;
  3164. }
  3165. n++;
  3166. }
  3167. encoding = mbfl_encoding_detector_judge(identd);
  3168. mbfl_encoding_detector_delete(identd);
  3169. }
  3170. if (encoding != mbfl_no_encoding_invalid) {
  3171. MBSTRG(http_input_identify) = encoding;
  3172. return SUCCESS;
  3173. } else {
  3174. return FAILURE;
  3175. }
  3176. }
  3177. /* }}} */
  3178. #ifdef ZEND_MULTIBYTE
  3179. /* {{{ MBSTRING_API int php_mb_set_zend_encoding() */
  3180. MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
  3181. {
  3182. /* 'd better use mbfl_memory_device? */
  3183. char *name, *list = NULL;
  3184. int n, *entry, list_size = 0;
  3185. zend_encoding_detector encoding_detector;
  3186. zend_encoding_converter encoding_converter;
  3187. zend_encoding_oddlen encoding_oddlen;
  3188. /* notify script encoding to Zend Engine */
  3189. entry = MBSTRG(script_encoding_list);
  3190. n = MBSTRG(script_encoding_list_size);
  3191. while (n > 0) {
  3192. name = (char *)mbfl_no_encoding2name(*entry);
  3193. if (name) {
  3194. list_size += strlen(name) + 1;
  3195. if (!list) {
  3196. list = (char*)emalloc(list_size);
  3197. *list = (char)NULL;
  3198. } else {
  3199. list = (char*)erealloc(list, list_size);
  3200. strcat(list, ",");
  3201. }
  3202. strcat(list, name);
  3203. }
  3204. entry++;
  3205. n--;
  3206. }
  3207. zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
  3208. if (list) {
  3209. efree(list);
  3210. }
  3211. encoding_detector = php_mb_encoding_detector;
  3212. encoding_converter = php_mb_encoding_converter;
  3213. encoding_oddlen = php_mb_oddlen;
  3214. /* TODO: make independent from mbstring.encoding_translation? */
  3215. if (MBSTRG(encoding_translation)) {
  3216. /* notify internal encoding to Zend Engine */
  3217. name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
  3218. zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
  3219. }
  3220. zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
  3221. return 0;
  3222. }
  3223. /* }}} */
  3224. /* {{{ char *php_mb_encoding_detector()
  3225. * Interface for Zend Engine
  3226. */
  3227. char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
  3228. {
  3229. mbfl_string string;
  3230. const char *ret;
  3231. enum mbfl_no_encoding *elist;
  3232. int size, *list;
  3233. /* make encoding list */
  3234. list = NULL;
  3235. size = 0;
  3236. php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
  3237. if (size <= 0) {
  3238. return NULL;
  3239. }
  3240. if (size > 0 && list != NULL) {
  3241. elist = list;
  3242. } else {
  3243. elist = MBSTRG(current_detect_order_list);
  3244. size = MBSTRG(current_detect_order_list_size);
  3245. }
  3246. mbfl_string_init(&string);
  3247. string.no_language = MBSTRG(current_language);
  3248. string.val = (char*)arg_string;
  3249. string.len = arg_length;
  3250. ret = mbfl_identify_encoding_name(&string, elist, size, 0);
  3251. if (list != NULL) {
  3252. efree((void *)list);
  3253. }
  3254. if (ret != NULL) {
  3255. return estrdup(ret);
  3256. } else {
  3257. return NULL;
  3258. }
  3259. }
  3260. /* }}} */
  3261. /* {{{ int php_mb_encoding_converter() */
  3262. int php_mb_encoding_converter(char **to, int *to_length, const char *from,
  3263. int from_length, const char *encoding_to, const char *encoding_from
  3264. TSRMLS_DC)
  3265. {
  3266. mbfl_string string, result, *ret;
  3267. enum mbfl_no_encoding from_encoding, to_encoding;
  3268. mbfl_buffer_converter *convd;
  3269. /* new encoding */
  3270. to_encoding = mbfl_name2no_encoding(encoding_to);
  3271. if (to_encoding == mbfl_no_encoding_invalid) {
  3272. return -1;
  3273. }
  3274. /* old encoding */
  3275. from_encoding = mbfl_name2no_encoding(encoding_from);
  3276. if (from_encoding == mbfl_no_encoding_invalid) {
  3277. return -1;
  3278. }
  3279. /* initialize string */
  3280. mbfl_string_init(&string);
  3281. mbfl_string_init(&result);
  3282. string.no_encoding = from_encoding;
  3283. string.no_language = MBSTRG(current_language);
  3284. string.val = (char*)from;
  3285. string.len = from_length;
  3286. /* initialize converter */
  3287. convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
  3288. if (convd == NULL) {
  3289. return -1;
  3290. }
  3291. mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  3292. mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
  3293. /* do it */
  3294. ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
  3295. if (ret != NULL) {
  3296. *to = ret->val;
  3297. *to_length = ret->len;
  3298. }
  3299. mbfl_buffer_converter_delete(convd);
  3300. return ret ? 0 : -1;
  3301. }
  3302. /* }}} */
  3303. /* {{{ int php_mb_oddlen()
  3304. * returns number of odd (e.g. appears only first byte of multibyte
  3305. * character) chars
  3306. */
  3307. int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC)
  3308. {
  3309. mbfl_string mb_string;
  3310. mbfl_string_init(&mb_string);
  3311. mb_string.no_language = MBSTRG(current_language);
  3312. mb_string.no_encoding = mbfl_name2no_encoding(encoding);
  3313. mb_string.val = (char*)string;
  3314. mb_string.len = length;
  3315. if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
  3316. return 0;
  3317. }
  3318. return mbfl_oddlen(&mb_string);
  3319. }
  3320. /* }}} */
  3321. #endif /* ZEND_MULTIBYTE */
  3322. #endif /* HAVE_MBSTRING */
  3323. /*
  3324. * Local variables:
  3325. * tab-width: 4
  3326. * c-basic-offset: 4
  3327. * End:
  3328. * vim600: fdm=marker
  3329. * vim: noet sw=4 ts=4
  3330. */