You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1401 lines
34 KiB

20 years ago
20 years ago
20 years ago
19 years ago
19 years ago
19 years ago
19 years ago
24 years ago
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2009 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  19. #ifdef HAVE_CONFIG_H
  20. #include "config.h"
  21. #endif
  22. #include "php.h"
  23. #include "php_ini.h"
  24. #if HAVE_MBREGEX
  25. #include "ext/standard/php_smart_str.h"
  26. #include "ext/standard/info.h"
  27. #include "php_mbregex.h"
  28. #include "mbstring.h"
  29. #include "php_onig_compat.h" /* must come prior to the oniguruma header */
  30. #include <oniguruma.h>
  31. #undef UChar
  32. ZEND_EXTERN_MODULE_GLOBALS(mbstring)
  33. struct _zend_mb_regex_globals {
  34. OnigEncoding default_mbctype;
  35. OnigEncoding current_mbctype;
  36. HashTable ht_rc;
  37. zval *search_str;
  38. zval *search_str_val;
  39. unsigned int search_pos;
  40. php_mb_regex_t *search_re;
  41. OnigRegion *search_regs;
  42. OnigOptionType regex_default_options;
  43. OnigSyntaxType *regex_default_syntax;
  44. };
  45. #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
  46. /* {{{ static void php_mb_regex_free_cache() */
  47. static void php_mb_regex_free_cache(php_mb_regex_t **pre)
  48. {
  49. onig_free(*pre);
  50. }
  51. /* }}} */
  52. /* {{{ _php_mb_regex_globals_ctor */
  53. static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
  54. {
  55. pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
  56. pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
  57. zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
  58. pglobals->search_str = (zval*) NULL;
  59. pglobals->search_re = (php_mb_regex_t*)NULL;
  60. pglobals->search_pos = 0;
  61. pglobals->search_regs = (OnigRegion*)NULL;
  62. pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  63. pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
  64. return SUCCESS;
  65. }
  66. /* }}} */
  67. /* {{{ _php_mb_regex_globals_dtor */
  68. static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
  69. {
  70. zend_hash_destroy(&pglobals->ht_rc);
  71. }
  72. /* }}} */
  73. /* {{{ php_mb_regex_globals_alloc */
  74. zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
  75. {
  76. zend_mb_regex_globals *pglobals = pemalloc(
  77. sizeof(zend_mb_regex_globals), 1);
  78. if (!pglobals) {
  79. return NULL;
  80. }
  81. if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
  82. pefree(pglobals, 1);
  83. return NULL;
  84. }
  85. return pglobals;
  86. }
  87. /* }}} */
  88. /* {{{ php_mb_regex_globals_free */
  89. void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
  90. {
  91. if (!pglobals) {
  92. return;
  93. }
  94. _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
  95. pefree(pglobals, 1);
  96. }
  97. /* }}} */
  98. /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
  99. PHP_MINIT_FUNCTION(mb_regex)
  100. {
  101. onig_init();
  102. return SUCCESS;
  103. }
  104. /* }}} */
  105. /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
  106. PHP_MSHUTDOWN_FUNCTION(mb_regex)
  107. {
  108. onig_end();
  109. return SUCCESS;
  110. }
  111. /* }}} */
  112. /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
  113. PHP_RINIT_FUNCTION(mb_regex)
  114. {
  115. return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
  116. }
  117. /* }}} */
  118. /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
  119. PHP_RSHUTDOWN_FUNCTION(mb_regex)
  120. {
  121. MBREX(current_mbctype) = MBREX(default_mbctype);
  122. if (MBREX(search_str) != NULL) {
  123. zval_ptr_dtor(&MBREX(search_str));
  124. MBREX(search_str) = (zval *)NULL;
  125. }
  126. MBREX(search_pos) = 0;
  127. if (MBREX(search_regs) != NULL) {
  128. onig_region_free(MBREX(search_regs), 1);
  129. MBREX(search_regs) = (OnigRegion *)NULL;
  130. }
  131. zend_hash_clean(&MBREX(ht_rc));
  132. return SUCCESS;
  133. }
  134. /* }}} */
  135. /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
  136. PHP_MINFO_FUNCTION(mb_regex)
  137. {
  138. char buf[32];
  139. php_info_print_table_start();
  140. php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
  141. snprintf(buf, sizeof(buf), "%d.%d.%d",
  142. ONIGURUMA_VERSION_MAJOR,
  143. ONIGURUMA_VERSION_MINOR,
  144. ONIGURUMA_VERSION_TEENY);
  145. #ifdef PHP_ONIG_BUNDLED
  146. #ifdef USE_COMBINATION_EXPLOSION_CHECK
  147. php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
  148. #else /* USE_COMBINATION_EXPLOSION_CHECK */
  149. php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
  150. #endif /* USE_COMBINATION_EXPLOSION_CHECK */
  151. #endif /* PHP_BUNDLED_ONIG */
  152. php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
  153. php_info_print_table_end();
  154. }
  155. /* }}} */
  156. /*
  157. * encoding name resolver
  158. */
  159. /* {{{ encoding name map */
  160. typedef struct _php_mb_regex_enc_name_map_t {
  161. const char *names;
  162. OnigEncoding code;
  163. } php_mb_regex_enc_name_map_t;
  164. php_mb_regex_enc_name_map_t enc_name_map[] = {
  165. #ifdef ONIG_ENCODING_EUC_JP
  166. {
  167. "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
  168. ONIG_ENCODING_EUC_JP
  169. },
  170. #endif
  171. #ifdef ONIG_ENCODING_UTF8
  172. {
  173. "UTF-8\0UTF8\0",
  174. ONIG_ENCODING_UTF8
  175. },
  176. #endif
  177. #ifdef ONIG_ENCODING_UTF16_BE
  178. {
  179. "UTF-16\0UTF-16BE\0",
  180. ONIG_ENCODING_UTF16_BE
  181. },
  182. #endif
  183. #ifdef ONIG_ENCODING_UTF16_LE
  184. {
  185. "UTF-16LE\0",
  186. ONIG_ENCODING_UTF16_LE
  187. },
  188. #endif
  189. #ifdef ONIG_ENCODING_UTF32_BE
  190. {
  191. "UCS-4\0UTF-32\0UTF-32BE\0",
  192. ONIG_ENCODING_UTF32_BE
  193. },
  194. #endif
  195. #ifdef ONIG_ENCODING_UTF32_LE
  196. {
  197. "UCS-4LE\0UTF-32LE\0",
  198. ONIG_ENCODING_UTF32_LE
  199. },
  200. #endif
  201. #ifdef ONIG_ENCODING_SJIS
  202. {
  203. "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
  204. ONIG_ENCODING_SJIS
  205. },
  206. #endif
  207. #ifdef ONIG_ENCODING_BIG5
  208. {
  209. "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
  210. ONIG_ENCODING_BIG5
  211. },
  212. #endif
  213. #ifdef ONIG_ENCODING_EUC_CN
  214. {
  215. "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
  216. ONIG_ENCODING_EUC_CN
  217. },
  218. #endif
  219. #ifdef ONIG_ENCODING_EUC_TW
  220. {
  221. "EUC-TW\0EUCTW\0EUC_TW\0",
  222. ONIG_ENCODING_EUC_TW
  223. },
  224. #endif
  225. #ifdef ONIG_ENCODING_EUC_KR
  226. {
  227. "EUC-KR\0EUCKR\0EUC_KR\0",
  228. ONIG_ENCODING_EUC_KR
  229. },
  230. #endif
  231. #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
  232. {
  233. "KOI8\0KOI-8\0",
  234. ONIG_ENCODING_KOI8
  235. },
  236. #endif
  237. #ifdef ONIG_ENCODING_KOI8_R
  238. {
  239. "KOI8R\0KOI8-R\0KOI-8R\0",
  240. ONIG_ENCODING_KOI8_R
  241. },
  242. #endif
  243. #ifdef ONIG_ENCODING_ISO_8859_1
  244. {
  245. "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
  246. ONIG_ENCODING_ISO_8859_1
  247. },
  248. #endif
  249. #ifdef ONIG_ENCODING_ISO_8859_2
  250. {
  251. "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
  252. ONIG_ENCODING_ISO_8859_2
  253. },
  254. #endif
  255. #ifdef ONIG_ENCODING_ISO_8859_3
  256. {
  257. "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
  258. ONIG_ENCODING_ISO_8859_3
  259. },
  260. #endif
  261. #ifdef ONIG_ENCODING_ISO_8859_4
  262. {
  263. "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
  264. ONIG_ENCODING_ISO_8859_4
  265. },
  266. #endif
  267. #ifdef ONIG_ENCODING_ISO_8859_5
  268. {
  269. "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
  270. ONIG_ENCODING_ISO_8859_5
  271. },
  272. #endif
  273. #ifdef ONIG_ENCODING_ISO_8859_6
  274. {
  275. "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
  276. ONIG_ENCODING_ISO_8859_6
  277. },
  278. #endif
  279. #ifdef ONIG_ENCODING_ISO_8859_7
  280. {
  281. "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
  282. ONIG_ENCODING_ISO_8859_7
  283. },
  284. #endif
  285. #ifdef ONIG_ENCODING_ISO_8859_8
  286. {
  287. "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
  288. ONIG_ENCODING_ISO_8859_8
  289. },
  290. #endif
  291. #ifdef ONIG_ENCODING_ISO_8859_9
  292. {
  293. "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
  294. ONIG_ENCODING_ISO_8859_9
  295. },
  296. #endif
  297. #ifdef ONIG_ENCODING_ISO_8859_10
  298. {
  299. "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
  300. ONIG_ENCODING_ISO_8859_10
  301. },
  302. #endif
  303. #ifdef ONIG_ENCODING_ISO_8859_11
  304. {
  305. "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
  306. ONIG_ENCODING_ISO_8859_11
  307. },
  308. #endif
  309. #ifdef ONIG_ENCODING_ISO_8859_13
  310. {
  311. "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
  312. ONIG_ENCODING_ISO_8859_13
  313. },
  314. #endif
  315. #ifdef ONIG_ENCODING_ISO_8859_14
  316. {
  317. "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
  318. ONIG_ENCODING_ISO_8859_14
  319. },
  320. #endif
  321. #ifdef ONIG_ENCODING_ISO_8859_15
  322. {
  323. "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
  324. ONIG_ENCODING_ISO_8859_15
  325. },
  326. #endif
  327. #ifdef ONIG_ENCODING_ISO_8859_16
  328. {
  329. "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
  330. ONIG_ENCODING_ISO_8859_16
  331. },
  332. #endif
  333. #ifdef ONIG_ENCODING_ASCII
  334. {
  335. "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
  336. ONIG_ENCODING_ASCII
  337. },
  338. #endif
  339. { NULL, ONIG_ENCODING_UNDEF }
  340. };
  341. /* }}} */
  342. /* {{{ php_mb_regex_name2mbctype */
  343. static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
  344. {
  345. const char *p;
  346. php_mb_regex_enc_name_map_t *mapping;
  347. if (pname == NULL) {
  348. return ONIG_ENCODING_UNDEF;
  349. }
  350. for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  351. for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
  352. if (strcasecmp(p, pname) == 0) {
  353. return mapping->code;
  354. }
  355. }
  356. }
  357. return ONIG_ENCODING_UNDEF;
  358. }
  359. /* }}} */
  360. /* {{{ php_mb_regex_mbctype2name */
  361. static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
  362. {
  363. php_mb_regex_enc_name_map_t *mapping;
  364. for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  365. if (mapping->code == mbctype) {
  366. return mapping->names;
  367. }
  368. }
  369. return NULL;
  370. }
  371. /* }}} */
  372. /* {{{ php_mb_regex_set_mbctype */
  373. int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
  374. {
  375. OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  376. if (mbctype == ONIG_ENCODING_UNDEF) {
  377. return FAILURE;
  378. }
  379. MBREX(current_mbctype) = mbctype;
  380. return SUCCESS;
  381. }
  382. /* }}} */
  383. /* {{{ php_mb_regex_set_default_mbctype */
  384. int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
  385. {
  386. OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  387. if (mbctype == ONIG_ENCODING_UNDEF) {
  388. return FAILURE;
  389. }
  390. MBREX(default_mbctype) = mbctype;
  391. return SUCCESS;
  392. }
  393. /* }}} */
  394. /* {{{ php_mb_regex_get_mbctype */
  395. const char *php_mb_regex_get_mbctype(TSRMLS_D)
  396. {
  397. return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  398. }
  399. /* }}} */
  400. /* {{{ php_mb_regex_get_default_mbctype */
  401. const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
  402. {
  403. return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
  404. }
  405. /* }}} */
  406. /*
  407. * regex cache
  408. */
  409. /* {{{ php_mbregex_compile_pattern */
  410. static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
  411. {
  412. int err_code = 0;
  413. int found = 0;
  414. php_mb_regex_t *retval = NULL, **rc = NULL;
  415. OnigErrorInfo err_info;
  416. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  417. found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
  418. if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
  419. if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
  420. onig_error_code_to_str(err_str, err_code, err_info);
  421. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
  422. retval = NULL;
  423. goto out;
  424. }
  425. zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
  426. } else if (found == SUCCESS) {
  427. retval = *rc;
  428. }
  429. out:
  430. return retval;
  431. }
  432. /* }}} */
  433. /* {{{ _php_mb_regex_get_option_string */
  434. static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
  435. {
  436. size_t len_left = len;
  437. size_t len_req = 0;
  438. char *p = str;
  439. char c;
  440. if ((option & ONIG_OPTION_IGNORECASE) != 0) {
  441. if (len_left > 0) {
  442. --len_left;
  443. *(p++) = 'i';
  444. }
  445. ++len_req;
  446. }
  447. if ((option & ONIG_OPTION_EXTEND) != 0) {
  448. if (len_left > 0) {
  449. --len_left;
  450. *(p++) = 'x';
  451. }
  452. ++len_req;
  453. }
  454. if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
  455. (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
  456. if (len_left > 0) {
  457. --len_left;
  458. *(p++) = 'p';
  459. }
  460. ++len_req;
  461. } else {
  462. if ((option & ONIG_OPTION_MULTILINE) != 0) {
  463. if (len_left > 0) {
  464. --len_left;
  465. *(p++) = 'm';
  466. }
  467. ++len_req;
  468. }
  469. if ((option & ONIG_OPTION_SINGLELINE) != 0) {
  470. if (len_left > 0) {
  471. --len_left;
  472. *(p++) = 's';
  473. }
  474. ++len_req;
  475. }
  476. }
  477. if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
  478. if (len_left > 0) {
  479. --len_left;
  480. *(p++) = 'l';
  481. }
  482. ++len_req;
  483. }
  484. if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
  485. if (len_left > 0) {
  486. --len_left;
  487. *(p++) = 'n';
  488. }
  489. ++len_req;
  490. }
  491. c = 0;
  492. if (syntax == ONIG_SYNTAX_JAVA) {
  493. c = 'j';
  494. } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
  495. c = 'u';
  496. } else if (syntax == ONIG_SYNTAX_GREP) {
  497. c = 'g';
  498. } else if (syntax == ONIG_SYNTAX_EMACS) {
  499. c = 'c';
  500. } else if (syntax == ONIG_SYNTAX_RUBY) {
  501. c = 'r';
  502. } else if (syntax == ONIG_SYNTAX_PERL) {
  503. c = 'z';
  504. } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
  505. c = 'b';
  506. } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
  507. c = 'd';
  508. }
  509. if (c != 0) {
  510. if (len_left > 0) {
  511. --len_left;
  512. *(p++) = c;
  513. }
  514. ++len_req;
  515. }
  516. if (len_left > 0) {
  517. --len_left;
  518. *(p++) = '\0';
  519. }
  520. ++len_req;
  521. if (len < len_req) {
  522. return len_req;
  523. }
  524. return 0;
  525. }
  526. /* }}} */
  527. /* {{{ _php_mb_regex_init_options */
  528. static void
  529. _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
  530. {
  531. int n;
  532. char c;
  533. int optm = 0;
  534. *syntax = ONIG_SYNTAX_RUBY;
  535. if (parg != NULL) {
  536. n = 0;
  537. while(n < narg) {
  538. c = parg[n++];
  539. switch (c) {
  540. case 'i':
  541. optm |= ONIG_OPTION_IGNORECASE;
  542. break;
  543. case 'x':
  544. optm |= ONIG_OPTION_EXTEND;
  545. break;
  546. case 'm':
  547. optm |= ONIG_OPTION_MULTILINE;
  548. break;
  549. case 's':
  550. optm |= ONIG_OPTION_SINGLELINE;
  551. break;
  552. case 'p':
  553. optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  554. break;
  555. case 'l':
  556. optm |= ONIG_OPTION_FIND_LONGEST;
  557. break;
  558. case 'n':
  559. optm |= ONIG_OPTION_FIND_NOT_EMPTY;
  560. break;
  561. case 'j':
  562. *syntax = ONIG_SYNTAX_JAVA;
  563. break;
  564. case 'u':
  565. *syntax = ONIG_SYNTAX_GNU_REGEX;
  566. break;
  567. case 'g':
  568. *syntax = ONIG_SYNTAX_GREP;
  569. break;
  570. case 'c':
  571. *syntax = ONIG_SYNTAX_EMACS;
  572. break;
  573. case 'r':
  574. *syntax = ONIG_SYNTAX_RUBY;
  575. break;
  576. case 'z':
  577. *syntax = ONIG_SYNTAX_PERL;
  578. break;
  579. case 'b':
  580. *syntax = ONIG_SYNTAX_POSIX_BASIC;
  581. break;
  582. case 'd':
  583. *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
  584. break;
  585. case 'e':
  586. if (eval != NULL) *eval = 1;
  587. break;
  588. default:
  589. break;
  590. }
  591. }
  592. if (option != NULL) *option|=optm;
  593. }
  594. }
  595. /* }}} */
  596. /*
  597. * php functions
  598. */
  599. /* {{{ proto string mb_regex_encoding([string encoding])
  600. Returns the current encoding for regex as a string. */
  601. PHP_FUNCTION(mb_regex_encoding)
  602. {
  603. size_t argc = ZEND_NUM_ARGS();
  604. char *encoding;
  605. int encoding_len;
  606. OnigEncoding mbctype;
  607. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
  608. return;
  609. }
  610. if (argc == 0) {
  611. const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  612. if (retval == NULL) {
  613. RETURN_FALSE;
  614. }
  615. RETURN_STRING((char *)retval, 1);
  616. } else if (argc == 1) {
  617. mbctype = _php_mb_regex_name2mbctype(encoding);
  618. if (mbctype == ONIG_ENCODING_UNDEF) {
  619. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
  620. RETURN_FALSE;
  621. }
  622. MBREX(current_mbctype) = mbctype;
  623. RETURN_TRUE;
  624. }
  625. }
  626. /* }}} */
  627. /* {{{ _php_mb_regex_ereg_exec */
  628. static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
  629. {
  630. zval **arg_pattern, *array;
  631. char *string;
  632. int string_len;
  633. php_mb_regex_t *re;
  634. OnigRegion *regs = NULL;
  635. int i, match_len, beg, end;
  636. OnigOptionType options;
  637. char *str;
  638. array = NULL;
  639. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
  640. RETURN_FALSE;
  641. }
  642. options = MBREX(regex_default_options);
  643. if (icase) {
  644. options |= ONIG_OPTION_IGNORECASE;
  645. }
  646. /* compile the regular expression from the supplied regex */
  647. if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
  648. /* we convert numbers to integers and treat them as a string */
  649. if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
  650. convert_to_long_ex(arg_pattern); /* get rid of decimal places */
  651. }
  652. convert_to_string_ex(arg_pattern);
  653. /* don't bother doing an extended regex with just a number */
  654. }
  655. if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
  656. php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
  657. RETVAL_FALSE;
  658. goto out;
  659. }
  660. re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
  661. if (re == NULL) {
  662. RETVAL_FALSE;
  663. goto out;
  664. }
  665. regs = onig_region_new();
  666. /* actually execute the regular expression */
  667. if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
  668. RETVAL_FALSE;
  669. goto out;
  670. }
  671. match_len = 1;
  672. str = string;
  673. if (array != NULL) {
  674. match_len = regs->end[0] - regs->beg[0];
  675. zval_dtor(array);
  676. array_init(array);
  677. for (i = 0; i < regs->num_regs; i++) {
  678. beg = regs->beg[i];
  679. end = regs->end[i];
  680. if (beg >= 0 && beg < end && end <= string_len) {
  681. add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
  682. } else {
  683. add_index_bool(array, i, 0);
  684. }
  685. }
  686. }
  687. if (match_len == 0) {
  688. match_len = 1;
  689. }
  690. RETVAL_LONG(match_len);
  691. out:
  692. if (regs != NULL) {
  693. onig_region_free(regs, 1);
  694. }
  695. }
  696. /* }}} */
  697. /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
  698. Regular expression match for multibyte string */
  699. PHP_FUNCTION(mb_ereg)
  700. {
  701. _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  702. }
  703. /* }}} */
  704. /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
  705. Case-insensitive regular expression match for multibyte string */
  706. PHP_FUNCTION(mb_eregi)
  707. {
  708. _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  709. }
  710. /* }}} */
  711. /* {{{ _php_mb_regex_ereg_replace_exec */
  712. static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options)
  713. {
  714. zval **arg_pattern_zval;
  715. char *arg_pattern;
  716. int arg_pattern_len;
  717. char *replace;
  718. int replace_len;
  719. char *string;
  720. int string_len;
  721. char *p;
  722. php_mb_regex_t *re;
  723. OnigSyntaxType *syntax;
  724. OnigRegion *regs = NULL;
  725. smart_str out_buf = { 0 };
  726. smart_str eval_buf = { 0 };
  727. smart_str *pbuf;
  728. int i, err, eval, n;
  729. OnigUChar *pos;
  730. OnigUChar *string_lim;
  731. char *description = NULL;
  732. char pat_buf[2];
  733. const mbfl_encoding *enc;
  734. {
  735. const char *current_enc_name;
  736. current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  737. if (current_enc_name == NULL ||
  738. (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
  739. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
  740. RETURN_FALSE;
  741. }
  742. }
  743. eval = 0;
  744. {
  745. char *option_str = NULL;
  746. int option_str_len = 0;
  747. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
  748. &arg_pattern_zval,
  749. &replace, &replace_len,
  750. &string, &string_len,
  751. &option_str, &option_str_len) == FAILURE) {
  752. RETURN_FALSE;
  753. }
  754. if (option_str != NULL) {
  755. _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
  756. } else {
  757. options |= MBREX(regex_default_options);
  758. syntax = MBREX(regex_default_syntax);
  759. }
  760. }
  761. if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
  762. arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
  763. arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
  764. } else {
  765. /* FIXME: this code is not multibyte aware! */
  766. convert_to_long_ex(arg_pattern_zval);
  767. pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
  768. pat_buf[1] = '\0';
  769. arg_pattern = pat_buf;
  770. arg_pattern_len = 1;
  771. }
  772. /* create regex pattern buffer */
  773. re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
  774. if (re == NULL) {
  775. RETURN_FALSE;
  776. }
  777. if (eval) {
  778. pbuf = &eval_buf;
  779. description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
  780. } else {
  781. pbuf = &out_buf;
  782. description = NULL;
  783. }
  784. /* do the actual work */
  785. err = 0;
  786. pos = (OnigUChar *)string;
  787. string_lim = (OnigUChar*)(string + string_len);
  788. regs = onig_region_new();
  789. while (err >= 0) {
  790. err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
  791. if (err <= -2) {
  792. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  793. onig_error_code_to_str(err_str, err);
  794. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
  795. break;
  796. }
  797. if (err >= 0) {
  798. #if moriyoshi_0
  799. if (regs->beg[0] == regs->end[0]) {
  800. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
  801. break;
  802. }
  803. #endif
  804. /* copy the part of the string before the match */
  805. smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
  806. /* copy replacement and backrefs */
  807. i = 0;
  808. p = replace;
  809. while (i < replace_len) {
  810. int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
  811. n = -1;
  812. if ((replace_len - i) >= 2 && fwd == 1 &&
  813. p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
  814. n = p[1] - '0';
  815. }
  816. if (n >= 0 && n < regs->num_regs) {
  817. if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
  818. smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
  819. }
  820. p += 2;
  821. i += 2;
  822. } else {
  823. smart_str_appendl(pbuf, p, fwd);
  824. p += fwd;
  825. i += fwd;
  826. }
  827. }
  828. if (eval) {
  829. zval v;
  830. /* null terminate buffer */
  831. smart_str_0(&eval_buf);
  832. /* do eval */
  833. if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
  834. efree(description);
  835. php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
  836. /* zend_error() does not return in this case */
  837. }
  838. /* result of eval */
  839. convert_to_string(&v);
  840. smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
  841. /* Clean up */
  842. eval_buf.len = 0;
  843. zval_dtor(&v);
  844. }
  845. n = regs->end[0];
  846. if ((pos - (OnigUChar *)string) < n) {
  847. pos = (OnigUChar *)string + n;
  848. } else {
  849. if (pos < string_lim) {
  850. smart_str_appendl(&out_buf, pos, 1);
  851. }
  852. pos++;
  853. }
  854. } else { /* nomatch */
  855. /* stick that last bit of string on our output */
  856. if (string_lim - pos > 0) {
  857. smart_str_appendl(&out_buf, pos, string_lim - pos);
  858. }
  859. }
  860. onig_region_free(regs, 0);
  861. }
  862. if (description) {
  863. efree(description);
  864. }
  865. if (regs != NULL) {
  866. onig_region_free(regs, 1);
  867. }
  868. smart_str_free(&eval_buf);
  869. if (err <= -2) {
  870. smart_str_free(&out_buf);
  871. RETVAL_FALSE;
  872. } else {
  873. smart_str_appendc(&out_buf, '\0');
  874. RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
  875. }
  876. }
  877. /* }}} */
  878. /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
  879. Replace regular expression for multibyte string */
  880. PHP_FUNCTION(mb_ereg_replace)
  881. {
  882. _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  883. }
  884. /* }}} */
  885. /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
  886. Case insensitive replace regular expression for multibyte string */
  887. PHP_FUNCTION(mb_eregi_replace)
  888. {
  889. _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE);
  890. }
  891. /* }}} */
  892. /* {{{ proto array mb_split(string pattern, string string [, int limit])
  893. split multibyte string into array by regular expression */
  894. PHP_FUNCTION(mb_split)
  895. {
  896. char *arg_pattern;
  897. int arg_pattern_len;
  898. php_mb_regex_t *re;
  899. OnigRegion *regs = NULL;
  900. char *string;
  901. OnigUChar *pos;
  902. int string_len;
  903. int n, err;
  904. long count = -1;
  905. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
  906. RETURN_FALSE;
  907. }
  908. if (count == 0) {
  909. count = 1;
  910. }
  911. /* create regex pattern buffer */
  912. if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
  913. RETURN_FALSE;
  914. }
  915. array_init(return_value);
  916. pos = (OnigUChar *)string;
  917. err = 0;
  918. regs = onig_region_new();
  919. /* churn through str, generating array entries as we go */
  920. while ((--count != 0) &&
  921. (err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0)) >= 0) {
  922. if (regs->beg[0] == regs->end[0]) {
  923. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
  924. break;
  925. }
  926. /* add it to the array */
  927. if (regs->beg[0] < string_len && regs->beg[0] >= (pos - (OnigUChar *)string)) {
  928. add_next_index_stringl(return_value, (char *)pos, ((OnigUChar *)(string + regs->beg[0]) - pos), 1);
  929. } else {
  930. err = -2;
  931. break;
  932. }
  933. /* point at our new starting point */
  934. n = regs->end[0];
  935. if ((pos - (OnigUChar *)string) < n) {
  936. pos = (OnigUChar *)string + n;
  937. }
  938. if (count < 0) {
  939. count = 0;
  940. }
  941. onig_region_free(regs, 0);
  942. }
  943. onig_region_free(regs, 1);
  944. /* see if we encountered an error */
  945. if (err <= -2) {
  946. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  947. onig_error_code_to_str(err_str, err);
  948. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
  949. zval_dtor(return_value);
  950. RETURN_FALSE;
  951. }
  952. /* otherwise we just have one last element to add to the array */
  953. n = ((OnigUChar *)(string + string_len) - pos);
  954. if (n > 0) {
  955. add_next_index_stringl(return_value, (char *)pos, n, 1);
  956. } else {
  957. add_next_index_stringl(return_value, "", 0, 1);
  958. }
  959. }
  960. /* }}} */
  961. /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
  962. Regular expression match for multibyte string */
  963. PHP_FUNCTION(mb_ereg_match)
  964. {
  965. char *arg_pattern;
  966. int arg_pattern_len;
  967. char *string;
  968. int string_len;
  969. php_mb_regex_t *re;
  970. OnigSyntaxType *syntax;
  971. OnigOptionType option = 0;
  972. int err;
  973. {
  974. char *option_str = NULL;
  975. int option_str_len = 0;
  976. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
  977. &arg_pattern, &arg_pattern_len, &string, &string_len,
  978. &option_str, &option_str_len)==FAILURE) {
  979. RETURN_FALSE;
  980. }
  981. if (option_str != NULL) {
  982. _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
  983. } else {
  984. option |= MBREX(regex_default_options);
  985. syntax = MBREX(regex_default_syntax);
  986. }
  987. }
  988. if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
  989. RETURN_FALSE;
  990. }
  991. /* match */
  992. err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
  993. if (err >= 0) {
  994. RETVAL_TRUE;
  995. } else {
  996. RETVAL_FALSE;
  997. }
  998. }
  999. /* }}} */
  1000. /* regex search */
  1001. /* {{{ _php_mb_regex_ereg_search_exec */
  1002. static void
  1003. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
  1004. {
  1005. size_t argc = ZEND_NUM_ARGS();
  1006. char *arg_pattern, *arg_options;
  1007. int arg_pattern_len, arg_options_len;
  1008. int n, i, err, pos, len, beg, end;
  1009. OnigOptionType option;
  1010. OnigUChar *str;
  1011. OnigSyntaxType *syntax;
  1012. if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
  1013. return;
  1014. }
  1015. option = MBREX(regex_default_options);
  1016. if (argc == 2) {
  1017. option = 0;
  1018. _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
  1019. }
  1020. if (argc > 0) {
  1021. /* create regex pattern buffer */
  1022. if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
  1023. RETURN_FALSE;
  1024. }
  1025. }
  1026. pos = MBREX(search_pos);
  1027. str = NULL;
  1028. len = 0;
  1029. if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
  1030. str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
  1031. len = Z_STRLEN_P(MBREX(search_str));
  1032. }
  1033. if (MBREX(search_re) == NULL) {
  1034. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
  1035. RETURN_FALSE;
  1036. }
  1037. if (str == NULL) {
  1038. php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
  1039. RETURN_FALSE;
  1040. }
  1041. if (MBREX(search_regs)) {
  1042. onig_region_free(MBREX(search_regs), 1);
  1043. }
  1044. MBREX(search_regs) = onig_region_new();
  1045. err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
  1046. if (err == ONIG_MISMATCH) {
  1047. MBREX(search_pos) = len;
  1048. RETVAL_FALSE;
  1049. } else if (err <= -2) {
  1050. OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  1051. onig_error_code_to_str(err_str, err);
  1052. php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
  1053. RETVAL_FALSE;
  1054. } else {
  1055. if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
  1056. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
  1057. }
  1058. switch (mode) {
  1059. case 1:
  1060. array_init(return_value);
  1061. beg = MBREX(search_regs)->beg[0];
  1062. end = MBREX(search_regs)->end[0];
  1063. add_next_index_long(return_value, beg);
  1064. add_next_index_long(return_value, end - beg);
  1065. break;
  1066. case 2:
  1067. array_init(return_value);
  1068. n = MBREX(search_regs)->num_regs;
  1069. for (i = 0; i < n; i++) {
  1070. beg = MBREX(search_regs)->beg[i];
  1071. end = MBREX(search_regs)->end[i];
  1072. if (beg >= 0 && beg <= end && end <= len) {
  1073. add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
  1074. } else {
  1075. add_index_bool(return_value, i, 0);
  1076. }
  1077. }
  1078. break;
  1079. default:
  1080. RETVAL_TRUE;
  1081. break;
  1082. }
  1083. end = MBREX(search_regs)->end[0];
  1084. if (pos < end) {
  1085. MBREX(search_pos) = end;
  1086. } else {
  1087. MBREX(search_pos) = pos + 1;
  1088. }
  1089. }
  1090. if (err < 0) {
  1091. onig_region_free(MBREX(search_regs), 1);
  1092. MBREX(search_regs) = (OnigRegion *)NULL;
  1093. }
  1094. }
  1095. /* }}} */
  1096. /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
  1097. Regular expression search for multibyte string */
  1098. PHP_FUNCTION(mb_ereg_search)
  1099. {
  1100. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  1101. }
  1102. /* }}} */
  1103. /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
  1104. Regular expression search for multibyte string */
  1105. PHP_FUNCTION(mb_ereg_search_pos)
  1106. {
  1107. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  1108. }
  1109. /* }}} */
  1110. /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
  1111. Regular expression search for multibyte string */
  1112. PHP_FUNCTION(mb_ereg_search_regs)
  1113. {
  1114. _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
  1115. }
  1116. /* }}} */
  1117. /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
  1118. Initialize string and regular expression for search. */
  1119. PHP_FUNCTION(mb_ereg_search_init)
  1120. {
  1121. size_t argc = ZEND_NUM_ARGS();
  1122. zval *arg_str;
  1123. char *arg_pattern, *arg_options;
  1124. int arg_pattern_len, arg_options_len;
  1125. OnigSyntaxType *syntax = NULL;
  1126. OnigOptionType option;
  1127. if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
  1128. return;
  1129. }
  1130. option = MBREX(regex_default_options);
  1131. syntax = MBREX(regex_default_syntax);
  1132. if (argc == 3) {
  1133. option = 0;
  1134. _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
  1135. }
  1136. if (argc > 1) {
  1137. /* create regex pattern buffer */
  1138. if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
  1139. RETURN_FALSE;
  1140. }
  1141. }
  1142. if (MBREX(search_str) != NULL) {
  1143. zval_ptr_dtor(&MBREX(search_str));
  1144. MBREX(search_str) = (zval *)NULL;
  1145. }
  1146. MBREX(search_str) = arg_str;
  1147. Z_ADDREF_P(MBREX(search_str));
  1148. SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
  1149. MBREX(search_pos) = 0;
  1150. if (MBREX(search_regs) != NULL) {
  1151. onig_region_free(MBREX(search_regs), 1);
  1152. MBREX(search_regs) = (OnigRegion *) NULL;
  1153. }
  1154. RETURN_TRUE;
  1155. }
  1156. /* }}} */
  1157. /* {{{ proto array mb_ereg_search_getregs(void)
  1158. Get matched substring of the last time */
  1159. PHP_FUNCTION(mb_ereg_search_getregs)
  1160. {
  1161. int n, i, len, beg, end;
  1162. OnigUChar *str;
  1163. if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
  1164. array_init(return_value);
  1165. str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
  1166. len = Z_STRLEN_P(MBREX(search_str));
  1167. n = MBREX(search_regs)->num_regs;
  1168. for (i = 0; i < n; i++) {
  1169. beg = MBREX(search_regs)->beg[i];
  1170. end = MBREX(search_regs)->end[i];
  1171. if (beg >= 0 && beg <= end && end <= len) {
  1172. add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
  1173. } else {
  1174. add_index_bool(return_value, i, 0);
  1175. }
  1176. }
  1177. } else {
  1178. RETVAL_FALSE;
  1179. }
  1180. }
  1181. /* }}} */
  1182. /* {{{ proto int mb_ereg_search_getpos(void)
  1183. Get search start position */
  1184. PHP_FUNCTION(mb_ereg_search_getpos)
  1185. {
  1186. RETVAL_LONG(MBREX(search_pos));
  1187. }
  1188. /* }}} */
  1189. /* {{{ proto bool mb_ereg_search_setpos(int position)
  1190. Set search start position */
  1191. PHP_FUNCTION(mb_ereg_search_setpos)
  1192. {
  1193. long position;
  1194. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
  1195. return;
  1196. }
  1197. if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
  1198. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
  1199. MBREX(search_pos) = 0;
  1200. RETURN_FALSE;
  1201. }
  1202. MBREX(search_pos) = position;
  1203. RETURN_TRUE;
  1204. }
  1205. /* }}} */
  1206. /* {{{ php_mb_regex_set_options */
  1207. static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
  1208. {
  1209. if (prev_options != NULL) {
  1210. *prev_options = MBREX(regex_default_options);
  1211. }
  1212. if (prev_syntax != NULL) {
  1213. *prev_syntax = MBREX(regex_default_syntax);
  1214. }
  1215. MBREX(regex_default_options) = options;
  1216. MBREX(regex_default_syntax) = syntax;
  1217. }
  1218. /* }}} */
  1219. /* {{{ proto string mb_regex_set_options([string options])
  1220. Set or get the default options for mbregex functions */
  1221. PHP_FUNCTION(mb_regex_set_options)
  1222. {
  1223. OnigOptionType opt;
  1224. OnigSyntaxType *syntax;
  1225. char *string = NULL;
  1226. int string_len;
  1227. char buf[16];
  1228. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
  1229. &string, &string_len) == FAILURE) {
  1230. RETURN_FALSE;
  1231. }
  1232. if (string != NULL) {
  1233. opt = 0;
  1234. syntax = NULL;
  1235. _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
  1236. _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
  1237. } else {
  1238. opt = MBREX(regex_default_options);
  1239. syntax = MBREX(regex_default_syntax);
  1240. }
  1241. _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
  1242. RETVAL_STRING(buf, 1);
  1243. }
  1244. /* }}} */
  1245. #endif /* HAVE_MBREGEX */
  1246. /*
  1247. * Local variables:
  1248. * tab-width: 4
  1249. * c-basic-offset: 4
  1250. * End:
  1251. * vim600: fdm=marker
  1252. * vim: noet sw=4 ts=4
  1253. */