You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

213 lines
5.2 KiB

25 years ago
23 years ago
18 years ago
18 years ago
18 years ago
18 years ago
24 years ago
25 years ago
19 years ago
24 years ago
25 years ago
25 years ago
18 years ago
24 years ago
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2008 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Andrei Zmievski <andrei@php.net> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  19. #ifdef HAVE_CONFIG_H
  20. #include "config.h"
  21. #endif
  22. #include "php.h"
  23. #include "php_ini.h"
  24. #include "ext/standard/info.h"
  25. #include "php_tokenizer.h"
  26. #include "zend.h"
  27. #include "zend_language_scanner.h"
  28. #include "zend_language_scanner_defs.h"
  29. #include <zend_language_parser.h>
  30. #define zendtext LANG_SCNG(yy_text)
  31. #define zendleng LANG_SCNG(yy_leng)
  32. /* {{{ arginfo */
  33. static
  34. ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
  35. ZEND_ARG_INFO(0, source)
  36. ZEND_END_ARG_INFO()
  37. static
  38. ZEND_BEGIN_ARG_INFO_EX(arginfo_token_name, 0, 0, 1)
  39. ZEND_ARG_INFO(0, token)
  40. ZEND_END_ARG_INFO()
  41. /* }}} */
  42. /* {{{ tokenizer_functions[]
  43. *
  44. * Every user visible function must have an entry in tokenizer_functions[].
  45. */
  46. const zend_function_entry tokenizer_functions[] = {
  47. PHP_FE(token_get_all, arginfo_token_get_all)
  48. PHP_FE(token_name, arginfo_token_name)
  49. {NULL, NULL, NULL} /* Must be the last line in tokenizer_functions[] */
  50. };
  51. /* }}} */
  52. /* {{{ tokenizer_module_entry
  53. */
  54. zend_module_entry tokenizer_module_entry = {
  55. #if ZEND_MODULE_API_NO >= 20010901
  56. STANDARD_MODULE_HEADER,
  57. #endif
  58. "tokenizer",
  59. tokenizer_functions,
  60. PHP_MINIT(tokenizer),
  61. NULL,
  62. NULL,
  63. NULL,
  64. PHP_MINFO(tokenizer),
  65. #if ZEND_MODULE_API_NO >= 20010901
  66. "0.1", /* Replace with version number for your extension */
  67. #endif
  68. STANDARD_MODULE_PROPERTIES
  69. };
  70. /* }}} */
  /* Expanded only when COMPILE_DL_TOKENIZER is defined
   * (presumably the separately loadable build of the extension - confirm). */
  #if defined(COMPILE_DL_TOKENIZER)
  ZEND_GET_MODULE(tokenizer)
  #endif
  74. /* {{{ PHP_MINIT_FUNCTION
  75. */
  76. PHP_MINIT_FUNCTION(tokenizer)
  77. {
  78. tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
  79. return SUCCESS;
  80. }
  81. /* }}} */
  /* {{{ PHP_MINFO_FUNCTION
   *
   * MINFO handler: prints a one-row, two-column info table stating that
   * tokenizer support is enabled.
   */
  PHP_MINFO_FUNCTION(tokenizer)
  {
  	php_info_print_table_start();

  	/* 2 = number of cells that follow */
  	php_info_print_table_row(2, "Tokenizer Support", "enabled");

  	php_info_print_table_end();
  }
  /* }}} */
  91. static void tokenize(zval *return_value TSRMLS_DC)
  92. {
  93. zval token;
  94. zval *keyword;
  95. int token_type;
  96. zend_bool destroy;
  97. int token_line = 1;
  98. array_init(return_value);
  99. ZVAL_NULL(&token);
  100. while ((token_type = lex_scan(&token TSRMLS_CC))) {
  101. destroy = 1;
  102. switch (token_type) {
  103. case T_CLOSE_TAG:
  104. if (zendtext[zendleng - 1] != '>') {
  105. CG(zend_lineno)++;
  106. }
  107. case T_OPEN_TAG:
  108. case T_OPEN_TAG_WITH_ECHO:
  109. case T_WHITESPACE:
  110. case T_COMMENT:
  111. case T_DOC_COMMENT:
  112. destroy = 0;
  113. break;
  114. }
  115. if (token_type >= 256) {
  116. MAKE_STD_ZVAL(keyword);
  117. array_init(keyword);
  118. add_next_index_long(keyword, token_type);
  119. if (token_type == T_END_HEREDOC || token_type == T_END_NOWDOC) {
  120. if (CG(increment_lineno)) {
  121. token_line = ++CG(zend_lineno);
  122. CG(increment_lineno) = 0;
  123. }
  124. add_next_index_stringl(keyword, Z_STRVAL(token), Z_STRLEN(token), 1);
  125. efree(Z_STRVAL(token));
  126. } else {
  127. add_next_index_stringl(keyword, zendtext, zendleng, 1);
  128. }
  129. add_next_index_long(keyword, token_line);
  130. add_next_index_zval(return_value, keyword);
  131. } else {
  132. add_next_index_stringl(return_value, zendtext, zendleng, 1);
  133. }
  134. if (destroy && Z_TYPE(token) != IS_NULL) {
  135. zval_dtor(&token);
  136. }
  137. ZVAL_NULL(&token);
  138. token_line = CG(zend_lineno);
  139. }
  140. }
  141. /* {{{ proto array token_get_all(string source)
  142. */
  143. PHP_FUNCTION(token_get_all)
  144. {
  145. char *source = NULL;
  146. int argc = ZEND_NUM_ARGS();
  147. int source_len;
  148. zval source_z;
  149. zend_lex_state original_lex_state;
  150. if (zend_parse_parameters(argc TSRMLS_CC, "s", &source, &source_len) == FAILURE)
  151. return;
  152. ZVAL_STRINGL(&source_z, source, source_len, 1);
  153. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  154. if (zend_prepare_string_for_scanning(&source_z, "" TSRMLS_CC) == FAILURE) {
  155. RETURN_EMPTY_STRING();
  156. }
  157. LANG_SCNG(yy_state) = yycINITIAL;
  158. tokenize(return_value TSRMLS_CC);
  159. while (!zend_stack_is_empty(&LANG_SCNG(state_stack))) {
  160. zend_stack_del_top(&LANG_SCNG(state_stack));
  161. }
  162. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  163. zval_dtor(&source_z);
  164. }
  165. /* }}} */
  166. /* {{{ proto string token_name(int type)
  167. */
  168. PHP_FUNCTION(token_name)
  169. {
  170. int argc = ZEND_NUM_ARGS();
  171. long type;
  172. if (zend_parse_parameters(argc TSRMLS_CC, "l", &type) == FAILURE) {
  173. return;
  174. }
  175. RETVAL_STRING(get_token_type_name(type), 1);
  176. }
  177. /* }}} */
  178. /*
  179. * Local variables:
  180. * tab-width: 4
  181. * c-basic-offset: 4
  182. * End:
  183. * vim600: noet sw=4 ts=4 fdm=marker
  184. * vim<600: noet sw=4 ts=4
  185. */