You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2112 lines
51 KiB

27 years ago
17 years ago
27 years ago
24 years ago
23 years ago
24 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
25 years ago
25 years ago
27 years ago
27 years ago
23 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
24 years ago
27 years ago
25 years ago
27 years ago
24 years ago
27 years ago
27 years ago
23 years ago
25 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
23 years ago
23 years ago
23 years ago
23 years ago
27 years ago
27 years ago
27 years ago
27 years ago
24 years ago
27 years ago
23 years ago
27 years ago
27 years ago
27 years ago
27 years ago
24 years ago
27 years ago
27 years ago
23 years ago
27 years ago
27 years ago
23 years ago
23 years ago
23 years ago
23 years ago
23 years ago
23 years ago
27 years ago
24 years ago
27 years ago
27 years ago
25 years ago
25 years ago
27 years ago
18 years ago
27 years ago
27 years ago
27 years ago
24 years ago
27 years ago
27 years ago
23 years ago
26 years ago
27 years ago
26 years ago
27 years ago
24 years ago
27 years ago
27 years ago
27 years ago
24 years ago
27 years ago
25 years ago
24 years ago
27 years ago
27 years ago
24 years ago
27 years ago
25 years ago
24 years ago
27 years ago
27 years ago
27 years ago
18 years ago
18 years ago
27 years ago
27 years ago
27 years ago
18 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
18 years ago
27 years ago
27 years ago
27 years ago
27 years ago
23 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
26 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
23 years ago
24 years ago
24 years ago
24 years ago
23 years ago
24 years ago
23 years ago
23 years ago
27 years ago
27 years ago
23 years ago
27 years ago
27 years ago
27 years ago
27 years ago
18 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
26 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
22 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
23 years ago
23 years ago
23 years ago
27 years ago
27 years ago
23 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
27 years ago
18 years ago
18 years ago
18 years ago
18 years ago
18 years ago
18 years ago
27 years ago
27 years ago
27 years ago
25 years ago
27 years ago
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Zend Engine |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1998-2009 Zend Technologies Ltd. (http://www.zend.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 2.00 of the Zend license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.zend.com/license/2_00.txt. |
  11. | If you did not receive a copy of the Zend license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@zend.com so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Authors: Marcus Boerger <helly@php.net> |
  16. | Nuno Lopes <nlopess@php.net> |
  17. | Scott MacVicar <scottmac@php.net> |
  18. | Flex version authors: |
  19. | Andi Gutmans <andi@zend.com> |
  20. | Zeev Suraski <zeev@zend.com> |
  21. +----------------------------------------------------------------------+
  22. */
  23. /* $Id$ */
  /*
   * Scanner trace hook taking a state number and a character.
   * Change the "#if 0" below to "#if 1" to print both on every call; in the
   * default configuration the macro expands to nothing, so its arguments are
   * never evaluated.
   */
  #if 0
  #  define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
  #else
  #  define YYDEBUG(s, c) /* tracing disabled */
  #endif
  29. #include "zend_language_scanner_defs.h"
  30. #include <errno.h>
  31. #include "zend.h"
  32. #include "zend_alloc.h"
  33. #include <zend_language_parser.h>
  34. #include "zend_compile.h"
  35. #include "zend_language_scanner.h"
  36. #include "zend_highlight.h"
  37. #include "zend_constants.h"
  38. #include "zend_variables.h"
  39. #include "zend_operators.h"
  40. #include "zend_API.h"
  41. #include "zend_strtod.h"
  42. #include "zend_exceptions.h"
  43. #include "tsrm_virtual_cwd.h"
  44. #include "tsrm_config_common.h"
  /*
   * Scanner interface macros.  All of them are backed by the scanner globals
   * reached through SCNG().  Macro arguments are parenthesized so that callers
   * may pass arbitrary expressions.
   */
  #define YYCTYPE unsigned char
  /*
   * Bounds check only: when YYCURSOR + n reaches YYLIMIT + ZEND_MMAP_AHEAD a
   * compile error is raised and the enclosing function returns 0.
   */
  #define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { zend_error(E_COMPILE_ERROR, "Exceeded YYLIMIT bounds during scanning. Please report this."); return 0; } }
  #define YYCURSOR SCNG(yy_cursor)
  #define YYLIMIT SCNG(yy_limit)
  #define YYMARKER SCNG(yy_marker)
  #define YYGETCONDITION() SCNG(yy_state)
  #define YYSETCONDITION(s) SCNG(yy_state) = (s)
  /* Map a condition name to its yyc-prefixed identifier. */
  #define STATE(name) yyc##name

  /* emulate flex constructs */
  #define BEGIN(state) YYSETCONDITION(STATE(state))
  #define YYSTATE YYGETCONDITION()
  #define yytext ((char*)SCNG(yy_text))
  #define yyleng SCNG(yy_leng)
  /* Keep only the first x bytes of the current token; x is evaluated twice. */
  #define yyless(x) do { YYCURSOR = (unsigned char*)yytext + (x); \
                         yyleng = (unsigned int)(x); } while(0)
  /* Jumps to the yymore_restart label, which must exist in the calling function. */
  #define yymore() goto yymore_restart
  /*
   * Sanity check: ZEND_MMAP_AHEAD must not be smaller than YYMAXFILL.
   * If the #error below is triggered, increase the ZEND_MMAP_AHEAD value in
   * the zend_streams.h file.  The "max:re2c" directive on the next line must
   * stay verbatim; it is presumably where re2c emits the YYMAXFILL
   * definition (confirm against the re2c manual).
   */
  /*!max:re2c */
  #if YYMAXFILL > ZEND_MMAP_AHEAD
  # error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
  #endif
  67. #ifdef HAVE_STDARG_H
  68. # include <stdarg.h>
  69. #endif
  70. #ifdef HAVE_UNISTD_H
  71. # include <unistd.h>
  72. #endif
  73. /* Globals Macros */
  74. #define SCNG LANG_SCNG
  75. #ifdef ZTS
  76. ZEND_API ts_rsrc_id language_scanner_globals_id;
  77. #else
  78. ZEND_API zend_php_scanner_globals language_scanner_globals;
  79. #endif
  /*
   * Increment CG(zend_lineno) once per line terminator found in the l bytes
   * starting at s.  "\n" and a "\r" that is not followed by "\n" each count
   * as one line, so "\r\n" counts once.  When the last byte is '\r' the byte
   * at s[l] is inspected, so it must be readable.
   * The locals carry a prefix so they cannot capture identifiers used in the
   * caller's argument expressions.
   */
  #define HANDLE_NEWLINES(s, l) \
  do { \
      char *zend_nl_cur = (s), *zend_nl_end = zend_nl_cur + (l); \
  \
      while (zend_nl_cur < zend_nl_end) { \
          if (*zend_nl_cur == '\n' || (*zend_nl_cur == '\r' && (*(zend_nl_cur + 1) != '\n'))) { \
              CG(zend_lineno)++; \
          } \
          zend_nl_cur++; \
      } \
  } while (0)

  /*
   * Increment CG(zend_lineno) when the single character c is '\n' or '\r'.
   * c is parenthesized so expression arguments compare as a whole; it may be
   * evaluated twice.
   */
  #define HANDLE_NEWLINE(c) \
  { \
      if ((c) == '\n' || (c) == '\r') { \
          CG(zend_lineno)++; \
      } \
  }
  /* Non-zero when c is an octal digit ('0'..'7'). */
  #define ZEND_IS_OCT(c) ('0' <= (c) && (c) <= '7')

  /* Non-zero when c is a hexadecimal digit; letters match in either case. */
  #define ZEND_IS_HEX(c) \
      (('0' <= (c) && (c) <= '9') || \
       ('a' <= (c) && (c) <= 'f') || \
       ('A' <= (c) && (c) <= 'F'))
  99. BEGIN_EXTERN_C()
  100. static void _yy_push_state(int new_state TSRMLS_DC)
  101. {
  102. zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
  103. YYSETCONDITION(new_state);
  104. }
  105. #define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
  106. static void yy_pop_state(TSRMLS_D)
  107. {
  108. int *stack_state;
  109. zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
  110. YYSETCONDITION(*stack_state);
  111. zend_stack_del_top(&SCNG(state_stack));
  112. }
  113. static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
  114. {
  115. YYCURSOR = (YYCTYPE*)str;
  116. SCNG(yy_start) = YYCURSOR;
  117. YYLIMIT = YYCURSOR + len;
  118. }
  119. void startup_scanner(TSRMLS_D)
  120. {
  121. CG(heredoc) = NULL;
  122. CG(heredoc_len) = 0;
  123. CG(doc_comment) = NULL;
  124. CG(doc_comment_len) = 0;
  125. zend_llist_init(&SCNG(used_state_stacks), sizeof(zend_stack), (llist_dtor_func_t) zend_stack_destroy, 0);
  126. zend_stack_init(&SCNG(state_stack));
  127. zend_llist_add_element(&SCNG(used_state_stacks), &SCNG(state_stack));
  128. }
  129. void shutdown_scanner(TSRMLS_D)
  130. {
  131. if (CG(heredoc)) {
  132. efree(CG(heredoc));
  133. CG(heredoc_len)=0;
  134. }
  135. zend_llist_destroy(&SCNG(used_state_stacks));
  136. RESET_DOC_COMMENT();
  137. }
  138. static int compare_stacks(zend_stack *stack1, zend_stack *stack2)
  139. {
  140. return (stack1 == stack2);
  141. }
  142. ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
  143. {
  144. lex_state->yy_leng = SCNG(yy_leng);
  145. lex_state->yy_start = SCNG(yy_start);
  146. lex_state->yy_text = SCNG(yy_text);
  147. lex_state->yy_cursor = SCNG(yy_cursor);
  148. lex_state->yy_marker = SCNG(yy_marker);
  149. lex_state->yy_limit = SCNG(yy_limit);
  150. lex_state->state_stack = SCNG(state_stack);
  151. zend_stack_init(&SCNG(state_stack));
  152. zend_llist_add_element(&SCNG(used_state_stacks), &SCNG(state_stack));
  153. lex_state->in = SCNG(yy_in);
  154. lex_state->yy_state = YYSTATE;
  155. lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
  156. lex_state->lineno = CG(zend_lineno);
  157. #ifdef ZEND_MULTIBYTE
  158. lex_state->script_org = SCNG(script_org);
  159. lex_state->script_org_size = SCNG(script_org_size);
  160. lex_state->script_filtered = SCNG(script_filtered);
  161. lex_state->script_filtered_size = SCNG(script_filtered_size);
  162. lex_state->input_filter = SCNG(input_filter);
  163. lex_state->output_filter = SCNG(output_filter);
  164. lex_state->script_encoding = SCNG(script_encoding);
  165. lex_state->internal_encoding = SCNG(internal_encoding);
  166. #endif /* ZEND_MULTIBYTE */
  167. }
  168. ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
  169. {
  170. SCNG(yy_leng) = lex_state->yy_leng;
  171. SCNG(yy_start) = lex_state->yy_start;
  172. SCNG(yy_text) = lex_state->yy_text;
  173. SCNG(yy_cursor) = lex_state->yy_cursor;
  174. SCNG(yy_marker) = lex_state->yy_marker;
  175. SCNG(yy_limit) = lex_state->yy_limit;
  176. zend_llist_del_element(&SCNG(used_state_stacks), &SCNG(state_stack), (int (*)(void *, void *)) compare_stacks);
  177. SCNG(state_stack) = lex_state->state_stack;
  178. SCNG(yy_in) = lex_state->in;
  179. YYSETCONDITION(lex_state->yy_state);
  180. CG(zend_lineno) = lex_state->lineno;
  181. zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
  182. #ifdef ZEND_MULTIBYTE
  183. if (SCNG(script_org)) {
  184. efree(SCNG(script_org));
  185. SCNG(script_org) = NULL;
  186. }
  187. if (SCNG(script_filtered)) {
  188. efree(SCNG(script_filtered));
  189. SCNG(script_filtered) = NULL;
  190. }
  191. SCNG(script_org) = lex_state->script_org;
  192. SCNG(script_org_size) = lex_state->script_org_size;
  193. SCNG(script_filtered) = lex_state->script_filtered;
  194. SCNG(script_filtered_size) = lex_state->script_filtered_size;
  195. SCNG(input_filter) = lex_state->input_filter;
  196. SCNG(output_filter) = lex_state->output_filter;
  197. SCNG(script_encoding) = lex_state->script_encoding;
  198. SCNG(internal_encoding) = lex_state->internal_encoding;
  199. #endif /* ZEND_MULTIBYTE */
  200. }
  201. ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
  202. {
  203. zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
  204. /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
  205. file_handle->opened_path = NULL;
  206. if (file_handle->free_filename) {
  207. file_handle->filename = NULL;
  208. }
  209. }
  210. ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
  211. {
  212. char *file_path = NULL, *buf;
  213. size_t size;
  214. if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
  215. return FAILURE;
  216. }
  217. zend_llist_add_element(&CG(open_files), file_handle);
  218. if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
  219. zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
  220. size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
  221. fh->handle.stream.handle = (void*)(((char*)fh) + diff);
  222. file_handle->handle.stream.handle = fh->handle.stream.handle;
  223. }
  224. /* Reset the scanner for scanning the new file */
  225. SCNG(yy_in) = file_handle;
  226. if (size != -1) {
  227. #ifdef ZEND_MULTIBYTE
  228. if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) {
  229. return FAILURE;
  230. }
  231. SCNG(yy_in) = NULL;
  232. zend_multibyte_set_filter(NULL TSRMLS_CC);
  233. if (!SCNG(input_filter)) {
  234. SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
  235. memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
  236. SCNG(script_filtered_size) = SCNG(script_org_size);
  237. } else {
  238. SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
  239. }
  240. yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
  241. #else /* !ZEND_MULTIBYTE */
  242. yy_scan_buffer(buf, size TSRMLS_CC);
  243. #endif /* ZEND_MULTIBYTE */
  244. } else {
  245. zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
  246. }
  247. BEGIN(INITIAL);
  248. if (file_handle->opened_path) {
  249. file_path = file_handle->opened_path;
  250. } else {
  251. file_path = file_handle->filename;
  252. }
  253. zend_set_compiled_filename(file_path TSRMLS_CC);
  254. if (CG(start_lineno)) {
  255. CG(zend_lineno) = CG(start_lineno);
  256. CG(start_lineno) = 0;
  257. } else {
  258. CG(zend_lineno) = 1;
  259. }
  260. CG(increment_lineno) = 0;
  261. return SUCCESS;
  262. }
  263. END_EXTERN_C()
  264. ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
  265. {
  266. zend_lex_state original_lex_state;
  267. zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
  268. zend_op_array *original_active_op_array = CG(active_op_array);
  269. zend_op_array *retval=NULL;
  270. int compiler_result;
  271. zend_bool compilation_successful=0;
  272. znode retval_znode;
  273. zend_bool original_in_compilation = CG(in_compilation);
  274. retval_znode.op_type = IS_CONST;
  275. retval_znode.u.constant.type = IS_LONG;
  276. retval_znode.u.constant.value.lval = 1;
  277. Z_UNSET_ISREF(retval_znode.u.constant);
  278. Z_SET_REFCOUNT(retval_znode.u.constant, 1);
  279. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  280. retval = op_array; /* success oriented */
  281. if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
  282. if (type==ZEND_REQUIRE) {
  283. zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
  284. zend_bailout();
  285. } else {
  286. zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
  287. }
  288. compilation_successful=0;
  289. } else {
  290. init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
  291. CG(in_compilation) = 1;
  292. CG(active_op_array) = op_array;
  293. compiler_result = zendparse(TSRMLS_C);
  294. zend_do_return(&retval_znode, 0 TSRMLS_CC);
  295. CG(in_compilation) = original_in_compilation;
  296. if (compiler_result==1) { /* parser error */
  297. zend_bailout();
  298. }
  299. compilation_successful=1;
  300. }
  301. if (retval) {
  302. CG(active_op_array) = original_active_op_array;
  303. if (compilation_successful) {
  304. pass_two(op_array TSRMLS_CC);
  305. zend_release_labels(TSRMLS_C);
  306. } else {
  307. efree(op_array);
  308. retval = NULL;
  309. }
  310. }
  311. if (compilation_successful) {
  312. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  313. }
  314. return retval;
  315. }
  316. zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
  317. {
  318. zend_file_handle file_handle;
  319. zval tmp;
  320. zend_op_array *retval;
  321. char *opened_path = NULL;
  322. if (filename->type != IS_STRING) {
  323. tmp = *filename;
  324. zval_copy_ctor(&tmp);
  325. convert_to_string(&tmp);
  326. filename = &tmp;
  327. }
  328. file_handle.filename = filename->value.str.val;
  329. file_handle.free_filename = 0;
  330. file_handle.type = ZEND_HANDLE_FILENAME;
  331. file_handle.opened_path = NULL;
  332. file_handle.handle.fp = NULL;
  333. retval = zend_compile_file(&file_handle, type TSRMLS_CC);
  334. if (retval && file_handle.handle.stream.handle) {
  335. int dummy = 1;
  336. if (!file_handle.opened_path) {
  337. file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
  338. }
  339. zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
  340. if (opened_path) {
  341. efree(opened_path);
  342. }
  343. }
  344. zend_destroy_file_handle(&file_handle TSRMLS_CC);
  345. if (filename==&tmp) {
  346. zval_dtor(&tmp);
  347. }
  348. return retval;
  349. }
  350. ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
  351. {
  352. /* enforce two trailing NULLs for flex... */
  353. str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
  354. memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
  355. SCNG(yy_in)=NULL;
  356. #ifdef ZEND_MULTIBYTE
  357. SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
  358. SCNG(script_org_size) = str->value.str.len;
  359. zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
  360. if (!SCNG(input_filter)) {
  361. SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
  362. memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
  363. SCNG(script_filtered_size) = SCNG(script_org_size);
  364. } else {
  365. SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
  366. }
  367. yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
  368. #else /* !ZEND_MULTIBYTE */
  369. yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC);
  370. #endif /* ZEND_MULTIBYTE */
  371. zend_set_compiled_filename(filename TSRMLS_CC);
  372. CG(zend_lineno) = 1;
  373. CG(increment_lineno) = 0;
  374. return SUCCESS;
  375. }
  376. ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
  377. {
  378. size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
  379. #ifdef ZEND_MULTIBYTE
  380. if (SCNG(input_filter)) {
  381. size_t original_offset = offset, length = 0; do {
  382. unsigned char *p = NULL;
  383. SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC);
  384. if (!p) {
  385. break;
  386. }
  387. efree(p);
  388. if (length > original_offset) {
  389. offset--;
  390. } else if (length < original_offset) {
  391. offset++;
  392. }
  393. } while (original_offset != length);
  394. }
  395. #endif
  396. return offset;
  397. }
  398. zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
  399. {
  400. zend_lex_state original_lex_state;
  401. zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
  402. zend_op_array *original_active_op_array = CG(active_op_array);
  403. zend_op_array *retval;
  404. zval tmp;
  405. int compiler_result;
  406. zend_bool original_in_compilation = CG(in_compilation);
  407. if (source_string->value.str.len==0) {
  408. efree(op_array);
  409. return NULL;
  410. }
  411. CG(in_compilation) = 1;
  412. tmp = *source_string;
  413. zval_copy_ctor(&tmp);
  414. convert_to_string(&tmp);
  415. source_string = &tmp;
  416. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  417. if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
  418. efree(op_array);
  419. retval = NULL;
  420. } else {
  421. zend_bool orig_interactive = CG(interactive);
  422. CG(interactive) = 0;
  423. init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
  424. CG(interactive) = orig_interactive;
  425. CG(active_op_array) = op_array;
  426. BEGIN(ST_IN_SCRIPTING);
  427. compiler_result = zendparse(TSRMLS_C);
  428. #ifdef ZEND_MULTIBYTE
  429. if (SCNG(script_org)) {
  430. efree(SCNG(script_org));
  431. SCNG(script_org) = NULL;
  432. }
  433. if (SCNG(script_filtered)) {
  434. efree(SCNG(script_filtered));
  435. SCNG(script_filtered) = NULL;
  436. }
  437. #endif /* ZEND_MULTIBYTE */
  438. if (compiler_result==1) {
  439. CG(active_op_array) = original_active_op_array;
  440. CG(unclean_shutdown)=1;
  441. retval = NULL;
  442. } else {
  443. zend_do_return(NULL, 0 TSRMLS_CC);
  444. CG(active_op_array) = original_active_op_array;
  445. pass_two(op_array TSRMLS_CC);
  446. zend_release_labels(TSRMLS_C);
  447. retval = op_array;
  448. }
  449. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  450. }
  451. zval_dtor(&tmp);
  452. CG(in_compilation) = original_in_compilation;
  453. return retval;
  454. }
  455. BEGIN_EXTERN_C()
  456. int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
  457. {
  458. zend_lex_state original_lex_state;
  459. zend_file_handle file_handle;
  460. file_handle.type = ZEND_HANDLE_FILENAME;
  461. file_handle.filename = filename;
  462. file_handle.free_filename = 0;
  463. file_handle.opened_path = NULL;
  464. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  465. if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
  466. zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
  467. return FAILURE;
  468. }
  469. zend_highlight(syntax_highlighter_ini TSRMLS_CC);
  470. #ifdef ZEND_MULTIBYTE
  471. if (SCNG(script_org)) {
  472. efree(SCNG(script_org));
  473. SCNG(script_org) = NULL;
  474. }
  475. if (SCNG(script_filtered)) {
  476. efree(SCNG(script_filtered));
  477. SCNG(script_filtered) = NULL;
  478. }
  479. #endif /* ZEND_MULTIBYTE */
  480. zend_destroy_file_handle(&file_handle TSRMLS_CC);
  481. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  482. return SUCCESS;
  483. }
  484. int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
  485. {
  486. zend_lex_state original_lex_state;
  487. zval tmp = *str;
  488. str = &tmp;
  489. zval_copy_ctor(str);
  490. zend_save_lexical_state(&original_lex_state TSRMLS_CC);
  491. if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
  492. return FAILURE;
  493. }
  494. BEGIN(INITIAL);
  495. zend_highlight(syntax_highlighter_ini TSRMLS_CC);
  496. #ifdef ZEND_MULTIBYTE
  497. if (SCNG(script_org)) {
  498. efree(SCNG(script_org));
  499. SCNG(script_org) = NULL;
  500. }
  501. if (SCNG(script_filtered)) {
  502. efree(SCNG(script_filtered));
  503. SCNG(script_filtered) = NULL;
  504. }
  505. #endif /* ZEND_MULTIBYTE */
  506. zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
  507. zval_dtor(str);
  508. return SUCCESS;
  509. }
  510. END_EXTERN_C()
  511. #ifdef ZEND_MULTIBYTE
  512. BEGIN_EXTERN_C()
  513. ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC)
  514. {
  515. size_t original_offset, offset, free_flag, new_len, length;
  516. unsigned char *p;
  517. /* calculate current position */
  518. offset = original_offset = YYCURSOR - SCNG(yy_start);
  519. if (old_input_filter && offset > 0) {
  520. zend_encoding *new_encoding = SCNG(script_encoding);
  521. zend_encoding_filter new_filter = SCNG(input_filter);
  522. SCNG(script_encoding) = old_encoding;
  523. SCNG(input_filter) = old_input_filter;
  524. offset = zend_get_scanned_file_offset(TSRMLS_C);
  525. SCNG(script_encoding) = new_encoding;
  526. SCNG(input_filter) = new_filter;
  527. }
  528. /* convert and set */
  529. if (!SCNG(input_filter)) {
  530. length = SCNG(script_org_size) - offset;
  531. p = SCNG(script_org) + offset;
  532. free_flag = 0;
  533. } else {
  534. SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC);
  535. free_flag = 1;
  536. }
  537. new_len = original_offset + length;
  538. if (new_len > YYLIMIT - SCNG(yy_start)) {
  539. unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len);
  540. SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
  541. SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
  542. SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
  543. SCNG(yy_start) = new_yy_start;
  544. SCNG(script_filtered) = new_yy_start;
  545. SCNG(script_filtered_size) = new_len;
  546. }
  547. SCNG(yy_limit) = SCNG(yy_start) + new_len;
  548. memmove(SCNG(yy_start) + original_offset, p, length);
  549. if (free_flag) {
  550. efree(p);
  551. }
  552. }
  553. ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC)
  554. {
  555. size_t n;
  556. if (CG(interactive) == 0) {
  557. if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
  558. return FAILURE;
  559. }
  560. n = len;
  561. return n;
  562. }
  563. /* interactive */
  564. if (SCNG(script_org)) {
  565. efree(SCNG(script_org));
  566. }
  567. if (SCNG(script_filtered)) {
  568. efree(SCNG(script_filtered));
  569. }
  570. SCNG(script_org) = NULL;
  571. SCNG(script_org_size) = 0;
  572. /* TODO: support widechars */
  573. if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
  574. return FAILURE;
  575. }
  576. n = len;
  577. SCNG(script_org_size) = n;
  578. SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
  579. memcpy(SCNG(script_org), buf, n);
  580. return n;
  581. }
  582. ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC)
  583. {
  584. if (SCNG(script_org)) {
  585. efree(SCNG(script_org));
  586. }
  587. SCNG(script_org_size) = n;
  588. SCNG(script_org) = (unsigned char*)erealloc(SCNG(script_org), SCNG(script_org_size));
  589. memcpy(SCNG(script_org) + SCNG(script_org_size) - n, buf, n);
  590. SCNG(script_org) = (unsigned char*)erealloc(SCNG(script_org), SCNG(script_org_size) + 1);
  591. *(SCNG(script_org)+SCNG(script_org_size)) = '\0';
  592. return 0;
  593. }
  594. # define zend_copy_value(zendlval, yytext, yyleng) \
  595. if (SCNG(output_filter)) { \
  596. size_t sz = 0; \
  597. SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC); \
  598. zendlval->value.str.len = sz; \
  599. } else { \
  600. zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \
  601. zendlval->value.str.len = yyleng; \
  602. }
  603. #else /* ZEND_MULTIBYTE */
  604. # define zend_copy_value(zendlval, yytext, yyleng) \
  605. zendlval->value.str.val = (char *)estrndup(yytext, yyleng); \
  606. zendlval->value.str.len = yyleng;
  607. #endif /* ZEND_MULTIBYTE */
  608. static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
  609. {
  610. register char *s, *t;
  611. char *end;
  612. ZVAL_STRINGL(zendlval, str, len, 1);
  613. /* convert escape sequences */
  614. s = t = zendlval->value.str.val;
  615. end = s+zendlval->value.str.len;
  616. while (s<end) {
  617. if (*s=='\\') {
  618. s++;
  619. if (s >= end) {
  620. *t++ = '\\';
  621. break;
  622. }
  623. switch(*s) {
  624. case 'n':
  625. *t++ = '\n';
  626. zendlval->value.str.len--;
  627. break;
  628. case 'r':
  629. *t++ = '\r';
  630. zendlval->value.str.len--;
  631. break;
  632. case 't':
  633. *t++ = '\t';
  634. zendlval->value.str.len--;
  635. break;
  636. case 'f':
  637. *t++ = '\f';
  638. zendlval->value.str.len--;
  639. break;
  640. case 'v':
  641. *t++ = '\v';
  642. zendlval->value.str.len--;
  643. break;
  644. case '"':
  645. case '`':
  646. if (*s != quote_type) {
  647. *t++ = '\\';
  648. *t++ = *s;
  649. break;
  650. }
  651. case '\\':
  652. case '$':
  653. *t++ = *s;
  654. zendlval->value.str.len--;
  655. break;
  656. case 'x':
  657. case 'X':
  658. if (ZEND_IS_HEX(*(s+1))) {
  659. char hex_buf[3] = { 0, 0, 0 };
  660. zendlval->value.str.len--; /* for the 'x' */
  661. hex_buf[0] = *(++s);
  662. zendlval->value.str.len--;
  663. if (ZEND_IS_HEX(*(s+1))) {
  664. hex_buf[1] = *(++s);
  665. zendlval->value.str.len--;
  666. }
  667. *t++ = (char) strtol(hex_buf, NULL, 16);
  668. } else {
  669. *t++ = '\\';
  670. *t++ = *s;
  671. }
  672. break;
  673. default:
  674. /* check for an octal */
  675. if (ZEND_IS_OCT(*s)) {
  676. char octal_buf[4] = { 0, 0, 0, 0 };
  677. octal_buf[0] = *s;
  678. zendlval->value.str.len--;
  679. if (ZEND_IS_OCT(*(s+1))) {
  680. octal_buf[1] = *(++s);
  681. zendlval->value.str.len--;
  682. if (ZEND_IS_OCT(*(s+1))) {
  683. octal_buf[2] = *(++s);
  684. zendlval->value.str.len--;
  685. }
  686. }
  687. *t++ = (char) strtol(octal_buf, NULL, 8);
  688. } else {
  689. *t++ = '\\';
  690. *t++ = *s;
  691. }
  692. break;
  693. }
  694. } else {
  695. *t++ = *s;
  696. }
  697. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  698. CG(zend_lineno)++;
  699. }
  700. s++;
  701. }
  702. *t = 0;
  703. #ifdef ZEND_MULTIBYTE
  704. if (SCNG(output_filter)) {
  705. size_t sz = 0;
  706. s = zendlval->value.str.val;
  707. SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
  708. zendlval->value.str.len = sz;
  709. efree(s);
  710. }
  711. #endif /* ZEND_MULTIBYTE */
  712. }
  713. int lex_scan(zval *zendlval TSRMLS_DC)
  714. {
  715. restart:
  716. SCNG(yy_text) = YYCURSOR;
  717. yymore_restart:
  718. /*!re2c
  719. re2c:yyfill:check = 0;
  720. LNUM [0-9]+
  721. DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
  722. EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
  723. HNUM "0x"[0-9a-fA-F]+
  724. LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
  725. WHITESPACE [ \n\r\t]+
  726. TABS_AND_SPACES [ \t]*
  727. TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
  728. ANY_CHAR [^\x00]
  729. NEWLINE ("\r"|"\n"|"\r\n")
  730. NULL [\x00]{1}
  731. /*
  732. * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character
  733. * or a { and therefore will be taken literally. The case of literal $ before
  734. * a variable or "${" is handled in a rule for each string type
  735. */
  736. DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{\x00]|("\\"{ANY_CHAR})))
  737. BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{\x00]|("\\"{ANY_CHAR})))
  738. HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{\x00]|("\\"[^\n\r\x00])))
  739. /*
  740. * Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some
  741. * special cases need to be handled. HEREDOC_CHARS doesn't allow a line to
  742. * match when { or $, and/or \ is at the end. (("{"*|"$"*)"\\"?) handles that,
  743. * along with cases where { or $, and/or \ is the ONLY thing on a line
  744. *
  745. * The other case is when a line contains a label, followed by ONLY
  746. * { or $, and/or \ Handled by ({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))
  747. */
  748. HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEWLINE})
  749. /*
  750. * This pattern is just used in the next 2 for matching { or literal $, and/or
  751. * \ escape sequence immediately at the beginning of a line or after a label
  752. */
  753. HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{\x00])|("{"*"\\"[^\n\r\x00])|{HEREDOC_LITERAL_DOLLAR})
  754. /*
  755. * These 2 label-related patterns allow HEREDOC_CHARS to continue "regular"
  756. * matching after a newline that starts with either a non-label character or a
  757. * label that isn't followed by a newline. Like HEREDOC_CHARS, they won't match
  758. * a variable or "{$" Matching a newline, and possibly label, up TO a variable
  759. * or "{$", is handled in the heredoc rules
  760. *
  761. * The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{\x00]) handles cases where ;
  762. * follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{\x00] is needed to prevent a label
  763. * character or ; from matching on a possible (real) ending label
  764. */
  765. HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{\x00]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})
  766. HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{\x00]|(";"[^$\n\r\\{\x00])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR})))
  767. /*
  768. * CHARS matches everything up to a variable or "{$"
  769. * {'s are matched as long as they aren't followed by a $
  770. * The case of { before "{$" is handled in a rule for each string type
  771. *
  772. * For heredocs, matching continues across/after newlines if/when it's known
  773. * that the next line doesn't contain a possible ending label
  774. */
  775. DOUBLE_QUOTES_CHARS ("{"*([^$"\\{\x00]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
  776. BACKQUOTE_CHARS ("{"*([^$`\\{\x00]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
  777. HEREDOC_CHARS ("{"*([^$\n\r\\{\x00]|("\\"[^\n\r\x00]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE})))
  778. NOWDOC_CHARS ([^\n\r\x00]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r\x00]|({LABEL}([^a-zA-Z0-9_\x7f-\xff;\n\r\x00]|(";"[^\n\r\x00])))))
  779. /* compute yyleng before each rule */
  780. <!*> := yyleng = YYCURSOR - SCNG(yy_text);
  781. <ST_IN_SCRIPTING>"exit" {
  782. return T_EXIT;
  783. }
  784. <ST_IN_SCRIPTING>"die" {
  785. return T_EXIT;
  786. }
  787. <ST_IN_SCRIPTING>"function" {
  788. return T_FUNCTION;
  789. }
  790. <ST_IN_SCRIPTING>"const" {
  791. return T_CONST;
  792. }
  793. <ST_IN_SCRIPTING>"return" {
  794. return T_RETURN;
  795. }
  796. <ST_IN_SCRIPTING>"try" {
  797. return T_TRY;
  798. }
  799. <ST_IN_SCRIPTING>"catch" {
  800. return T_CATCH;
  801. }
  802. <ST_IN_SCRIPTING>"throw" {
  803. return T_THROW;
  804. }
  805. <ST_IN_SCRIPTING>"if" {
  806. return T_IF;
  807. }
  808. <ST_IN_SCRIPTING>"elseif" {
  809. return T_ELSEIF;
  810. }
  811. <ST_IN_SCRIPTING>"endif" {
  812. return T_ENDIF;
  813. }
  814. <ST_IN_SCRIPTING>"else" {
  815. return T_ELSE;
  816. }
  817. <ST_IN_SCRIPTING>"while" {
  818. return T_WHILE;
  819. }
  820. <ST_IN_SCRIPTING>"endwhile" {
  821. return T_ENDWHILE;
  822. }
  823. <ST_IN_SCRIPTING>"do" {
  824. return T_DO;
  825. }
  826. <ST_IN_SCRIPTING>"for" {
  827. return T_FOR;
  828. }
  829. <ST_IN_SCRIPTING>"endfor" {
  830. return T_ENDFOR;
  831. }
  832. <ST_IN_SCRIPTING>"foreach" {
  833. return T_FOREACH;
  834. }
  835. <ST_IN_SCRIPTING>"endforeach" {
  836. return T_ENDFOREACH;
  837. }
  838. <ST_IN_SCRIPTING>"declare" {
  839. return T_DECLARE;
  840. }
  841. <ST_IN_SCRIPTING>"enddeclare" {
  842. return T_ENDDECLARE;
  843. }
  844. <ST_IN_SCRIPTING>"instanceof" {
  845. return T_INSTANCEOF;
  846. }
  847. <ST_IN_SCRIPTING>"as" {
  848. return T_AS;
  849. }
  850. <ST_IN_SCRIPTING>"switch" {
  851. return T_SWITCH;
  852. }
  853. <ST_IN_SCRIPTING>"endswitch" {
  854. return T_ENDSWITCH;
  855. }
  856. <ST_IN_SCRIPTING>"case" {
  857. return T_CASE;
  858. }
  859. <ST_IN_SCRIPTING>"default" {
  860. return T_DEFAULT;
  861. }
  862. <ST_IN_SCRIPTING>"break" {
  863. return T_BREAK;
  864. }
  865. <ST_IN_SCRIPTING>"continue" {
  866. return T_CONTINUE;
  867. }
  868. <ST_IN_SCRIPTING>"goto" {
  869. return T_GOTO;
  870. }
  871. <ST_IN_SCRIPTING>"echo" {
  872. return T_ECHO;
  873. }
  874. <ST_IN_SCRIPTING>"print" {
  875. return T_PRINT;
  876. }
  877. <ST_IN_SCRIPTING>"class" {
  878. return T_CLASS;
  879. }
  880. <ST_IN_SCRIPTING>"interface" {
  881. return T_INTERFACE;
  882. }
  883. <ST_IN_SCRIPTING>"extends" {
  884. return T_EXTENDS;
  885. }
  886. <ST_IN_SCRIPTING>"implements" {
  887. return T_IMPLEMENTS;
  888. }
  889. <ST_IN_SCRIPTING>"->" {
  890. yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
  891. return T_OBJECT_OPERATOR;
  892. }
  893. <ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
  894. zendlval->value.str.val = yytext; /* no copying - intentional */
  895. zendlval->value.str.len = yyleng;
  896. zendlval->type = IS_STRING;
  897. HANDLE_NEWLINES(yytext, yyleng);
  898. return T_WHITESPACE;
  899. }
  900. <ST_LOOKING_FOR_PROPERTY>"->" {
  901. return T_OBJECT_OPERATOR;
  902. }
  903. <ST_LOOKING_FOR_PROPERTY>{LABEL} {
  904. yy_pop_state(TSRMLS_C);
  905. zend_copy_value(zendlval, yytext, yyleng);
  906. zendlval->type = IS_STRING;
  907. return T_STRING;
  908. }
  909. <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
  910. yyless(0);
  911. yy_pop_state(TSRMLS_C);
  912. goto restart;
  913. }
  914. <ST_IN_SCRIPTING>"::" {
  915. return T_PAAMAYIM_NEKUDOTAYIM;
  916. }
  917. <ST_IN_SCRIPTING>"\\" {
  918. return T_NS_SEPARATOR;
  919. }
  920. <ST_IN_SCRIPTING>"new" {
  921. return T_NEW;
  922. }
  923. <ST_IN_SCRIPTING>"clone" {
  924. return T_CLONE;
  925. }
  926. <ST_IN_SCRIPTING>"var" {
  927. return T_VAR;
  928. }
  929. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  930. return T_INT_CAST;
  931. }
  932. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
  933. return T_DOUBLE_CAST;
  934. }
  935. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"string"{TABS_AND_SPACES}")" {
  936. return T_STRING_CAST;
  937. }
  938. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"binary"{TABS_AND_SPACES}")" {
  939. return T_STRING_CAST;
  940. }
  941. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
  942. return T_ARRAY_CAST;
  943. }
  944. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
  945. return T_OBJECT_CAST;
  946. }
  947. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
  948. return T_BOOL_CAST;
  949. }
  950. <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
  951. return T_UNSET_CAST;
  952. }
  953. <ST_IN_SCRIPTING>"eval" {
  954. return T_EVAL;
  955. }
  956. <ST_IN_SCRIPTING>"include" {
  957. return T_INCLUDE;
  958. }
  959. <ST_IN_SCRIPTING>"include_once" {
  960. return T_INCLUDE_ONCE;
  961. }
  962. <ST_IN_SCRIPTING>"require" {
  963. return T_REQUIRE;
  964. }
  965. <ST_IN_SCRIPTING>"require_once" {
  966. return T_REQUIRE_ONCE;
  967. }
  968. <ST_IN_SCRIPTING>"namespace" {
  969. return T_NAMESPACE;
  970. }
  971. <ST_IN_SCRIPTING>"use" {
  972. return T_USE;
  973. }
  974. <ST_IN_SCRIPTING>"global" {
  975. return T_GLOBAL;
  976. }
  977. <ST_IN_SCRIPTING>"isset" {
  978. return T_ISSET;
  979. }
  980. <ST_IN_SCRIPTING>"empty" {
  981. return T_EMPTY;
  982. }
  983. <ST_IN_SCRIPTING>"__halt_compiler" {
  984. return T_HALT_COMPILER;
  985. }
  986. <ST_IN_SCRIPTING>"static" {
  987. return T_STATIC;
  988. }
  989. <ST_IN_SCRIPTING>"abstract" {
  990. return T_ABSTRACT;
  991. }
  992. <ST_IN_SCRIPTING>"final" {
  993. return T_FINAL;
  994. }
  995. <ST_IN_SCRIPTING>"private" {
  996. return T_PRIVATE;
  997. }
  998. <ST_IN_SCRIPTING>"protected" {
  999. return T_PROTECTED;
  1000. }
  1001. <ST_IN_SCRIPTING>"public" {
  1002. return T_PUBLIC;
  1003. }
  1004. <ST_IN_SCRIPTING>"unset" {
  1005. return T_UNSET;
  1006. }
  1007. <ST_IN_SCRIPTING>"=>" {
  1008. return T_DOUBLE_ARROW;
  1009. }
  1010. <ST_IN_SCRIPTING>"list" {
  1011. return T_LIST;
  1012. }
  1013. <ST_IN_SCRIPTING>"array" {
  1014. return T_ARRAY;
  1015. }
  1016. <ST_IN_SCRIPTING>"++" {
  1017. return T_INC;
  1018. }
  1019. <ST_IN_SCRIPTING>"--" {
  1020. return T_DEC;
  1021. }
  1022. <ST_IN_SCRIPTING>"===" {
  1023. return T_IS_IDENTICAL;
  1024. }
  1025. <ST_IN_SCRIPTING>"!==" {
  1026. return T_IS_NOT_IDENTICAL;
  1027. }
  1028. <ST_IN_SCRIPTING>"==" {
  1029. return T_IS_EQUAL;
  1030. }
  1031. <ST_IN_SCRIPTING>"!="|"<>" {
  1032. return T_IS_NOT_EQUAL;
  1033. }
  1034. <ST_IN_SCRIPTING>"<=" {
  1035. return T_IS_SMALLER_OR_EQUAL;
  1036. }
  1037. <ST_IN_SCRIPTING>">=" {
  1038. return T_IS_GREATER_OR_EQUAL;
  1039. }
  1040. <ST_IN_SCRIPTING>"+=" {
  1041. return T_PLUS_EQUAL;
  1042. }
  1043. <ST_IN_SCRIPTING>"-=" {
  1044. return T_MINUS_EQUAL;
  1045. }
  1046. <ST_IN_SCRIPTING>"*=" {
  1047. return T_MUL_EQUAL;
  1048. }
  1049. <ST_IN_SCRIPTING>"/=" {
  1050. return T_DIV_EQUAL;
  1051. }
  1052. <ST_IN_SCRIPTING>".=" {
  1053. return T_CONCAT_EQUAL;
  1054. }
  1055. <ST_IN_SCRIPTING>"%=" {
  1056. return T_MOD_EQUAL;
  1057. }
  1058. <ST_IN_SCRIPTING>"<<=" {
  1059. return T_SL_EQUAL;
  1060. }
  1061. <ST_IN_SCRIPTING>">>=" {
  1062. return T_SR_EQUAL;
  1063. }
  1064. <ST_IN_SCRIPTING>"&=" {
  1065. return T_AND_EQUAL;
  1066. }
  1067. <ST_IN_SCRIPTING>"|=" {
  1068. return T_OR_EQUAL;
  1069. }
  1070. <ST_IN_SCRIPTING>"^=" {
  1071. return T_XOR_EQUAL;
  1072. }
  1073. <ST_IN_SCRIPTING>"||" {
  1074. return T_BOOLEAN_OR;
  1075. }
  1076. <ST_IN_SCRIPTING>"&&" {
  1077. return T_BOOLEAN_AND;
  1078. }
  1079. <ST_IN_SCRIPTING>"OR" {
  1080. return T_LOGICAL_OR;
  1081. }
  1082. <ST_IN_SCRIPTING>"AND" {
  1083. return T_LOGICAL_AND;
  1084. }
  1085. <ST_IN_SCRIPTING>"XOR" {
  1086. return T_LOGICAL_XOR;
  1087. }
  1088. <ST_IN_SCRIPTING>"<<" {
  1089. return T_SL;
  1090. }
  1091. <ST_IN_SCRIPTING>">>" {
  1092. return T_SR;
  1093. }
  1094. <ST_IN_SCRIPTING>{TOKENS} {
  1095. return yytext[0];
  1096. }
  1097. <ST_IN_SCRIPTING>"{" {
  1098. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
  1099. return '{';
  1100. }
  1101. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
  1102. yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
  1103. return T_DOLLAR_OPEN_CURLY_BRACES;
  1104. }
  1105. <ST_IN_SCRIPTING>"}" {
  1106. RESET_DOC_COMMENT();
  1107. if (!zend_stack_is_empty(&SCNG(state_stack))) {
  1108. yy_pop_state(TSRMLS_C);
  1109. }
  1110. return '}';
  1111. }
  1112. <ST_LOOKING_FOR_VARNAME>{LABEL} {
  1113. zend_copy_value(zendlval, yytext, yyleng);
  1114. zendlval->type = IS_STRING;
  1115. yy_pop_state(TSRMLS_C);
  1116. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
  1117. return T_STRING_VARNAME;
  1118. }
  1119. <ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
  1120. yyless(0);
  1121. yy_pop_state(TSRMLS_C);
  1122. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
  1123. goto restart;
  1124. }
  1125. <ST_IN_SCRIPTING>{LNUM} {
  1126. if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
  1127. zendlval->value.lval = strtol(yytext, NULL, 0);
  1128. } else {
  1129. errno = 0;
  1130. zendlval->value.lval = strtol(yytext, NULL, 0);
  1131. if (errno == ERANGE) { /* Overflow */
  1132. if (yytext[0] == '0') { /* octal overflow */
  1133. zendlval->value.dval = zend_oct_strtod(yytext, NULL);
  1134. } else {
  1135. zendlval->value.dval = zend_strtod(yytext, NULL);
  1136. }
  1137. zendlval->type = IS_DOUBLE;
  1138. return T_DNUMBER;
  1139. }
  1140. }
  1141. zendlval->type = IS_LONG;
  1142. return T_LNUMBER;
  1143. }
  1144. <ST_IN_SCRIPTING>{HNUM} {
  1145. char *hex = yytext + 2; /* Skip "0x" */
  1146. int len = yyleng - 2;
  1147. /* Skip any leading 0s */
  1148. while (*hex == '0') {
  1149. hex++;
  1150. len--;
  1151. }
  1152. if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
  1153. zendlval->value.lval = strtol(hex, NULL, 16);
  1154. zendlval->type = IS_LONG;
  1155. return T_LNUMBER;
  1156. } else {
  1157. zendlval->value.dval = zend_hex_strtod(hex, NULL);
  1158. zendlval->type = IS_DOUBLE;
  1159. return T_DNUMBER;
  1160. }
  1161. }
  1162. <ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
  1163. if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
  1164. zendlval->value.lval = strtol(yytext, NULL, 10);
  1165. zendlval->type = IS_LONG;
  1166. } else {
  1167. zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
  1168. zendlval->value.str.len = yyleng;
  1169. zendlval->type = IS_STRING;
  1170. }
  1171. return T_NUM_STRING;
  1172. }
  1173. <ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */
  1174. zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
  1175. zendlval->value.str.len = yyleng;
  1176. zendlval->type = IS_STRING;
  1177. return T_NUM_STRING;
  1178. }
  1179. <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
  1180. zendlval->value.dval = zend_strtod(yytext, NULL);
  1181. zendlval->type = IS_DOUBLE;
  1182. return T_DNUMBER;
  1183. }
  1184. <ST_IN_SCRIPTING>"__CLASS__" {
  1185. char *class_name = NULL;
  1186. if (CG(active_class_entry)) {
  1187. class_name = CG(active_class_entry)->name;
  1188. }
  1189. if (!class_name) {
  1190. class_name = "";
  1191. }
  1192. zendlval->value.str.len = strlen(class_name);
  1193. zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
  1194. zendlval->type = IS_STRING;
  1195. return T_CLASS_C;
  1196. }
  1197. <ST_IN_SCRIPTING>"__FUNCTION__" {
  1198. char *func_name = NULL;
  1199. if (CG(active_op_array)) {
  1200. func_name = CG(active_op_array)->function_name;
  1201. }
  1202. if (!func_name) {
  1203. func_name = "";
  1204. }
  1205. zendlval->value.str.len = strlen(func_name);
  1206. zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
  1207. zendlval->type = IS_STRING;
  1208. return T_FUNC_C;
  1209. }
  1210. <ST_IN_SCRIPTING>"__METHOD__" {
  1211. char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
  1212. char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
  1213. size_t len = 0;
  1214. if (class_name) {
  1215. len += strlen(class_name) + 2;
  1216. }
  1217. if (func_name) {
  1218. len += strlen(func_name);
  1219. }
  1220. zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
  1221. class_name ? class_name : "",
  1222. class_name && func_name ? "::" : "",
  1223. func_name ? func_name : ""
  1224. );
  1225. zendlval->type = IS_STRING;
  1226. return T_METHOD_C;
  1227. }
  1228. <ST_IN_SCRIPTING>"__LINE__" {
  1229. zendlval->value.lval = CG(zend_lineno);
  1230. zendlval->type = IS_LONG;
  1231. return T_LINE;
  1232. }
  1233. <ST_IN_SCRIPTING>"__FILE__" {
  1234. char *filename = zend_get_compiled_filename(TSRMLS_C);
  1235. if (!filename) {
  1236. filename = "";
  1237. }
  1238. zendlval->value.str.len = strlen(filename);
  1239. zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
  1240. zendlval->type = IS_STRING;
  1241. return T_FILE;
  1242. }
  1243. <ST_IN_SCRIPTING>"__DIR__" {
  1244. char *filename = zend_get_compiled_filename(TSRMLS_C);
  1245. const size_t filename_len = strlen(filename);
  1246. char *dirname;
  1247. if (!filename) {
  1248. filename = "";
  1249. }
  1250. dirname = estrndup(filename, filename_len);
  1251. zend_dirname(dirname, filename_len);
  1252. if (strcmp(dirname, ".") == 0) {
  1253. dirname = erealloc(dirname, MAXPATHLEN);
  1254. #if HAVE_GETCWD
  1255. VCWD_GETCWD(dirname, MAXPATHLEN);
  1256. #elif HAVE_GETWD
  1257. VCWD_GETWD(dirname);
  1258. #endif
  1259. }
  1260. zendlval->value.str.len = strlen(dirname);
  1261. zendlval->value.str.val = dirname;
  1262. zendlval->type = IS_STRING;
  1263. return T_DIR;
  1264. }
  1265. <ST_IN_SCRIPTING>"__NAMESPACE__" {
  1266. if (CG(current_namespace)) {
  1267. *zendlval = *CG(current_namespace);
  1268. zval_copy_ctor(zendlval);
  1269. } else {
  1270. ZVAL_EMPTY_STRING(zendlval);
  1271. }
  1272. return T_NS_C;
  1273. }
  1274. <INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
  1275. HANDLE_NEWLINES(yytext, yyleng);
  1276. zendlval->value.str.val = yytext; /* no copying - intentional */
  1277. zendlval->value.str.len = yyleng;
  1278. zendlval->type = IS_STRING;
  1279. BEGIN(ST_IN_SCRIPTING);
  1280. return T_OPEN_TAG;
  1281. }
  1282. <INITIAL>"<%=" {
  1283. if (CG(asp_tags)) {
  1284. zendlval->value.str.val = yytext; /* no copying - intentional */
  1285. zendlval->value.str.len = yyleng;
  1286. zendlval->type = IS_STRING;
  1287. BEGIN(ST_IN_SCRIPTING);
  1288. return T_OPEN_TAG_WITH_ECHO;
  1289. } else {
  1290. goto inline_char_handler;
  1291. }
  1292. }
  1293. <INITIAL>"<?=" {
  1294. if (CG(short_tags)) {
  1295. zendlval->value.str.val = yytext; /* no copying - intentional */
  1296. zendlval->value.str.len = yyleng;
  1297. zendlval->type = IS_STRING;
  1298. BEGIN(ST_IN_SCRIPTING);
  1299. return T_OPEN_TAG_WITH_ECHO;
  1300. } else {
  1301. goto inline_char_handler;
  1302. }
  1303. }
  1304. <INITIAL>"<%" {
  1305. if (CG(asp_tags)) {
  1306. zendlval->value.str.val = yytext; /* no copying - intentional */
  1307. zendlval->value.str.len = yyleng;
  1308. zendlval->type = IS_STRING;
  1309. BEGIN(ST_IN_SCRIPTING);
  1310. return T_OPEN_TAG;
  1311. } else {
  1312. goto inline_char_handler;
  1313. }
  1314. }
  1315. <INITIAL>"<?php"([ \t]|{NEWLINE}) {
  1316. zendlval->value.str.val = yytext; /* no copying - intentional */
  1317. zendlval->value.str.len = yyleng;
  1318. zendlval->type = IS_STRING;
  1319. HANDLE_NEWLINE(yytext[yyleng-1]);
  1320. BEGIN(ST_IN_SCRIPTING);
  1321. return T_OPEN_TAG;
  1322. }
  1323. <INITIAL>"<?" {
  1324. if (CG(short_tags)) {
  1325. zendlval->value.str.val = yytext; /* no copying - intentional */
  1326. zendlval->value.str.len = yyleng;
  1327. zendlval->type = IS_STRING;
  1328. BEGIN(ST_IN_SCRIPTING);
  1329. return T_OPEN_TAG;
  1330. } else {
  1331. goto inline_char_handler;
  1332. }
  1333. }
  1334. <INITIAL>{ANY_CHAR} {
  1335. inline_char_handler:
  1336. while (1) {
  1337. YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
  1338. if (ptr == NULL) {
  1339. YYCURSOR = YYLIMIT;
  1340. yyleng = YYCURSOR - SCNG(yy_text);
  1341. break;
  1342. } else {
  1343. YYCURSOR = ptr + 1;
  1344. /* stop if it may be an opening tag (<?, <%, <script>). this condition is not optimal though */
  1345. if (YYCURSOR < YYLIMIT && (*YYCURSOR == '?' || *YYCURSOR == '%' || *YYCURSOR == 's')) {
  1346. --YYCURSOR;
  1347. yyleng = YYCURSOR - SCNG(yy_text);
  1348. break;
  1349. }
  1350. }
  1351. }
  1352. #ifdef ZEND_MULTIBYTE
  1353. if (SCNG(output_filter)) {
  1354. int readsize;
  1355. size_t sz = 0;
  1356. readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
  1357. zendlval->value.str.len = sz;
  1358. if (readsize < yyleng) {
  1359. yyless(readsize);
  1360. }
  1361. } else {
  1362. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1363. zendlval->value.str.len = yyleng;
  1364. }
  1365. #else /* !ZEND_MULTIBYTE */
  1366. zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
  1367. zendlval->value.str.len = yyleng;
  1368. #endif
  1369. zendlval->type = IS_STRING;
  1370. HANDLE_NEWLINES(yytext, yyleng);
  1371. return T_INLINE_HTML;
  1372. }
  1373. /* Make sure a label character follows "->", otherwise there is no property
  1374. * and "->" will be taken literally
  1375. */
  1376. <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
  1377. yyless(yyleng - 3);
  1378. yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
  1379. zend_copy_value(zendlval, (yytext+1), (yyleng-1));
  1380. zendlval->type = IS_STRING;
  1381. return T_VARIABLE;
  1382. }
  1383. /* A [ always designates a variable offset, regardless of what follows
  1384. */
  1385. <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
  1386. yyless(yyleng - 1);
  1387. yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
  1388. zend_copy_value(zendlval, (yytext+1), (yyleng-1));
  1389. zendlval->type = IS_STRING;
  1390. return T_VARIABLE;
  1391. }
  1392. <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
  1393. zend_copy_value(zendlval, (yytext+1), (yyleng-1));
  1394. zendlval->type = IS_STRING;
  1395. return T_VARIABLE;
  1396. }
  1397. <ST_VAR_OFFSET>"]" {
  1398. yy_pop_state(TSRMLS_C);
  1399. return ']';
  1400. }
  1401. <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
  1402. /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
  1403. return yytext[0];
  1404. }
  1405. <ST_VAR_OFFSET>[ \n\r\t\\'#] {
  1406. /* Invalid rule to return a more explicit parse error with proper line number */
  1407. yyless(0);
  1408. yy_pop_state(TSRMLS_C);
  1409. ZVAL_EMPTY_STRING(zendlval); /* Empty since it won't be used */
  1410. return T_ENCAPSED_AND_WHITESPACE;
  1411. }
  1412. <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
  1413. zend_copy_value(zendlval, yytext, yyleng);
  1414. zendlval->type = IS_STRING;
  1415. return T_STRING;
  1416. }
  1417. <ST_IN_SCRIPTING>"#"|"//" {
  1418. BEGIN(ST_ONE_LINE_COMMENT);
  1419. yymore();
  1420. }
  1421. <ST_ONE_LINE_COMMENT>"?"|"%"|">" {
  1422. yymore();
  1423. }
  1424. <ST_ONE_LINE_COMMENT>[^\n\r?%>\x00]*{ANY_CHAR} {
  1425. switch (yytext[yyleng-1]) {
  1426. case '?': case '%': case '>':
  1427. yyless(yyleng-1);
  1428. yymore();
  1429. break;
  1430. case '\n':
  1431. CG(zend_lineno)++;
  1432. /* intentional fall through */
  1433. default:
  1434. zendlval->value.str.val = yytext; /* no copying - intentional */
  1435. zendlval->value.str.len = yyleng;
  1436. zendlval->type = IS_STRING;
  1437. BEGIN(ST_IN_SCRIPTING);
  1438. return T_COMMENT;
  1439. }
  1440. }
  1441. <ST_ONE_LINE_COMMENT>{NEWLINE} {
  1442. zendlval->value.str.val = yytext; /* no copying - intentional */
  1443. zendlval->value.str.len = yyleng;
  1444. zendlval->type = IS_STRING;
  1445. BEGIN(ST_IN_SCRIPTING);
  1446. CG(zend_lineno)++;
  1447. return T_COMMENT;
  1448. }
  1449. <ST_ONE_LINE_COMMENT>"?>"|"%>" {
  1450. if (CG(asp_tags) || yytext[yyleng-2] != '%') { /* asp comment? */
  1451. zendlval->value.str.val = yytext; /* no copying - intentional */
  1452. zendlval->value.str.len = yyleng-2;
  1453. zendlval->type = IS_STRING;
  1454. yyless(yyleng - 2);
  1455. BEGIN(ST_IN_SCRIPTING);
  1456. return T_COMMENT;
  1457. } else {
  1458. yymore();
  1459. }
  1460. }
  1461. <ST_IN_SCRIPTING>"/**"{WHITESPACE} {
  1462. RESET_DOC_COMMENT();
  1463. BEGIN(ST_DOC_COMMENT);
  1464. yymore();
  1465. }
  1466. <ST_COMMENT,ST_DOC_COMMENT>{NULL} {
  1467. zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
  1468. return 0;
  1469. }
  1470. <ST_IN_SCRIPTING>"/*" {
  1471. BEGIN(ST_COMMENT);
  1472. yymore();
  1473. }
  1474. <ST_COMMENT,ST_DOC_COMMENT>[^*\x00]+ {
  1475. yymore();
  1476. }
  1477. <ST_DOC_COMMENT>"*/" {
  1478. CG(doc_comment) = estrndup(yytext, yyleng);
  1479. CG(doc_comment_len) = yyleng;
  1480. HANDLE_NEWLINES(yytext, yyleng);
  1481. BEGIN(ST_IN_SCRIPTING);
  1482. return T_DOC_COMMENT;
  1483. }
  1484. <ST_COMMENT>"*/" {
  1485. HANDLE_NEWLINES(yytext, yyleng);
  1486. BEGIN(ST_IN_SCRIPTING);
  1487. return T_COMMENT;
  1488. }
  1489. <ST_COMMENT,ST_DOC_COMMENT>"*" {
  1490. yymore();
  1491. }
  1492. <ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
  1493. zendlval->value.str.val = yytext; /* no copying - intentional */
  1494. zendlval->value.str.len = yyleng;
  1495. zendlval->type = IS_STRING;
  1496. BEGIN(INITIAL);
  1497. return T_CLOSE_TAG; /* implicit ';' at php-end tag */
  1498. }
  1499. <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
  1500. if (CG(asp_tags)) {
  1501. BEGIN(INITIAL);
  1502. zendlval->value.str.len = yyleng;
  1503. zendlval->type = IS_STRING;
  1504. zendlval->value.str.val = yytext; /* no copying - intentional */
  1505. return T_CLOSE_TAG; /* implicit ';' at php-end tag */
  1506. } else {
  1507. yyless(1);
  1508. return yytext[0];
  1509. }
  1510. }
  1511. /* ("{"*|"$"*) handles { or $ at the end of a string (or the entire contents)
  1512. */
  1513. <ST_IN_SCRIPTING>(b?["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) {
  1514. int bprefix = (yytext[0] != '"') ? 1 : 0;
  1515. zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
  1516. return T_CONSTANT_ENCAPSED_STRING;
  1517. }
  1518. <ST_IN_SCRIPTING>(b?[']([^'\\\x00]|("\\"{ANY_CHAR}))*[']) {
  1519. register char *s, *t;
  1520. char *end;
  1521. int bprefix = (yytext[0] != '\'') ? 1 : 0;
  1522. zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
  1523. zendlval->value.str.len = yyleng-bprefix-2;
  1524. zendlval->type = IS_STRING;
  1525. /* convert escape sequences */
  1526. s = t = zendlval->value.str.val;
  1527. end = s+zendlval->value.str.len;
  1528. while (s<end) {
  1529. if (*s=='\\') {
  1530. s++;
  1531. switch(*s) {
  1532. case '\\':
  1533. case '\'':
  1534. *t++ = *s;
  1535. zendlval->value.str.len--;
  1536. break;
  1537. default:
  1538. *t++ = '\\';
  1539. *t++ = *s;
  1540. break;
  1541. }
  1542. } else {
  1543. *t++ = *s;
  1544. }
  1545. if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
  1546. CG(zend_lineno)++;
  1547. }
  1548. s++;
  1549. }
  1550. *t = 0;
  1551. #ifdef ZEND_MULTIBYTE
  1552. if (SCNG(output_filter)) {
  1553. size_t sz = 0;
  1554. s = zendlval->value.str.val;
  1555. SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
  1556. zendlval->value.str.len = sz;
  1557. efree(s);
  1558. }
  1559. #endif /* ZEND_MULTIBYTE */
  1560. return T_CONSTANT_ENCAPSED_STRING;
  1561. }
  1562. <ST_IN_SCRIPTING>b?["] {
  1563. BEGIN(ST_DOUBLE_QUOTES);
  1564. return '"';
  1565. }
  1566. <ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
  1567. char *s;
  1568. int bprefix = (yytext[0] != '<') ? 1 : 0;
  1569. CG(zend_lineno)++;
  1570. CG(heredoc_len) = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
  1571. s = yytext+bprefix+3;
  1572. while ((*s == ' ') || (*s == '\t')) {
  1573. s++;
  1574. CG(heredoc_len)--;
  1575. }
  1576. if (*s == '\'') {
  1577. s++;
  1578. CG(heredoc_len) -= 2;
  1579. BEGIN(ST_NOWDOC);
  1580. } else {
  1581. if (*s == '"') {
  1582. s++;
  1583. CG(heredoc_len) -= 2;
  1584. }
  1585. BEGIN(ST_HEREDOC);
  1586. }
  1587. CG(heredoc) = estrndup(s, CG(heredoc_len));
  1588. /* Check for ending label on the next line */
  1589. if (CG(heredoc_len) < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, CG(heredoc_len))) {
  1590. unsigned char *end = YYCURSOR + CG(heredoc_len);
  1591. if (*end == ';') {
  1592. end++;
  1593. }
  1594. if (*end == '\n' || *end == '\r') {
  1595. BEGIN(ST_END_HEREDOC);
  1596. }
  1597. }
  1598. return T_START_HEREDOC;
  1599. }
  1600. <ST_IN_SCRIPTING>[`] {
  1601. BEGIN(ST_BACKQUOTE);
  1602. return '`';
  1603. }
  1604. /* Match everything up to and including a possible ending label, so if the label
  1605. * doesn't match, it's kept with the rest of the string
  1606. *
  1607. * {HEREDOC_NEWLINE}+ handles the case of more than one newline sequence that
  1608. * couldn't be matched with HEREDOC_CHARS, because of the following label
  1609. */
  1610. <ST_HEREDOC>{HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] {
  1611. char *end = yytext + yyleng - 1;
  1612. if (end[-1] == ';') {
  1613. end--;
  1614. yyleng--;
  1615. }
  1616. if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) {
  1617. int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before and after label */
  1618. /* May have matched fooLABEL; make sure there's a newline before it */
  1619. if (yytext[len] != '\n') {
  1620. if (yytext[len] != '\r') {
  1621. yyless(yyleng - 1);
  1622. yymore();
  1623. }
  1624. } else if (len > 0 && yytext[len - 1] == '\r') {
  1625. len--; /* Windows newline */
  1626. }
  1627. /* Go back before label, to match in ST_END_HEREDOC state. yytext will include
  1628. * newline before label, for zend_highlight/strip, tokenizer, etc. */
  1629. yyless(yyleng - CG(heredoc_len) - 1); /* 1 for newline after label */
  1630. CG(increment_lineno) = 1; /* For newline before label */
  1631. BEGIN(ST_END_HEREDOC);
  1632. zend_scan_escape_string(zendlval, yytext, len, 0 TSRMLS_CC);
  1633. return T_ENCAPSED_AND_WHITESPACE;
  1634. } else {
  1635. /* Go back to end of label, so the next match works correctly in case of
  1636. * a variable or another label at the beginning of the next line */
  1637. yyless(yyleng - 1);
  1638. yymore();
  1639. }
  1640. }
  1641. <ST_END_HEREDOC>{ANY_CHAR} {
  1642. YYCURSOR += CG(heredoc_len) - 1;
  1643. yyleng = CG(heredoc_len);
  1644. Z_STRVAL_P(zendlval) = CG(heredoc);
  1645. Z_STRLEN_P(zendlval) = CG(heredoc_len);
  1646. CG(heredoc) = NULL;
  1647. CG(heredoc_len) = 0;
  1648. BEGIN(ST_IN_SCRIPTING);
  1649. return T_END_HEREDOC;
  1650. }
  1651. /* Will only match when $ follows: "{$" */
  1652. <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{" {
  1653. zendlval->value.lval = (long) '{';
  1654. yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
  1655. return T_CURLY_OPEN;
  1656. }
  1657. <ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}+ {
  1658. zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
  1659. return T_ENCAPSED_AND_WHITESPACE;
  1660. }
  1661. /* "{"{2,}|"$"{2,} handles { before "{$" or literal $ before a variable or "${"
  1662. * (("{"+|"$"+)["]) handles { or $ at the end of a string
  1663. *
  1664. * Same for backquotes and heredocs, except the second case doesn't apply to
  1665. * heredocs. yyless(yyleng - 1) is used to correct taking one character too many
  1666. */
  1667. <ST_DOUBLE_QUOTES>{DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)["])) {
  1668. yyless(yyleng - 1);
  1669. zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
  1670. return T_ENCAPSED_AND_WHITESPACE;
  1671. }
  1672. <ST_BACKQUOTE>{BACKQUOTE_CHARS}+ {
  1673. zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
  1674. return T_ENCAPSED_AND_WHITESPACE;
  1675. }
  1676. <ST_BACKQUOTE>{BACKQUOTE_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)[`])) {
  1677. yyless(yyleng - 1);
  1678. zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
  1679. return T_ENCAPSED_AND_WHITESPACE;
  1680. }
  1681. /* ({HEREDOC_NEWLINE}+({LABEL}";"?)?)? handles the possible case of newline
  1682. * sequences, possibly followed by a label, that couldn't be matched with
  1683. * HEREDOC_CHARS because of a following variable or "{$"
  1684. *
  1685. * This doesn't affect real ending labels, as they are followed by a newline,
  1686. * which will result in a longer match for the correct rule if present
  1687. */
  1688. <ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)? {
  1689. zend_scan_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC);
  1690. return T_ENCAPSED_AND_WHITESPACE;
  1691. }
  1692. <ST_HEREDOC>{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)?("{"{2,}|"$"{2,}) {
  1693. yyless(yyleng - 1);
  1694. zend_scan_escape_string(zendlval, yytext, yyleng, 0 TSRMLS_CC);
  1695. return T_ENCAPSED_AND_WHITESPACE;
  1696. }
  1697. <ST_NOWDOC>({NOWDOC_CHARS}+{NEWLINE}+|{NEWLINE}+){LABEL}";"?[\n\r] {
  1698. char *end = yytext + yyleng - 1;
  1699. if (end[-1] == ';') {
  1700. end--;
  1701. yyleng--;
  1702. }
  1703. if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) {
  1704. int len = yyleng - CG(heredoc_len) - 2; /* 2 for newline before and after label */
  1705. /* May have matched fooLABEL; make sure there's a newline before it */
  1706. if (yytext[len] != '\n') {
  1707. if (yytext[len] != '\r') {
  1708. yyless(yyleng - 1);
  1709. yymore();
  1710. }
  1711. } else if (len > 0 && yytext[len - 1] == '\r') {
  1712. len--; /* Windows newline */
  1713. }
  1714. /* Go back before label, to match in ST_END_HEREDOC state. yytext will include
  1715. * newline before label, for zend_highlight/strip, tokenizer, etc. */
  1716. yyless(yyleng - CG(heredoc_len) - 1); /* 1 for newline after label */
  1717. CG(increment_lineno) = 1; /* For newline before label */
  1718. BEGIN(ST_END_HEREDOC);
  1719. zend_copy_value(zendlval, yytext, len);
  1720. zendlval->type = IS_STRING;
  1721. HANDLE_NEWLINES(yytext, len);
  1722. return T_ENCAPSED_AND_WHITESPACE;
  1723. } else {
  1724. /* Go back to end of label, so the next match works correctly in case of
  1725. * another label at the beginning of the next line */
  1726. yyless(yyleng - 1);
  1727. yymore();
  1728. }
  1729. }
  1730. <ST_DOUBLE_QUOTES>["] {
  1731. BEGIN(ST_IN_SCRIPTING);
  1732. return '"';
  1733. }
  1734. <ST_BACKQUOTE>[`] {
  1735. BEGIN(ST_IN_SCRIPTING);
  1736. return '`';
  1737. }
  1738. <*>{NULL} { return 0; } /* EOF */
  1739. <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
  1740. zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
  1741. goto restart;
  1742. }
  1743. */
  1744. }