Browse Source

- added script encoding support to Zend Engine 2.

this enables ZE2 to gracefully parse scripts written in UTF-8 (with BOM),
  UTF-16, UTF-32, Shift_JIS, ISO-2022-JP etc... (when configured with
  '--enable-zend-multibyte' and '--enable-mbstring')
PEAR_1_4DEV
Masaki Fujimoto 23 years ago
parent
commit
ab9dcec292
  1. 13
      Zend/Zend.m4
  2. 11
      Zend/flex.skl
  3. 52
      Zend/zend_compile.c
  4. 32
      Zend/zend_globals.h
  5. 17
      Zend/zend_highlight.c
  6. 16
      Zend/zend_language_scanner.h
  7. 336
      Zend/zend_language_scanner.l
  8. 1133
      Zend/zend_multibyte.c
  9. 79
      Zend/zend_multibyte.h
  10. 20
      ext/mbstring/mbstring.c
  11. 7
      main/main.c

13
Zend/Zend.m4

@ -129,6 +129,13 @@ AC_ARG_ENABLE(memory-limit,
ZEND_MEMORY_LIMIT=no
])
AC_ARG_ENABLE(zend-multibyte,
[ --enable-zend-multibyte Compile with zend multibyte support. ], [
ZEND_MULTIBYTE=$enableval
],[
ZEND_MULTIBYTE=no
])
AC_MSG_CHECKING(whether to enable thread-safety)
AC_MSG_RESULT($ZEND_MAINTAINER_ZTS)
@ -140,6 +147,9 @@ AC_MSG_RESULT($ZEND_MEMORY_LIMIT)
AC_MSG_CHECKING(whether to enable Zend debugging)
AC_MSG_RESULT($ZEND_DEBUG)
AC_MSG_CHECKING(whether to enable Zend multibyte)
AC_MSG_RESULT($ZEND_MULTIBYTE)
if test "$ZEND_DEBUG" = "yes"; then
AC_DEFINE(ZEND_DEBUG,1,[ ])
@ -168,6 +178,9 @@ else
AC_DEFINE(MEMORY_LIMIT, 0, [Memory limit])
fi
if test "$ZEND_MULTIBYTE" = "yes"; then
AC_DEFINE(ZEND_MULTIBYTE, 1, [ ])
fi
changequote({,})
if test -n "$GCC" && test "$ZEND_INLINE_OPTIMIZATION" != "yes"; then

11
Zend/flex.skl

@ -440,12 +440,17 @@ YY_MALLOC_DECL
#define ECHO /* There is no output */
#endif
#define YY_INPUT(buf, result, max_size) \
#ifdef ZEND_MULTIBYTE
# define YY_INPUT(buf, result, max_size) \
if ( ((result = zend_multibyte_yyinput(yyin, buf, max_size TSRMLS_CC)) == 0) \
&& zend_stream_ferror( yyin TSRMLS_CC) ) \
YY_FATAL_ERROR( "input in flex scanner failed" );
#else
# define YY_INPUT(buf, result, max_size) \
if ( ((result = zend_stream_read(yyin, buf, max_size TSRMLS_CC)) == 0) \
&& zend_stream_ferror( yyin TSRMLS_CC) ) \
YY_FATAL_ERROR( "input in flex scanner failed" );
#endif
#ifndef ECHO
%- Standard (non-C++) definition

52
Zend/zend_compile.c

@ -27,6 +27,10 @@
#include "zend_API.h"
#include "zend_fast_cache.h"
#ifdef ZEND_MULTIBYTE
#include "zend_multibyte.h"
#endif /* ZEND_MULTIBYTE */
ZEND_API zend_op_array *(*zend_compile_file)(zend_file_handle *file_handle, int type TSRMLS_DC);
@ -51,7 +55,14 @@ static void build_runtime_defined_function_key(zval *result, char *name, int nam
/* NULL, name length, filename length, line number length */
result->value.str.len = 1+name_length+strlen(filename)+lineno_len;
result->value.str.val = (char *) emalloc(result->value.str.len+1);
#ifdef ZEND_MULTIBYTE
/* must be binary safe */
result->value.str.val[0] = '\0';
memcpy(result->value.str.val+1, name, name_length);
sprintf(result->value.str.val+1+name_length, "%s%s", filename, lineno_buf);
#else
sprintf(result->value.str.val, "%c%s%s%s", '\0', name, filename, lineno_buf);
#endif /* ZEND_MULTIBYTE */
result->type = IS_STRING;
result->refcount = 1;
}
@ -90,6 +101,15 @@ void zend_init_compiler_data_structures(TSRMLS_D)
init_compiler_declarables(TSRMLS_C);
CG(throw_list) = NULL;
zend_hash_apply(CG(auto_globals), (apply_func_t) zend_auto_global_arm TSRMLS_CC);
#ifdef ZEND_MULTIBYTE
CG(script_encoding_list) = NULL;
CG(script_encoding_list_size) = 0;
CG(internal_encoding) = NULL;
CG(encoding_detector) = NULL;
CG(encoding_converter) = NULL;
CG(encoding_oddlen) = NULL;
#endif /* ZEND_MULTIBYTE */
}
@ -114,6 +134,12 @@ void shutdown_compiler(TSRMLS_D)
zend_stack_destroy(&CG(list_stack));
zend_hash_destroy(&CG(filenames_table));
zend_llist_destroy(&CG(open_files));
#ifdef ZEND_MULTIBYTE
if (CG(script_encoding_list)) {
efree(CG(script_encoding_list));
}
#endif /* ZEND_MULTIBYTE */
}
@ -3064,6 +3090,32 @@ void zend_do_declare_stmt(znode *var, znode *val TSRMLS_DC)
if (!zend_binary_strcasecmp(var->u.constant.value.str.val, var->u.constant.value.str.len, "ticks", sizeof("ticks")-1)) {
convert_to_long(&val->u.constant);
CG(declarables).ticks = val->u.constant;
#ifdef ZEND_MULTIBYTE
} else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) {
zend_encoding *new_encoding, *old_encoding;
zend_encoding_filter old_input_filter;
if (val->u.constant.type == IS_CONSTANT) {
zend_error(E_COMPILE_ERROR, "Cannot use constants as encoding");
}
convert_to_string(&val->u.constant);
new_encoding = zend_multibyte_fetch_encoding(val->u.constant.value.str.val);
if (!new_encoding) {
zend_error(E_COMPILE_WARNING, "Unsupported encoding [%s]", val->u.constant.value.str.val);
} else {
old_input_filter = LANG_SCNG(input_filter);
old_encoding = LANG_SCNG(script_encoding);
zend_multibyte_set_filter(new_encoding TSRMLS_CC);
/* need to re-scan if input filter changed */
if (old_input_filter != LANG_SCNG(input_filter) ||
((old_input_filter == zend_multibyte_script_encoding_filter) &&
(new_encoding != old_encoding))) {
zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC);
}
}
efree(val->u.constant.value.str.val);
#endif /* ZEND_MULTIBYTE */
}
zval_dtor(&var->u.constant);
}

32
Zend/zend_globals.h

@ -35,6 +35,10 @@
#include "zend_objects.h"
#include "zend_objects_API.h"
#ifdef ZEND_MULTIBYTE
#include "zend_multibyte.h"
#endif /* ZEND_MULTIBYTE */
/* Define ZTS if you want a thread-safe Zend */
/*#undef ZTS*/
@ -127,6 +131,18 @@ struct _zend_compiler_globals {
char *doc_comment;
zend_uint doc_comment_len;
#ifdef ZEND_MULTIBYTE
zend_encoding **script_encoding_list;
int script_encoding_list_size;
zend_encoding *internal_encoding;
/* multibyte utility functions */
zend_encoding_detector encoding_detector;
zend_encoding_converter encoding_converter;
zend_encoding_oddlen encoding_oddlen;
#endif /* ZEND_MULTIBYTE */
};
@ -271,6 +287,22 @@ struct _zend_scanner_globals {
int yy_start_stack_ptr;
int yy_start_stack_depth;
int *yy_start_stack;
#ifdef ZEND_MULTIBYTE
/* original (unfiltered) script */
char *script_org;
int script_org_size;
/* filtered script */
char *script_filtered;
int script_filtered_size;
/* input/output filters */
zend_encoding_filter input_filter;
zend_encoding_filter output_filter;
zend_encoding *script_encoding;
zend_encoding *internal_encoding;
#endif /* ZEND_MULTIBYTE */
};
#endif /* ZEND_GLOBALS_H */

17
Zend/zend_highlight.c

@ -57,6 +57,17 @@ ZEND_API void zend_html_putc(char c)
ZEND_API void zend_html_puts(const char *s, uint len TSRMLS_DC)
{
const char *ptr=s, *end=s+len;
#ifdef ZEND_MULTIBYTE
char *filtered;
int filtered_len;
if (LANG_SCNG(output_filter)) {
LANG_SCNG(output_filter)(&filtered, &filtered_len, s, len TSRMLS_CC);
ptr = filtered;
end = filtered + filtered_len;
}
#endif /* ZEND_MULTIBYTE */
while (ptr<end) {
if (*ptr==' ') {
@ -75,6 +86,12 @@ ZEND_API void zend_html_puts(const char *s, uint len TSRMLS_DC)
zend_html_putc(*ptr++);
}
}
#ifdef ZEND_MULTIBYTE
if (LANG_SCNG(output_filter)) {
efree(filtered);
}
#endif /* ZEND_MULTIBYTE */
}

16
Zend/zend_language_scanner.h

@ -28,6 +28,22 @@ typedef struct _zend_lex_state {
zend_file_handle *in;
uint lineno;
char *filename;
#ifdef ZEND_MULTIBYTE
/* original (unfiltered) script */
char *script_org;
int script_org_size;
/* filtered script */
char *script_filtered;
int script_filtered_size;
/* input/output filters */
zend_encoding_filter input_filter;
zend_encoding_filter output_filter;
zend_encoding *script_encoding;
zend_encoding *internal_encoding;
#endif /* ZEND_MULTIBYTE */
} zend_lex_state;

336
Zend/zend_language_scanner.l

@ -127,6 +127,12 @@ void startup_scanner(TSRMLS_D)
RESET_DOC_COMMENT();
SCNG(yy_start_stack_ptr) = 0;
SCNG(yy_start_stack_depth) = 0;
#ifdef ZEND_MULTIBYTE
SCNG(script_org) = NULL;
SCNG(script_org_size) = 0;
SCNG(script_filtered) = NULL;
SCNG(script_filtered_size) = 0;
#endif /* ZEND_MULTIBYTE */
}
@ -137,6 +143,17 @@ void shutdown_scanner(TSRMLS_D)
CG(heredoc_len)=0;
}
RESET_DOC_COMMENT();
#ifdef ZEND_MULTIBYTE
if (SCNG(script_org)) {
efree(SCNG(script_org));
SCNG(script_org) = NULL;
}
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
#endif /* ZEND_MULTIBYTE */
}
END_EXTERN_C()
@ -148,6 +165,17 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
lex_state->state = YYSTATE;
lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
lex_state->lineno = CG(zend_lineno);
#ifdef ZEND_MULTIBYTE
lex_state->script_org = SCNG(script_org);
lex_state->script_org_size = SCNG(script_org_size);
lex_state->script_filtered = SCNG(script_filtered);
lex_state->script_filtered_size = SCNG(script_filtered_size);
lex_state->input_filter = SCNG(input_filter);
lex_state->output_filter = SCNG(output_filter);
lex_state->script_encoding = SCNG(script_encoding);
lex_state->internal_encoding = SCNG(internal_encoding);
#endif /* ZEND_MULTIBYTE */
}
@ -166,6 +194,17 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
BEGIN(lex_state->state);
CG(zend_lineno) = lex_state->lineno;
zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
#ifdef ZEND_MULTIBYTE
SCNG(script_org) = lex_state->script_org;
SCNG(script_org_size) = lex_state->script_org_size;
SCNG(script_filtered) = lex_state->script_filtered;
SCNG(script_filtered_size) = lex_state->script_filtered_size;
SCNG(input_filter) = lex_state->input_filter;
SCNG(output_filter) = lex_state->output_filter;
SCNG(script_encoding) = lex_state->script_encoding;
SCNG(internal_encoding) = lex_state->internal_encoding;
#endif /* ZEND_MULTIBYTE */
}
@ -235,7 +274,40 @@ ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
/* Reset the scanner for scanning the new file */
SCNG(yy_in) = file_handle;
#ifdef ZEND_MULTIBYTE
if (file_handle->handle.stream.interactive == 0) {
if (zend_multibyte_read_script(TSRMLS_C) != 0) {
return FAILURE;
}
/* force flex to use buffer only */
SCNG(yy_in) = NULL;
SCNG(init) = 0;
SCNG(start) = 1;
zend_multibyte_set_filter(NULL TSRMLS_CC);
if (!SCNG(input_filter)) {
SCNG(script_filtered) = (char*)emalloc(SCNG(script_org_size)+1);
memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
SCNG(script_filtered_size) = SCNG(script_org_size);
} else {
SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
}
/* flex requires doubled null */
SCNG(script_filtered) = (char*)erealloc(SCNG(script_filtered), SCNG(script_filtered_size)+2);
*(SCNG(script_filtered)+SCNG(script_filtered_size)) = (char)NULL;
*(SCNG(script_filtered)+SCNG(script_filtered_size)+1) = (char)NULL;
yy_scan_buffer(SCNG(script_filtered), SCNG(script_filtered_size)+2 TSRMLS_CC);
} else {
yy_switch_to_buffer(yy_create_buffer(SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC) TSRMLS_CC);
}
#else /* !ZEND_MULTIBYTE */
yy_switch_to_buffer(yy_create_buffer(SCNG(yy_in), YY_BUF_SIZE TSRMLS_CC) TSRMLS_CC);
#endif /* ZEND_MULTIBYTE */
BEGIN(INITIAL);
if (file_handle->opened_path) {
@ -300,6 +372,17 @@ ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSR
retval = NULL;
}
compilation_successful=1;
#ifdef ZEND_MULTIBYTE
if (SCNG(script_org)) {
efree(SCNG(script_org));
SCNG(script_org) = NULL;
}
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
#endif /* ZEND_MULTIBYTE */
}
if (retval) {
@ -367,7 +450,29 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_D
str->value.str.val[str->value.str.len+1]=0;
SCNG(yy_in)=NULL;
#ifdef ZEND_MULTIBYTE
SCNG(script_org) = estrdup(str->value.str.val);
SCNG(script_org_size) = str->value.str.len;
zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
if (!SCNG(input_filter)) {
SCNG(script_filtered) = (char*)emalloc(SCNG(script_org_size)+1);
memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
SCNG(script_filtered_size) = SCNG(script_org_size);
} else {
SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
}
/* flex requires doubled null */
SCNG(script_filtered) = (char*)erealloc(SCNG(script_filtered), SCNG(script_filtered_size)+2);
*(SCNG(script_filtered)+SCNG(script_filtered_size)) = (char)NULL;
*(SCNG(script_filtered)+SCNG(script_filtered_size)+1) = (char)NULL;
yy_scan_buffer(SCNG(script_filtered), SCNG(script_filtered_size)+2 TSRMLS_CC);
#else /* !ZEND_MULTIBYTE */
yy_scan_buffer(str->value.str.val, str->value.str.len+2 TSRMLS_CC);
#endif /* ZEND_MULTIBYTE */
zend_set_compiled_filename(filename TSRMLS_CC);
CG(zend_lineno) = 1;
@ -408,6 +513,17 @@ zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
BEGIN(ST_IN_SCRIPTING);
compiler_result = zendparse(TSRMLS_C);
#ifdef ZEND_MULTIBYTE
if (SCNG(script_org)) {
efree(SCNG(script_org));
SCNG(script_org) = NULL;
}
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
#endif /* ZEND_MULTIBYTE */
if (compiler_result==1) {
CG(active_op_array) = original_active_op_array;
CG(unclean_shutdown)=1;
@ -442,6 +558,16 @@ int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlight
return FAILURE;
}
zend_highlight(syntax_highlighter_ini TSRMLS_CC);
#ifdef ZEND_MULTIBYTE
if (SCNG(script_org)) {
efree(SCNG(script_org));
SCNG(script_org) = NULL;
}
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
#endif /* ZEND_MULTIBYTE */
zend_destroy_file_handle(&file_handle TSRMLS_CC);
zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
return SUCCESS;
@ -459,12 +585,166 @@ int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_
return FAILURE;
}
zend_highlight(syntax_highlighter_ini TSRMLS_CC);
#ifdef ZEND_MULTIBYTE
if (SCNG(script_org)) {
efree(SCNG(script_org));
SCNG(script_org) = NULL;
}
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
#endif /* ZEND_MULTIBYTE */
zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
zval_dtor(str);
return SUCCESS;
}
END_EXTERN_C()
#ifdef ZEND_MULTIBYTE
BEGIN_EXTERN_C()
/*
 * Re-filter the rest of the script after the input filter and/or script
 * encoding changed while scanning (called from the declare(encoding=...)
 * handling in the compiler).
 *
 * old_input_filter  filter that was active before the change (may be NULL)
 * old_encoding      script encoding that was active before the change
 *
 * Steps:
 *   1. Take the current scan position inside the flex buffer.
 *   2. If an old filter existed, search for the offset into the unfiltered
 *      script (SCNG(script_org)) whose filtered length equals that position.
 *   3. Run the remainder of the unfiltered script through the current input
 *      filter (or use it as is when no filter is set) and splice the result
 *      into the flex buffer behind the current position.
 */
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC)
{
	YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
	int offset, original_offset, length, free_flag;
	char *p;
	zend_encoding *new_encoding;

	/* calculate current position */
	offset = original_offset = yy_c_buf_p - b->yy_ch_buf;
	if (old_input_filter && original_offset > 0) {
		/* the old filter must see the old encoding while we probe with it */
		new_encoding = SCNG(script_encoding);
		SCNG(script_encoding) = old_encoding;
		/*
		 * NOTE(review): the offset is adjusted one byte per iteration until
		 * the filtered length matches exactly; if no offset ever produces an
		 * exact match this loop would not terminate -- confirm the filters
		 * guarantee convergence.
		 */
		do {
			(old_input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC);
			if (!p) {
				SCNG(script_encoding) = new_encoding;
				return;
			}
			efree(p);
			if (length > original_offset) {
				offset--;
			} else if (length < original_offset) {
				offset++;
			}
		} while (original_offset != length);
		SCNG(script_encoding) = new_encoding;
	}

	/* convert and set */
	if (!SCNG(input_filter)) {
		length = SCNG(script_org_size)-offset-1;
		p = SCNG(script_org)+offset+1;
		free_flag = 0;
	} else {
		SCNG(input_filter)(&p, &length, SCNG(script_org)+offset+1, SCNG(script_org_size)-offset-1 TSRMLS_CC);
		free_flag = 1;
	}

	/* grow the flex buffer when the re-filtered tail no longer fits */
	if (original_offset+length+1 > (int)b->yy_buf_size) {
		b->yy_buf_size = original_offset+length+1;
		b->yy_ch_buf = (char*)erealloc(b->yy_ch_buf, b->yy_buf_size+2);
		SCNG(script_filtered) = b->yy_ch_buf;
		SCNG(script_filtered_size) = b->yy_buf_size;
	}
	yy_c_buf_p = b->yy_ch_buf + original_offset;

	/*
	 * The tail is a length-counted buffer, not a C string: memcpy() keeps it
	 * binary safe (strncpy() would stop at the first NUL byte and zero-fill
	 * the remainder).  A non-positive length means there is nothing to copy.
	 */
	if (length > 0) {
		memcpy(yy_c_buf_p+1, p, length);
	}

	/* flex requires two end-of-buffer characters after the data */
	b->yy_n_chars = original_offset + length + 1;
	SCNG(yy_n_chars) = b->yy_n_chars;
	b->yy_ch_buf[SCNG(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
	b->yy_ch_buf[SCNG(yy_n_chars)+1] = YY_END_OF_BUFFER_CHAR;

	if (free_flag) {
		efree(p);
	}
}
/*
 * Input callback used by YY_INPUT when ZEND_MULTIBYTE is enabled.
 *
 * file_handle  stream to read from
 * buf          destination buffer supplied by the scanner
 * len          capacity of buf in bytes
 *
 * Non-interactive streams are handed straight to zend_stream_read().  For
 * interactive streams a single line (up to and including '\n', never more
 * than len bytes) is read into buf and a NUL-terminated copy of it is kept
 * in SCNG(script_org).
 *
 * Returns the number of bytes stored in buf.
 */
ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC)
{
	int c = '*';
	size_t n;

	if (file_handle->handle.stream.interactive == 0) {
		return zend_stream_read(file_handle, buf, len TSRMLS_CC);
	}

	/* interactive: drop whatever was kept from the previous line */
	if (SCNG(script_org)) {
		efree(SCNG(script_org));
	}
	if (SCNG(script_filtered)) {
		efree(SCNG(script_filtered));
	}
	SCNG(script_org) = NULL;
	SCNG(script_org_size) = 0;
	/* reset the filtered buffer too, so later cleanup does not free it twice */
	SCNG(script_filtered) = NULL;
	SCNG(script_filtered_size) = 0;

	/* TODO: support widechars */
	/* buf is a pointer here: bound the loop by len, not by sizeof(buf) */
	for (n = 0; n < len && (c = zend_stream_getc(yyin TSRMLS_CC)) != EOF && c != '\n'; ++n) {
		buf[n] = (char)c;
	}
	/* c can only be '\n' when the loop stopped with n < len, so this fits */
	if (c == '\n') {
		buf[n++] = (char) c;
	}

	SCNG(script_org_size) = (int)n;
	SCNG(script_org) = (char*)emalloc(n+1);
	memcpy(SCNG(script_org), buf, n);
	SCNG(script_org)[n] = '\0';

	return (int)n;
}
/*
 * Read the whole input stream (yyin) into SCNG(script_org).
 *
 * Any previously held buffer is released first.  The stream is read in
 * 8192-byte chunks and appended to a growing buffer; the result is
 * NUL-terminated and its length (without the terminator) is stored in
 * SCNG(script_org_size).
 *
 * Returns 0 on success, -1 when zend_stream_read() reports a negative count.
 */
ZEND_API int zend_multibyte_read_script(TSRMLS_D)
{
	char buf[8192];
	int n;

	if (SCNG(script_org)) {
		efree(SCNG(script_org));
	}
	SCNG(script_org) = NULL;
	SCNG(script_org_size) = 0;

	for (;;) {
		n = zend_stream_read(yyin, buf, sizeof(buf) TSRMLS_CC);
		if (n <= 0) {
			break;
		}

		/* always keep one spare byte for the terminator written below */
		SCNG(script_org_size) += n;
		if (SCNG(script_org)) {
			SCNG(script_org) = (char*)erealloc(SCNG(script_org), SCNG(script_org_size)+1);
		} else {
			SCNG(script_org) = (char*)emalloc(SCNG(script_org_size)+1);
		}
		memcpy(SCNG(script_org)+SCNG(script_org_size)-n, buf, n);
	}

	if (n < 0) {
		return -1;
	}

	/* empty input: still hand back a valid (empty) string */
	if (!SCNG(script_org)) {
		SCNG(script_org) = emalloc(SCNG(script_org_size)+1);
	}
	/* '\0' rather than (char)NULL: NULL may be a pointer constant */
	*(SCNG(script_org)+SCNG(script_org_size)) = '\0';

	return 0;
}
/*
 * Store the scanned text (yytext, yyleng) in zendlval as a string value.
 * When SCNG(output_filter) is set the text is passed through it, otherwise
 * it is duplicated with estrndup().
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays safe inside an unbraced if/else.
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
	do { \
		if (SCNG(output_filter)) { \
			SCNG(output_filter)(&((zendlval)->value.str.val), &((zendlval)->value.str.len), (yytext), (yyleng) TSRMLS_CC); \
		} else { \
			(zendlval)->value.str.val = (char *) estrndup((yytext), (yyleng)); \
			(zendlval)->value.str.len = (yyleng); \
		} \
	} while (0)
#else /* ZEND_MULTIBYTE */
/*
 * Store a copy of the scanned text (yytext, yyleng) in zendlval as a string
 * value.  Wrapped in do { } while (0) so both assignments stay together
 * when the macro is used as the body of an unbraced if/else.
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
	do { \
		(zendlval)->value.str.val = (char *)estrndup((yytext), (yyleng)); \
		(zendlval)->value.str.len = (yyleng); \
	} while (0)
#endif /* ZEND_MULTIBYTE */
%}
LNUM [0-9]+
@ -631,8 +911,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
yy_pop_state(TSRMLS_C);
zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
return T_STRING;
}
@ -889,8 +1168,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_LOOKING_FOR_VARNAME>{LABEL} {
zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
yy_pop_state(TSRMLS_C);
yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
@ -1025,8 +1303,21 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<INITIAL>(([^<]|"<"[^?%s<]){1,400})|"<s"|"<" {
#ifdef ZEND_MULTIBYTE
if (SCNG(output_filter)) {
int readsize;
readsize = SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), yytext, yyleng TSRMLS_CC);
if (readsize < yyleng) {
yyless(readsize);
}
} else {
zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
}
#else /* !ZEND_MULTIBYTE */
zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
#endif /* ZEND_MULTIBYTE */
zendlval->type = IS_STRING;
HANDLE_NEWLINES(yytext, yyleng);
return T_INLINE_HTML;
@ -1101,22 +1392,19 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL} {
zendlval->value.str.val = (char *)estrndup(yytext+1, yyleng-1);
zendlval->value.str.len = yyleng-1;
zend_copy_value(zendlval, (yytext+1), (yyleng-1));
zendlval->type = IS_STRING;
return T_VARIABLE;
}
<ST_IN_SCRIPTING>{LABEL} {
zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
return T_STRING;
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LABEL} {
zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
return T_STRING;
}
@ -1302,6 +1590,14 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
*t = 0;
#ifdef ZEND_MULTIBYTE
if (SCNG(output_filter)) {
s = zendlval->value.str.val;
SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), s, zendlval->value.str.len TSRMLS_CC);
efree(s);
}
#endif /* ZEND_MULTIBYTE */
return T_CONSTANT_ENCAPSED_STRING;
}
@ -1342,6 +1638,14 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
*t = 0;
#ifdef ZEND_MULTIBYTE
if (SCNG(output_filter)) {
s = zendlval->value.str.val;
SCNG(output_filter)(&(zendlval->value.str.val), &(zendlval->value.str.len), s, zendlval->value.str.len TSRMLS_CC);
efree(s);
}
#endif /* ZEND_MULTIBYTE */
return T_CONSTANT_ENCAPSED_STRING;
}
@ -1409,8 +1713,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
BEGIN(ST_IN_SCRIPTING);
return T_END_HEREDOC;
} else {
zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
return T_STRING;
}
@ -1427,24 +1730,21 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_SINGLE_QUOTE>([^'\\]|\\[^'\\])+ {
HANDLE_NEWLINES(yytext, yyleng);
zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
return T_ENCAPSED_AND_WHITESPACE;
}
<ST_DOUBLE_QUOTES>[`]+ {
zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
return T_ENCAPSED_AND_WHITESPACE;
}
<ST_BACKQUOTE>["]+ {
zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zend_copy_value(zendlval, yytext, yyleng);
zendlval->type = IS_STRING;
return T_ENCAPSED_AND_WHITESPACE;
}

1133
Zend/zend_multibyte.c
File diff suppressed because it is too large
View File

79
Zend/zend_multibyte.h

@ -0,0 +1,79 @@
/*
+----------------------------------------------------------------------+
| Zend Engine |
+----------------------------------------------------------------------+
| Copyright (c) 1998-2003 Zend Technologies Ltd. (http://www.zend.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 2.00 of the Zend license, |
| that is bundled with this package in the file LICENSE, and is |
| available at through the world-wide-web at |
| http://www.zend.com/license/2_00.txt. |
| If you did not receive a copy of the Zend license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@zend.com so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Masaki Fujimoto <fujimoto@php.net> |
| Rui Hirokawa <hirokawa@php.net> |
+----------------------------------------------------------------------+
*/
/* $Id$ */
#ifndef ZEND_MULTIBYTE_H
#define ZEND_MULTIBYTE_H
#ifdef ZEND_MULTIBYTE
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
#define BOM_UTF16_LE "\xff\xfe"
#define BOM_UTF8 "\xef\xbb\xbf"
/*
 * Filter callback: processes `length` bytes at `buf` and hands the result
 * back through `str` / `str_length`.  The scanner efree()s the buffer it
 * receives in `str`.
 * NOTE(review): the scanner's inline-HTML rule treats the return value as
 * the number of input bytes consumed -- confirm against zend_multibyte.c.
 */
typedef int (*zend_encoding_filter)(char **str, int *str_length, const char *buf, int length TSRMLS_DC);
/*
 * Utility callbacks registered with zend_multibyte_set_functions() and kept
 * in the compiler globals (encoding_detector, encoding_converter,
 * encoding_oddlen).  Their exact contracts are defined by the provider
 * (mbstring supplies php_mb_encoding_detector, php_mb_encoding_converter
 * and php_mb_oddlen).
 */
typedef char* (*zend_encoding_detector)(const char *string, int length, char *list TSRMLS_DC);
typedef int (*zend_encoding_converter)(char **to, int *to_length, const char *from, int from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
typedef int (*zend_encoding_oddlen)(const char *string, int length, const char *encoding TSRMLS_DC);
/*
 * Descriptor of a single encoding; zend_multibyte_fetch_encoding() returns
 * a pointer to one of these for a given encoding name.
 */
typedef struct _zend_encoding {
zend_encoding_filter input_filter; /* escape input filter */
zend_encoding_filter output_filter; /* escape output filter */
const char *name; /* encoding name */
const char *(*aliases)[]; /* encoding name aliases */
int compatible; /* flex compatible or not */
} zend_encoding;
/*
* zend multibyte APIs
*/
BEGIN_EXTERN_C()
ZEND_API int zend_multibyte_set_script_encoding(char *encoding_list, int encoding_list_size TSRMLS_DC);
ZEND_API int zend_multibyte_set_internal_encoding(char *encoding_name, int encoding_name_size TSRMLS_DC);
ZEND_API int zend_multibyte_set_functions(zend_encoding_detector encoding_detector, zend_encoding_converter encoding_converter, zend_encoding_oddlen encoding_oddlen TSRMLS_DC);
ZEND_API int zend_multibyte_set_filter(zend_encoding *onetime_encoding TSRMLS_DC);
ZEND_API zend_encoding* zend_multibyte_fetch_encoding(char *encoding_name);
ZEND_API int zend_multibyte_script_encoding_filter(char **to, int *to_length, const char *from, int from_length TSRMLS_DC);
ZEND_API int zend_multibyte_internal_encoding_filter(char **to, int *to_length, const char *from, int from_length TSRMLS_DC);
/* in zend_language_scanner.l */
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC);
ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC);
ZEND_API int zend_multibyte_read_script(TSRMLS_D);
END_EXTERN_C()
#endif /* ZEND_MULTIBYTE */
#endif /* ZEND_MULTIBYTE_H */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: sw=4 ts=4 tw=78
* vim<600: sw=4 ts=4 tw=78
*/

20
ext/mbstring/mbstring.c

@ -848,6 +848,9 @@ PHP_RINIT_FUNCTION(mbstring)
#if HAVE_MBREGEX
PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
#ifdef ZEND_MULTIBYTE
php_mb_set_zend_encoding(TSRMLS_C);
#endif /* ZEND_MULTIBYTE */
return SUCCESS;
}
@ -982,7 +985,10 @@ PHP_FUNCTION(mb_internal_encoding)
} else {
MBSTRG(current_internal_encoding) = no_encoding;
#ifdef ZEND_MULTIBYTE
zend_multibyte_set_internal_encoding(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1) TSRMLS_CC);
/* TODO: make independent from mbstring.encoding_translation? */
if (MBSTRG(encoding_translation)) {
zend_multibyte_set_internal_encoding(name, name_len TSRMLS_CC);
}
#endif /* ZEND_MULTIBYTE */
RETURN_TRUE;
}
@ -3366,7 +3372,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
int n, *entry, list_size = 0;
zend_encoding_detector encoding_detector;
zend_encoding_converter encoding_converter;
zend_multibyte_oddlen multibyte_oddlen;
zend_encoding_oddlen encoding_oddlen;
/* notify script encoding to Zend Engine */
entry = MBSTRG(script_encoding_list);
@ -3392,19 +3398,17 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
efree(list);
}
encoding_detector = php_mb_encoding_detector;
encoding_converter = NULL;
multibyte_oddlen = php_mb_oddlen;
encoding_converter = php_mb_encoding_converter;
encoding_oddlen = php_mb_oddlen;
/* TODO: make independent from mbstring.encoding_translation? */
if (MBSTRG(encoding_translation)) {
/* notify internal encoding to Zend Engine */
name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
encoding_converter = php_mb_encoding_converter;
}
zend_multibyte_set_functions(encoding_detector, encoding_converter,
multibyte_oddlen TSRMLS_CC);
zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
return 0;
}

7
main/main.c

@ -90,10 +90,6 @@
#include "php_logos.h"
#include "php_streams.h"
#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
#include "ext/mbstring/mbstring.h"
#endif /* defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING) */
#include "SAPI.h"
#include "rfc1867.h"
/* }}} */
@ -1564,9 +1560,6 @@ PHPAPI int php_execute_script(zend_file_handle *primary_file TSRMLS_DC)
} else {
append_file_p = NULL;
}
#if defined(ZEND_MULTIBYTE) && defined(HAVE_MBSTRING)
php_mb_set_zend_encoding(TSRMLS_C);
#endif /* ZEND_MULTIBYTE && HAVE_MBSTRING */
#ifdef PHP_WIN32
zend_unset_timeout(TSRMLS_C);
#endif

Loading…
Cancel
Save