|
|
|
@ -137,21 +137,38 @@ static const struct { |
|
|
|
{ 0, NULL, 0, 0 } |
|
|
|
}; |
|
|
|
|
|
|
|
#define MB_RETURN { \ |
|
|
|
*newpos = pos; \ |
|
|
|
mbseq[mbpos] = '\0'; \ |
|
|
|
*mbseqlen = mbpos; \ |
|
|
|
return this_char; } |
|
|
|
|
|
|
|
#define MB_WRITE(mbchar) { \ |
|
|
|
mbspace--; \ |
|
|
|
if (mbspace == 0) { \ |
|
|
|
MB_RETURN; \ |
|
|
|
} \ |
|
|
|
mbseq[mbpos++] = (mbchar); } |
|
|
|
|
|
|
|
/* {{{ get_next_char |
|
|
|
*/ |
|
|
|
inline static unsigned short get_next_char(enum entity_charset charset, |
|
|
|
unsigned char *str, |
|
|
|
int *newpos, |
|
|
|
unsigned char *mbseq, |
|
|
|
int *mbseqlen |
|
|
|
) |
|
|
|
unsigned char * str, |
|
|
|
int * newpos, |
|
|
|
unsigned char * mbseq, |
|
|
|
int * mbseqlen) |
|
|
|
{ |
|
|
|
int pos = *newpos; |
|
|
|
int mbpos = 0; |
|
|
|
int mbspace = *mbseqlen; |
|
|
|
unsigned short this_char = str[pos++]; |
|
|
|
|
|
|
|
mbseq[mbpos++] = (unsigned char)this_char; |
|
|
|
if (mbspace <= 0) { |
|
|
|
*mbseqlen = 0; |
|
|
|
return this_char; |
|
|
|
} |
|
|
|
|
|
|
|
MB_WRITE((unsigned char)this_char); |
|
|
|
|
|
|
|
switch(charset) { |
|
|
|
case cs_utf_8: |
|
|
|
@ -232,7 +249,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, |
|
|
|
if (more) |
|
|
|
{ |
|
|
|
this_char = str[pos++]; |
|
|
|
mbseq[mbpos++] = (unsigned char)this_char; |
|
|
|
MB_WRITE((unsigned char)this_char); |
|
|
|
} |
|
|
|
} while(more); |
|
|
|
} |
|
|
|
@ -250,7 +267,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, |
|
|
|
{ |
|
|
|
/* yes, this a wide char */ |
|
|
|
this_char <<= 8; |
|
|
|
mbseq[mbpos++] = next_char; |
|
|
|
MB_WRITE(next_char); |
|
|
|
this_char |= next_char; |
|
|
|
pos++; |
|
|
|
} |
|
|
|
@ -271,7 +288,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, |
|
|
|
{ |
|
|
|
/* yes, this a wide char */ |
|
|
|
this_char <<= 8; |
|
|
|
mbseq[mbpos++] = next_char; |
|
|
|
MB_WRITE(next_char); |
|
|
|
this_char |= next_char; |
|
|
|
pos++; |
|
|
|
} |
|
|
|
@ -289,7 +306,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, |
|
|
|
{ |
|
|
|
/* yes, this a jis kanji char */ |
|
|
|
this_char <<= 8; |
|
|
|
mbseq[mbpos++] = next_char; |
|
|
|
MB_WRITE(next_char); |
|
|
|
this_char |= next_char; |
|
|
|
pos++; |
|
|
|
} |
|
|
|
@ -301,7 +318,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, |
|
|
|
{ |
|
|
|
/* JIS X 0201 kana */ |
|
|
|
this_char <<= 8; |
|
|
|
mbseq[mbpos++] = next_char; |
|
|
|
MB_WRITE(next_char); |
|
|
|
this_char |= next_char; |
|
|
|
pos++; |
|
|
|
} |
|
|
|
@ -315,12 +332,13 @@ inline static unsigned short get_next_char(enum entity_charset charset, |
|
|
|
{ |
|
|
|
/* JIS X 0212 hojo-kanji */ |
|
|
|
this_char <<= 8; |
|
|
|
mbseq[mbpos++] = next_char; |
|
|
|
MB_WRITE(next_char); |
|
|
|
this_char |= next_char; |
|
|
|
pos++; |
|
|
|
this_char <<= 8; |
|
|
|
mbseq[mbpos++] = next2_char; |
|
|
|
MB_WRITE(next2_char); |
|
|
|
this_char |= next2_char; |
|
|
|
pos+=2; |
|
|
|
pos++; |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
@ -331,10 +349,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
*newpos = pos; |
|
|
|
mbseq[mbpos] = '\0'; |
|
|
|
*mbseqlen = mbpos; |
|
|
|
return this_char; |
|
|
|
MB_RETURN; |
|
|
|
} |
|
|
|
/* }}} */ |
|
|
|
|
|
|
|
@ -489,8 +504,8 @@ PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newle |
|
|
|
|
|
|
|
i = 0; |
|
|
|
while (i < oldlen) { |
|
|
|
int mbseqlen; |
|
|
|
unsigned char mbsequence[16]; /* allow up to 15 characters in a multibyte sequence */ |
|
|
|
int mbseqlen = sizeof(mbsequence); |
|
|
|
unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen); |
|
|
|
|
|
|
|
matches_map = 0; |
|
|
|
|