|
|
|
@ -64,6 +64,8 @@ |
|
|
|
|
|
|
|
/* For str_getcsv() support */ |
|
|
|
#include "ext/standard/file.h" |
|
|
|
/* For php_next_utf8_char() */ |
|
|
|
#include "ext/standard/html.h" |
|
|
|
|
|
|
|
#define STR_PAD_LEFT 0 |
|
|
|
#define STR_PAD_RIGHT 1 |
|
|
|
@ -5653,6 +5655,98 @@ PHP_FUNCTION(substr_compare) |
|
|
|
} |
|
|
|
/* }}} */ |
|
|
|
|
|
|
|
/* {{{ */ |
|
|
|
static zend_string *php_utf8_encode(const char *s, size_t len) |
|
|
|
{ |
|
|
|
size_t pos = len; |
|
|
|
zend_string *str; |
|
|
|
unsigned char c; |
|
|
|
|
|
|
|
str = zend_string_safe_alloc(len, 2, 0, 0); |
|
|
|
ZSTR_LEN(str) = 0; |
|
|
|
while (pos > 0) { |
|
|
|
/* The lower 256 codepoints of Unicode are identical to Latin-1, |
|
|
|
* so we don't need to do any mapping here. */ |
|
|
|
c = (unsigned char)(*s); |
|
|
|
if (c < 0x80) { |
|
|
|
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c; |
|
|
|
/* We only account for the single-byte and two-byte cases because |
|
|
|
* we're only dealing with the first 256 Unicode codepoints. */ |
|
|
|
} else { |
|
|
|
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6)); |
|
|
|
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f)); |
|
|
|
} |
|
|
|
pos--; |
|
|
|
s++; |
|
|
|
} |
|
|
|
ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; |
|
|
|
str = zend_string_truncate(str, ZSTR_LEN(str), 0); |
|
|
|
return str; |
|
|
|
} |
|
|
|
/* }}} */ |
|
|
|
|
|
|
|
/* {{{ */ |
|
|
|
static zend_string *php_utf8_decode(const char *s, size_t len) |
|
|
|
{ |
|
|
|
size_t pos = 0; |
|
|
|
unsigned int c; |
|
|
|
zend_string *str; |
|
|
|
|
|
|
|
str = zend_string_alloc(len, 0); |
|
|
|
ZSTR_LEN(str) = 0; |
|
|
|
while (pos < len) { |
|
|
|
int status = FAILURE; |
|
|
|
c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status); |
|
|
|
|
|
|
|
/* The lower 256 codepoints of Unicode are identical to Latin-1, |
|
|
|
* so we don't need to do any mapping here beyond replacing non-Latin-1 |
|
|
|
* characters. */ |
|
|
|
if (status == FAILURE || c > 0xFFU) { |
|
|
|
c = '?'; |
|
|
|
} |
|
|
|
|
|
|
|
ZSTR_VAL(str)[ZSTR_LEN(str)++] = c; |
|
|
|
} |
|
|
|
ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; |
|
|
|
if (ZSTR_LEN(str) < len) { |
|
|
|
str = zend_string_truncate(str, ZSTR_LEN(str), 0); |
|
|
|
} |
|
|
|
|
|
|
|
return str; |
|
|
|
} |
|
|
|
/* }}} */ |
|
|
|
|
|
|
|
|
|
|
|
/* {{{ proto string utf8_encode(string data) |
|
|
|
Encodes an ISO-8859-1 string to UTF-8 */ |
|
|
|
PHP_FUNCTION(utf8_encode) |
|
|
|
{ |
|
|
|
char *arg; |
|
|
|
size_t arg_len; |
|
|
|
|
|
|
|
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
RETURN_STR(php_utf8_encode(arg, arg_len)); |
|
|
|
} |
|
|
|
/* }}} */ |
|
|
|
|
|
|
|
/* {{{ proto string utf8_decode(string data) |
|
|
|
Converts a UTF-8 encoded string to ISO-8859-1 */ |
|
|
|
PHP_FUNCTION(utf8_decode) |
|
|
|
{ |
|
|
|
char *arg; |
|
|
|
size_t arg_len; |
|
|
|
|
|
|
|
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
RETURN_STR(php_utf8_decode(arg, arg_len)); |
|
|
|
} |
|
|
|
/* }}} */ |
|
|
|
|
|
|
|
/* |
|
|
|
* Local variables: |
|
|
|
* tab-width: 4 |
|
|
|
|