Browse Source

Faster strrpos implementation

pull/980/merge
Xinchen Hui 11 years ago
parent
commit
2f1ddff2a5
  1. 3
      UPGRADING
  2. 68
      Zend/zend_operators.c
  3. 45
      Zend/zend_operators.h
  4. 142
      ext/standard/string.c

3
UPGRADING

@ -54,7 +54,8 @@ PHP X.Y UPGRADE NOTES
. zend_function.common.num_args doesn't include the variadic argument anymore.
. ob_start() no longer issues an E_ERROR, but instead an E_RECOVERABLE_ERROR in case an
output buffer is created in an output buffer handler.
. Add zend_memnstr_ex, which is based on string matching sunday algo.
. Added zend_memnstr_ex, which is based on the Sunday string matching algorithm.
. Added zend_memnrstr, zend_memnrstr_ex.
- DBA
. dba_delete() now returns false if the key was not found for the inifile

68
Zend/zend_operators.c

@ -2763,54 +2763,96 @@ process_double:
}
/* }}} */
static zend_always_inline void zend_memstr_ex_pre(unsigned int td[], const char *needle, size_t needle_len) /* {{{ */ {
/*
* String matching - Sunday algorithm
* http://www.iti.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
*/
static zend_always_inline void zend_memnstr_ex_pre(unsigned int td[], const char *needle, size_t needle_len, int reverse) /* {{{ */ {
int i;
for (i = 0; i < 256; i++) {
td[i] = needle_len + 1;
}
for (i = 0; i < needle_len; i++) {
td[(unsigned char)needle[i]] = (int)needle_len - i;
if (reverse) {
for (i = needle_len - 1; i >= 0; i--) {
td[(unsigned char)needle[i]] = i + 1;
}
} else {
for (i = 0; i < needle_len; i++) {
td[(unsigned char)needle[i]] = (int)needle_len - i;
}
}
}
/* }}} */
/*
* String matching - Sunday algorithm
* http://www.iti.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
*/
ZEND_API const char* zend_memnstr_ex(const char *haystack, const char *needle, size_t needle_len, char *end) /* {{{ */
{
unsigned int td[256];
register size_t i;
const unsigned register char *p;
register const char *p;
if (needle_len == 0 || (end - haystack) == 0) {
return NULL;
}
zend_memstr_ex_pre(td, needle, needle_len);
zend_memnstr_ex_pre(td, needle, needle_len, 0);
p = (const unsigned char *)haystack;
p = haystack;
end -= needle_len;
while (p <= (unsigned char *)end) {
while (p <= end) {
for (i = 0; i < needle_len; i++) {
if (needle[i] != p[i]) {
break;
}
}
if (i == needle_len) {
return (const char *)p;
return p;
}
p += td[p[needle_len]];
p += td[(unsigned char)(p[needle_len])];
}
return NULL;
}
/* }}} */
/*
 * Reverse string matching - Sunday algorithm.
 *
 * Searches [haystack, end) from the back for needle[0..needle_len) and returns
 * a pointer to the start of the last occurrence, or NULL when the needle is
 * empty, does not fit into the haystack, or is not present.
 */
ZEND_API const char* zend_memnrstr_ex(const char *haystack, const char *needle, size_t needle_len, char *end) /* {{{ */
{
	unsigned int td[256];
	register size_t i;
	register const char *p;
	size_t shift;
	ptrdiff_t span = end - haystack;

	/* Reject needles that cannot fit, so that `end - needle_len` below never
	 * points before the start of the haystack. */
	if (needle_len == 0 || span <= 0 || (size_t)span < needle_len) {
		return NULL;
	}

	/* Build the shift table for the reverse scan. */
	zend_memnstr_ex_pre(td, needle, needle_len, 1);

	/* Start with the right-most window that still fits. */
	p = end - needle_len;

	for (;;) {
		for (i = 0; i < needle_len; i++) {
			if (needle[i] != p[i]) {
				break;
			}
		}

		if (i == needle_len) {
			return (const char *)p;
		}

		/* No byte precedes the window: nothing left to try. */
		if (p == haystack) {
			return NULL;
		}

		/* The byte just before the window selects the shift distance. */
		shift = td[(unsigned char)(p[-1])];

		/* Stop instead of moving the window in front of the haystack. */
		if ((size_t)(p - haystack) < shift) {
			return NULL;
		}

		p -= shift;
	}
}
/* }}} */
/*
* Local variables:

45
Zend/zend_operators.h

@ -88,6 +88,7 @@ ZEND_API zend_bool instanceof_function(const zend_class_entry *instance_ce, cons
ZEND_API zend_uchar _is_numeric_string_ex(const char *str, size_t length, zend_long *lval, double *dval, int allow_errors, int *oflow_info);
ZEND_API const char* zend_memnstr_ex(const char *haystack, const char *needle, size_t needle_len, char *end);
ZEND_API const char* zend_memnrstr_ex(const char *haystack, const char *needle, size_t needle_len, char *end);
END_EXTERN_C()
@ -174,11 +175,12 @@ zend_memnstr(const char *haystack, const char *needle, size_t needle_len, char *
size_t off_s;
if (needle_len == 1) {
return (char *)memchr(p, *needle, (end-p));
return (const char *)memchr(p, *needle, (end-p));
}
off_p = end - haystack;
off_s = (off_p > 0) ? (size_t)off_p : 0;
if (needle_len > off_s) {
return NULL;
}
@ -187,7 +189,7 @@ zend_memnstr(const char *haystack, const char *needle, size_t needle_len, char *
end -= needle_len;
while (p <= end) {
if ((p = (char *)memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
if ((p = (const char *)memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
if (!memcmp(needle, p, needle_len-1)) {
return p;
}
@ -209,7 +211,6 @@ zend_memnstr(const char *haystack, const char *needle, size_t needle_len, char *
static zend_always_inline const void *zend_memrchr(const void *s, int c, size_t n)
{
register const unsigned char *e;
if (n <= 0) {
return NULL;
}
@ -219,10 +220,46 @@ static zend_always_inline const void *zend_memrchr(const void *s, int c, size_t
return (const void *)e;
}
}
return NULL;
}
/*
 * Find the last occurrence of needle[0..needle_len) inside [haystack, end).
 *
 * Returns a pointer to the start of the match, or NULL when the needle is
 * empty, longer than the haystack, or not found. Haystacks shorter than 1024
 * bytes and needles shorter than 3 bytes are scanned with zend_memrchr();
 * everything else is delegated to zend_memnrstr_ex().
 */
static zend_always_inline const char *
zend_memnrstr(const char *haystack, const char *needle, size_t needle_len, char *end)
{
	const char *p = end;
	char ne;
	ptrdiff_t off_p;
	size_t off_s;

	/* An empty needle has no last byte to read; treat it as "not found",
	 * matching zend_memnrstr_ex(). */
	if (needle_len == 0) {
		return NULL;
	}

	off_p = end - haystack;
	off_s = (off_p > 0) ? (size_t)off_p : 0;

	/* Also covers end <= haystack, so no negative length is ever passed on. */
	if (needle_len > off_s) {
		return NULL;
	}

	if (needle_len == 1) {
		return (const char *)zend_memrchr(haystack, *needle, off_s);
	}

	if (EXPECTED(off_s < 1024 || needle_len < 3)) {
		ne = needle[needle_len-1];
		/* Right-most position at which the needle still fits. */
		p -= needle_len;

		for (;;) {
			/* Jump back to the closest candidate starting with needle[0]. */
			p = (const char *)zend_memrchr(haystack, *needle, (p - haystack) + 1);
			if (!p) {
				return NULL;
			}

			/* Cheap last-byte check first, then compare the remaining prefix. */
			if (ne == p[needle_len-1] && !memcmp(needle, p, needle_len - 1)) {
				return p;
			}

			/* Candidate was at the very start: nothing left to scan. */
			if (p == haystack) {
				return NULL;
			}
			p--;
		}
	}

	return zend_memnrstr_ex(haystack, needle, needle_len, end);
}
BEGIN_EXTERN_C()
ZEND_API int increment_function(zval *op1);
ZEND_API int decrement_function(zval *op2);

142
ext/standard/string.c

@ -1929,9 +1929,9 @@ PHP_FUNCTION(stripos)
char *found = NULL;
zend_string *haystack;
zend_long offset = 0;
char *needle_dup = NULL, *haystack_dup;
char needle_char[2];
zval *needle;
zend_string *needle_dup = NULL, *haystack_dup;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sz|l", &haystack, &needle, &offset) == FAILURE) {
return;
@ -1946,40 +1946,38 @@ PHP_FUNCTION(stripos)
RETURN_FALSE;
}
haystack_dup = estrndup(haystack->val, haystack->len);
php_strtolower(haystack_dup, haystack->len);
if (Z_TYPE_P(needle) == IS_STRING) {
if (Z_STRLEN_P(needle) == 0 || Z_STRLEN_P(needle) > haystack->len) {
efree(haystack_dup);
RETURN_FALSE;
}
needle_dup = estrndup(Z_STRVAL_P(needle), Z_STRLEN_P(needle));
php_strtolower(needle_dup, Z_STRLEN_P(needle));
found = (char*)php_memnstr(haystack_dup + offset, needle_dup, Z_STRLEN_P(needle), haystack_dup + haystack->len);
haystack_dup = php_string_tolower(haystack);
needle_dup = php_string_tolower(Z_STR_P(needle));
found = (char*)php_memnstr(haystack_dup->val + offset,
needle_dup->val, needle_dup->len, haystack_dup->val + haystack->len);
} else {
if (php_needle_char(needle, needle_char) != SUCCESS) {
efree(haystack_dup);
RETURN_FALSE;
}
haystack_dup = php_string_tolower(haystack);
needle_char[0] = tolower(needle_char[0]);
needle_char[1] = '\0';
found = (char*)php_memnstr(haystack_dup + offset,
found = (char*)php_memnstr(haystack_dup->val + offset,
needle_char,
sizeof(needle_char) - 1,
haystack_dup + haystack->len);
haystack_dup->val + haystack->len);
}
efree(haystack_dup);
if (needle_dup) {
efree(needle_dup);
}
if (found) {
RETURN_LONG(found - haystack_dup);
RETVAL_LONG(found - haystack_dup->val);
} else {
RETURN_FALSE;
RETVAL_FALSE;
}
zend_string_release(haystack_dup);
if (needle_dup) {
zend_string_release(needle_dup);
}
}
/* }}} */
@ -1994,6 +1992,7 @@ PHP_FUNCTION(strrpos)
size_t needle_len;
zend_long offset = 0;
char *p, *e, ord_needle[2];
char *found;
#ifndef FAST_ZPP
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sz|l", &haystack, &zneedle, &offset) == FAILURE) {
@ -2030,37 +2029,22 @@ PHP_FUNCTION(strrpos)
RETURN_FALSE;
}
p = haystack->val + (size_t)offset;
e = haystack->val + haystack->len - needle_len;
e = haystack->val + haystack->len;
} else {
if (offset < -INT_MAX || (size_t)(-offset) > haystack->len) {
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
RETURN_FALSE;
}
p = haystack->val;
if (needle_len > (size_t)(-offset)) {
e = haystack->val + haystack->len - needle_len;
if (haystack->len + (size_t)offset >= needle_len) {
e = haystack->val + haystack->len + (size_t)offset + needle_len;
} else {
e = haystack->val + haystack->len + offset;
}
}
if (needle_len == 1) {
/* Single character search can shortcut memcmps */
while (e >= p) {
if (*e == *needle) {
RETURN_LONG(e - p + (offset > 0 ? offset : 0));
}
e--;
e = haystack->val + haystack->len;
}
RETURN_FALSE;
}
while (e >= p) {
if (memcmp(e, needle, needle_len) == 0) {
RETURN_LONG(e - p + (offset > 0 ? offset : 0));
}
e--;
if ((found = (char *)zend_memnrstr(p, needle, needle_len, e))) {
RETURN_LONG(found - haystack->val);
}
RETURN_FALSE;
@ -2072,103 +2056,105 @@ PHP_FUNCTION(strrpos)
PHP_FUNCTION(strripos)
{
zval *zneedle;
char *needle;
zend_string *needle;
zend_string *haystack;
size_t needle_len;
zend_long offset = 0;
char *p, *e, ord_needle[2];
char *needle_dup, *haystack_dup;
char *p, *e;
char *found;
zend_string *needle_dup, *haystack_dup, *ord_needle = NULL;
ALLOCA_FLAG(use_heap);
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sz|l", &haystack, &zneedle, &offset) == FAILURE) {
RETURN_FALSE;
}
STR_ALLOCA_ALLOC(ord_needle, 1, use_heap);
if (Z_TYPE_P(zneedle) == IS_STRING) {
needle = Z_STRVAL_P(zneedle);
needle_len = Z_STRLEN_P(zneedle);
needle = Z_STR_P(zneedle);
} else {
if (php_needle_char(zneedle, ord_needle) != SUCCESS) {
if (php_needle_char(zneedle, ord_needle->val) != SUCCESS) {
RETURN_FALSE;
}
ord_needle[1] = '\0';
ord_needle->val[1] = '\0';
needle = ord_needle;
needle_len = 1;
}
if ((haystack->len == 0) || (needle_len == 0)) {
if ((haystack->len == 0) || (needle->len == 0)) {
RETURN_FALSE;
}
if (needle_len == 1) {
if (needle->len == 1) {
/* Single character search can shortcut memcmps
Can also avoid tolower emallocs */
if (offset >= 0) {
if ((size_t)offset > haystack->len) {
STR_ALLOCA_FREE(ord_needle, use_heap);
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
RETURN_FALSE;
}
p = haystack->val + offset;
p = haystack->val + (size_t)offset;
e = haystack->val + haystack->len - 1;
} else {
p = haystack->val;
if (offset < -INT_MAX || (size_t)(-offset) > haystack->len) {
STR_ALLOCA_FREE(ord_needle, use_heap);
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
RETURN_FALSE;
}
e = haystack->val + haystack->len + offset;
e = haystack->val + haystack->len + (size_t)offset;
}
/* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */
*ord_needle = tolower(*needle);
*ord_needle->val = tolower(*needle->val);
while (e >= p) {
if (tolower(*e) == *ord_needle) {
if (tolower(*e) == *ord_needle->val) {
STR_ALLOCA_FREE(ord_needle, use_heap);
RETURN_LONG(e - p + (offset > 0 ? offset : 0));
}
e--;
}
STR_ALLOCA_FREE(ord_needle, use_heap);
RETURN_FALSE;
}
needle_dup = estrndup(needle, needle_len);
php_strtolower(needle_dup, needle_len);
haystack_dup = estrndup(haystack->val, haystack->len);
php_strtolower(haystack_dup, haystack->len);
haystack_dup = php_string_tolower(haystack);
if (offset >= 0) {
if ((size_t)offset > haystack->len) {
efree(needle_dup);
efree(haystack_dup);
zend_string_release(haystack_dup);
STR_ALLOCA_FREE(ord_needle, use_heap);
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
RETURN_FALSE;
}
p = haystack_dup + offset;
e = haystack_dup + haystack->len - needle_len;
p = haystack_dup->val + offset;
e = haystack_dup->val + haystack->len;
} else {
if (offset < -INT_MAX || (size_t)(-offset) > haystack->len) {
efree(needle_dup);
efree(haystack_dup);
zend_string_release(haystack_dup);
STR_ALLOCA_FREE(ord_needle, use_heap);
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
RETURN_FALSE;
}
p = haystack_dup;
if (needle_len > (size_t)(-offset)) {
e = haystack_dup + haystack->len - needle_len;
p = haystack_dup->val;
if (haystack->len + (size_t)offset >= needle->len) {
e = haystack_dup->val + haystack->len + (size_t)offset + needle->len;
} else {
e = haystack_dup + haystack->len + offset;
e = haystack_dup->val + haystack->len;
}
}
while (e >= p) {
if (memcmp(e, needle_dup, needle_len) == 0) {
efree(haystack_dup);
efree(needle_dup);
RETURN_LONG(e - p + (offset > 0 ? offset : 0));
}
e--;
needle_dup = php_string_tolower(needle);
if ((found = (char *)zend_memnrstr(p, needle_dup->val, needle_dup->len, e))) {
RETVAL_LONG(found - haystack_dup->val);
zend_string_release(needle_dup);
zend_string_release(haystack_dup);
STR_ALLOCA_FREE(ord_needle, use_heap);
} else {
zend_string_release(needle_dup);
zend_string_release(haystack_dup);
STR_ALLOCA_FREE(ord_needle, use_heap);
RETURN_FALSE;
}
efree(haystack_dup);
efree(needle_dup);
RETURN_FALSE;
}
/* }}} */

Loading…
Cancel
Save