|
|
|
@ -840,9 +840,6 @@ ensure_unicode(PyObject *obj) |
|
|
|
|
|
|
|
/* --- Unicode Object ----------------------------------------------------- */ |
|
|
|
|
|
|
|
static PyObject * |
|
|
|
fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s)); |
|
|
|
|
|
|
|
static inline Py_ssize_t |
|
|
|
findchar(const void *s, int kind, |
|
|
|
Py_ssize_t size, Py_UCS4 ch, |
|
|
|
@ -9062,42 +9059,6 @@ PyUnicode_Translate(PyObject *str, |
|
|
|
return _PyUnicode_TranslateCharmap(str, mapping, errors); |
|
|
|
} |
|
|
|
|
|
|
|
static Py_UCS4 |
|
|
|
fix_decimal_and_space_to_ascii(PyObject *self) |
|
|
|
{ |
|
|
|
/* No need to call PyUnicode_READY(self) because this function is only |
|
|
|
called as a callback from fixup() which does it already. */ |
|
|
|
const Py_ssize_t len = PyUnicode_GET_LENGTH(self); |
|
|
|
const int kind = PyUnicode_KIND(self); |
|
|
|
void *data = PyUnicode_DATA(self); |
|
|
|
Py_UCS4 maxchar = 127, ch, fixed; |
|
|
|
int modified = 0; |
|
|
|
Py_ssize_t i; |
|
|
|
|
|
|
|
for (i = 0; i < len; ++i) { |
|
|
|
ch = PyUnicode_READ(kind, data, i); |
|
|
|
fixed = 0; |
|
|
|
if (ch > 127) { |
|
|
|
if (Py_UNICODE_ISSPACE(ch)) |
|
|
|
fixed = ' '; |
|
|
|
else { |
|
|
|
const int decimal = Py_UNICODE_TODECIMAL(ch); |
|
|
|
if (decimal >= 0) |
|
|
|
fixed = '0' + decimal; |
|
|
|
} |
|
|
|
if (fixed != 0) { |
|
|
|
modified = 1; |
|
|
|
maxchar = Py_MAX(maxchar, fixed); |
|
|
|
PyUnicode_WRITE(kind, data, i, fixed); |
|
|
|
} |
|
|
|
else |
|
|
|
maxchar = Py_MAX(maxchar, ch); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return (modified) ? maxchar : 0; |
|
|
|
} |
|
|
|
|
|
|
|
PyObject * |
|
|
|
_PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode) |
|
|
|
{ |
|
|
|
@ -9107,12 +9068,42 @@ _PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode) |
|
|
|
} |
|
|
|
if (PyUnicode_READY(unicode) == -1) |
|
|
|
return NULL; |
|
|
|
if (PyUnicode_MAX_CHAR_VALUE(unicode) <= 127) { |
|
|
|
if (PyUnicode_IS_ASCII(unicode)) { |
|
|
|
/* If the string is already ASCII, just return the same string */ |
|
|
|
Py_INCREF(unicode); |
|
|
|
return unicode; |
|
|
|
} |
|
|
|
return fixup(unicode, fix_decimal_and_space_to_ascii); |
|
|
|
|
|
|
|
Py_ssize_t len = PyUnicode_GET_LENGTH(unicode); |
|
|
|
PyObject *result = PyUnicode_New(len, 127); |
|
|
|
if (result == NULL) { |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
|
|
|
|
Py_UCS1 *out = PyUnicode_1BYTE_DATA(result); |
|
|
|
int kind = PyUnicode_KIND(unicode); |
|
|
|
const void *data = PyUnicode_DATA(unicode); |
|
|
|
Py_ssize_t i; |
|
|
|
for (i = 0; i < len; ++i) { |
|
|
|
Py_UCS4 ch = PyUnicode_READ(kind, data, i); |
|
|
|
if (ch < 127) { |
|
|
|
out[i] = ch; |
|
|
|
} |
|
|
|
else if (Py_UNICODE_ISSPACE(ch)) { |
|
|
|
out[i] = ' '; |
|
|
|
} |
|
|
|
else { |
|
|
|
int decimal = Py_UNICODE_TODECIMAL(ch); |
|
|
|
if (decimal < 0) { |
|
|
|
out[i] = '?'; |
|
|
|
_PyUnicode_LENGTH(result) = i + 1; |
|
|
|
break; |
|
|
|
} |
|
|
|
out[i] = '0' + decimal; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return result; |
|
|
|
} |
|
|
|
|
|
|
|
PyObject * |
|
|
|
@ -9588,69 +9579,6 @@ PyUnicode_Tailmatch(PyObject *str, |
|
|
|
return tailmatch(str, substr, start, end, direction); |
|
|
|
} |
|
|
|
|
|
|
|
/* Apply fixfct filter to the Unicode object self and return a |
|
|
|
reference to the modified object */ |
|
|
|
|
|
|
|
static PyObject * |
|
|
|
fixup(PyObject *self, |
|
|
|
Py_UCS4 (*fixfct)(PyObject *s)) |
|
|
|
{ |
|
|
|
PyObject *u; |
|
|
|
Py_UCS4 maxchar_old, maxchar_new = 0; |
|
|
|
PyObject *v; |
|
|
|
|
|
|
|
u = _PyUnicode_Copy(self); |
|
|
|
if (u == NULL) |
|
|
|
return NULL; |
|
|
|
maxchar_old = PyUnicode_MAX_CHAR_VALUE(u); |
|
|
|
|
|
|
|
/* fix functions return the new maximum character in a string, |
|
|
|
if the kind of the resulting unicode object does not change, |
|
|
|
everything is fine. Otherwise we need to change the string kind |
|
|
|
and re-run the fix function. */ |
|
|
|
maxchar_new = fixfct(u); |
|
|
|
|
|
|
|
if (maxchar_new == 0) { |
|
|
|
/* no changes */; |
|
|
|
if (PyUnicode_CheckExact(self)) { |
|
|
|
Py_DECREF(u); |
|
|
|
Py_INCREF(self); |
|
|
|
return self; |
|
|
|
} |
|
|
|
else |
|
|
|
return u; |
|
|
|
} |
|
|
|
|
|
|
|
maxchar_new = align_maxchar(maxchar_new); |
|
|
|
|
|
|
|
if (maxchar_new == maxchar_old) |
|
|
|
return u; |
|
|
|
|
|
|
|
/* In case the maximum character changed, we need to |
|
|
|
convert the string to the new category. */ |
|
|
|
v = PyUnicode_New(PyUnicode_GET_LENGTH(self), maxchar_new); |
|
|
|
if (v == NULL) { |
|
|
|
Py_DECREF(u); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
if (maxchar_new > maxchar_old) { |
|
|
|
/* If the maxchar increased so that the kind changed, not all |
|
|
|
characters are representable anymore and we need to fix the |
|
|
|
string again. This only happens in very few cases. */ |
|
|
|
_PyUnicode_FastCopyCharacters(v, 0, |
|
|
|
self, 0, PyUnicode_GET_LENGTH(self)); |
|
|
|
maxchar_old = fixfct(v); |
|
|
|
assert(maxchar_old > 0 && maxchar_old <= maxchar_new); |
|
|
|
} |
|
|
|
else { |
|
|
|
_PyUnicode_FastCopyCharacters(v, 0, |
|
|
|
u, 0, PyUnicode_GET_LENGTH(self)); |
|
|
|
} |
|
|
|
Py_DECREF(u); |
|
|
|
assert(_PyUnicode_CheckConsistency(v, 1)); |
|
|
|
return v; |
|
|
|
} |
|
|
|
|
|
|
|
static PyObject * |
|
|
|
ascii_upper_or_lower(PyObject *self, int lower) |
|
|
|
{ |
|
|
|
|