Browse Source

bpo-40596: Fix str.isidentifier() for non-canonicalized strings containing non-BMP characters on Windows. (GH-20053)

pull/20057/head
Serhiy Storchaka 6 years ago
committed by GitHub
parent
commit
5650e76f63
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      Lib/test/test_unicode.py
  2. 2
      Misc/NEWS.d/next/Core and Builtins/2020-05-11-20-53-52.bpo-40596.dwOH_X.rst
  3. 26
      Objects/unicodeobject.c

7
Lib/test/test_unicode.py

@ -720,6 +720,13 @@ class UnicodeTest(string_tests.CommonTest,
self.assertFalse("©".isidentifier())
self.assertFalse("0".isidentifier())
# Regression test for bpo-40596: str.isidentifier() must give the same answer
# for a non-canonicalized ("legacy") string containing non-BMP characters as
# it does for the ordinary string.
@support.cpython_only
def test_isidentifier_legacy(self):
# _testcapi is imported inside the test rather than at module level.
import _testcapi
# An identifier spelled entirely with characters outside the BMP.
u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
# Baseline: the ordinary str object is a valid identifier.
self.assertTrue(u.isidentifier())
# Same text via _testcapi.unicode_legacy_string(); presumably this returns the
# legacy (wchar_t-backed) representation exercised by the C fix - confirm
# against the _testcapi sources.
self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier())
def test_isprintable(self):
self.assertTrue("".isprintable())
self.assertTrue(" ".isprintable())

2
Misc/NEWS.d/next/Core and Builtins/2020-05-11-20-53-52.bpo-40596.dwOH_X.rst

@ -0,0 +1,2 @@
Fixed :meth:`str.isidentifier` for non-canonicalized strings containing
non-BMP characters on Windows.

26
Objects/unicodeobject.c

@@ -12356,20 +12356,38 @@ PyUnicode_IsIdentifier(PyObject *self)
 return len && i == len;
 }
 else {
-Py_ssize_t i, len = PyUnicode_GET_SIZE(self);
+Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
 if (len == 0) {
 /* an empty string is not a valid identifier */
 return 0;
 }
 const wchar_t *wstr = _PyUnicode_WSTR(self);
-Py_UCS4 ch = wstr[0];
+Py_UCS4 ch = wstr[i++];
+#if SIZEOF_WCHAR_T == 2
+if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+&& i < len
+&& Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
+{
+ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
+i++;
+}
+#endif
 if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
 return 0;
 }
-for (i = 1; i < len; i++) {
-ch = wstr[i];
+while (i < len) {
+ch = wstr[i++];
+#if SIZEOF_WCHAR_T == 2
+if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+&& i < len
+&& Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
+{
+ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
+i++;
+}
+#endif
 if (!_PyUnicode_IsXidContinue(ch)) {
 return 0;
 }

Loading…
Cancel
Save