@ -198,6 +198,11 @@ extern "C" {
# define OVERALLOCATE_FACTOR 4
# endif
/* bpo-40521: Interned strings are shared by all interpreters. */
# ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define INTERNED_STRINGS
# endif
/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are * not * counted in the string ' s ob_refcnt .
When the interned string reaches a refcnt of 0 the string deallocation
@ -206,7 +211,9 @@ extern "C" {
Another way to look at this is that to say that the actual reference
count of a string is : s - > ob_refcnt + ( s - > state ? 2 : 0 )
*/
# ifdef INTERNED_STRINGS
static PyObject * interned = NULL ;
# endif
/* The empty Unicode object is shared to improve performance. */
static PyObject * unicode_empty = NULL ;
@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
/* List of static strings. */
static _Py_Identifier * static_strings = NULL ;
/* bpo-40521: Latin1 singletons are shared by all interpreters. */
# ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define LATIN1_SINGLETONS
# endif
# ifdef LATIN1_SINGLETONS
/* Single character Unicode strings in the Latin-1 range are being
shared as well . */
static PyObject * unicode_latin1 [ 256 ] = { NULL } ;
# endif
/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace [ ] = {
@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
return unicode_empty ;
}
# ifdef LATIN1_SINGLETONS
if ( length = = 1 ) {
const void * data = PyUnicode_DATA ( unicode ) ;
int kind = PyUnicode_KIND ( unicode ) ;
@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
}
}
}
# endif
assert ( _PyUnicode_CheckConsistency ( unicode , 1 ) ) ;
return unicode ;
@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
case SSTATE_INTERNED_MORTAL :
/* revive dead object temporarily for DelItem */
Py_SET_REFCNT ( unicode , 3 ) ;
# ifdef INTERNED_STRINGS
if ( PyDict_DelItem ( interned , unicode ) ! = 0 ) {
_PyErr_WriteUnraisableMsg ( " deletion of interned string failed " ,
NULL ) ;
}
# endif
break ;
case SSTATE_INTERNED_IMMORTAL :
@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
static int
unicode_is_singleton ( PyObject * unicode )
{
PyASCIIObject * ascii = ( PyASCIIObject * ) unicode ;
if ( unicode = = unicode_empty )
if ( unicode = = unicode_empty ) {
return 1 ;
}
# ifdef LATIN1_SINGLETONS
PyASCIIObject * ascii = ( PyASCIIObject * ) unicode ;
if ( ascii - > state . kind ! = PyUnicode_WCHAR_KIND & & ascii - > length = = 1 )
{
Py_UCS4 ch = PyUnicode_READ_CHAR ( unicode , 0 ) ;
if ( ch < 256 & & unicode_latin1 [ ch ] = = unicode )
return 1 ;
}
# endif
return 0 ;
}
# endif
@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
static PyObject *
get_latin1_char ( unsigned char ch )
{
PyObject * unicode = unicode_latin1 [ ch ] ;
PyObject * unicode ;
# ifdef LATIN1_SINGLETONS
unicode = unicode_latin1 [ ch ] ;
if ( unicode ) {
Py_INCREF ( unicode ) ;
return unicode ;
}
# endif
unicode = PyUnicode_New ( 1 , ch ) ;
if ( ! unicode ) {
unicode = PyUnicode_New ( 1 , ch ) ;
if ( ! unicode )
return NULL ;
PyUnicode_1BYTE_DATA ( unicode ) [ 0 ] = ch ;
assert ( _PyUnicode_CheckConsistency ( unicode , 1 ) ) ;
unicode_latin1 [ ch ] = unicode ;
return NULL ;
}
PyUnicode_1BYTE_DATA ( unicode ) [ 0 ] = ch ;
assert ( _PyUnicode_CheckConsistency ( unicode , 1 ) ) ;
# ifdef LATIN1_SINGLETONS
Py_INCREF ( unicode ) ;
unicode_latin1 [ ch ] = unicode ;
# endif
return unicode ;
}
@ -11270,7 +11303,6 @@ int
_PyUnicode_EqualToASCIIId ( PyObject * left , _Py_Identifier * right )
{
PyObject * right_uni ;
Py_hash_t hash ;
assert ( _PyUnicode_CHECK ( left ) ) ;
assert ( right - > string ) ;
@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
if ( PyUnicode_CHECK_INTERNED ( left ) )
return 0 ;
# ifdef INTERNED_STRINGS
assert ( _PyUnicode_HASH ( right_uni ) ! = - 1 ) ;
hash = _PyUnicode_HASH ( left ) ;
Py_hash_t hash = _PyUnicode_HASH ( left ) ;
if ( hash ! = - 1 & & hash ! = _PyUnicode_HASH ( right_uni ) )
return 0 ;
# endif
return unicode_compare_eq ( left , right_uni ) ;
}
@ -15487,20 +15521,26 @@ void
PyUnicode_InternInPlace ( PyObject * * p )
{
PyObject * s = * p ;
PyObject * t ;
# ifdef Py_DEBUG
assert ( s ! = NULL ) ;
assert ( _PyUnicode_CHECK ( s ) ) ;
# else
if ( s = = NULL | | ! PyUnicode_Check ( s ) )
if ( s = = NULL | | ! PyUnicode_Check ( s ) ) {
return ;
}
# endif
/* If it's a subclass, we don't really know what putting
it in the interned dict might do . */
if ( ! PyUnicode_CheckExact ( s ) )
if ( ! PyUnicode_CheckExact ( s ) ) {
return ;
if ( PyUnicode_CHECK_INTERNED ( s ) )
}
if ( PyUnicode_CHECK_INTERNED ( s ) ) {
return ;
}
# ifdef INTERNED_STRINGS
if ( interned = = NULL ) {
interned = PyDict_New ( ) ;
if ( interned = = NULL ) {
@ -15508,22 +15548,28 @@ PyUnicode_InternInPlace(PyObject **p)
return ;
}
}
PyObject * t ;
Py_ALLOW_RECURSION
t = PyDict_SetDefault ( interned , s , s ) ;
Py_END_ALLOW_RECURSION
if ( t = = NULL ) {
PyErr_Clear ( ) ;
return ;
}
if ( t ! = s ) {
Py_INCREF ( t ) ;
Py_SETREF ( * p , t ) ;
return ;
}
/* The two references in interned are not counted by refcnt.
The deallocator will take care of this */
Py_SET_REFCNT ( s , Py_REFCNT ( s ) - 2 ) ;
_PyUnicode_STATE ( s ) . interned = SSTATE_INTERNED_MORTAL ;
# endif
}
void
@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
Py_CLEAR ( unicode_empty ) ;
# ifdef LATIN1_SINGLETONS
for ( Py_ssize_t i = 0 ; i < 256 ; i + + ) {
Py_CLEAR ( unicode_latin1 [ i ] ) ;
}
# endif
_PyUnicode_ClearStaticStrings ( ) ;
}