@ -22,13 +22,23 @@ if sys.platform == "darwin":
else :
C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
# XXX (ncoghlan): The above is probably still wrong for :
# Note that the above is probably still wrong in some cases, such as :
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
# * AIX and any other platforms that use latin-1 in the C locale
#
# Options for dealing with this:
# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
# * Fix the test expectations to match the actual platform behaviour
# In order to get the warning messages to match up as expected, the candidate
# order here must match the target locale order in Python/pylifecycle.c
_C_UTF8_LOCALES = ( " C.UTF-8 " , " C.utf8 " , " UTF-8 " )
_C_UTF8_LOCALES = ( " C.UTF-8 " , " C.utf8 " ) #, "UTF-8")
# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
# problems encountered on *BSD systems with those test cases
# For additional details see:
# nl_langinfo CODESET error: https://bugs.python.org/issue30647
# locale handling differences: https://bugs.python.org/issue30672
# There's no reliable cross-platform way of checking locale alias
# lists, so the only way of knowing which of these locales will work
@ -40,20 +50,24 @@ def _set_locale_in_subprocess(locale_name):
result , py_cmd = run_python_until_end ( " -c " , cmd , __isolated = True )
return result . rc == 0
_EncodingDetails = namedtuple ( " EncodingDetails " ,
" fsencoding stdin_info stdout_info stderr_info " )
_fields = " fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all "
_EncodingDetails = namedtuple ( " EncodingDetails " , _fields )
class EncodingDetails ( _EncodingDetails ) :
# XXX (ncoghlan): Using JSON for child state reporting may be less fragile
CHILD_PROCESS_SCRIPT = " ; " . join ( [
" import sys " ,
" import sys, os " ,
" print(sys.getfilesystemencoding()) " ,
" print(sys.stdin.encoding + ' : ' + sys.stdin.errors) " ,
" print(sys.stdout.encoding + ' : ' + sys.stdout.errors) " ,
" print(sys.stderr.encoding + ' : ' + sys.stderr.errors) " ,
" print(os.environ.get( ' LANG ' , ' not set ' )) " ,
" print(os.environ.get( ' LC_CTYPE ' , ' not set ' )) " ,
" print(os.environ.get( ' LC_ALL ' , ' not set ' )) " ,
] )
@classmethod
def get_expected_details ( cls , fs_encoding , stream_encoding ) :
def get_expected_details ( cls , coercion_expected , fs_encoding , stream_encoding , env_vars ) :
""" Returns expected child process details for a given encoding """
_stream = stream_encoding + " :{} "
# stdin and stdout should use surrogateescape either because the
@ -61,7 +75,14 @@ class EncodingDetails(_EncodingDetails):
stream_info = 2 * [ _stream . format ( " surrogateescape " ) ]
# stderr should always use backslashreplace
stream_info . append ( _stream . format ( " backslashreplace " ) )
return dict ( cls ( fs_encoding , * stream_info ) . _asdict ( ) )
expected_lang = env_vars . get ( " LANG " , " not set " ) . lower ( )
if coercion_expected :
expected_lc_ctype = CLI_COERCION_TARGET . lower ( )
else :
expected_lc_ctype = env_vars . get ( " LC_CTYPE " , " not set " ) . lower ( )
expected_lc_all = env_vars . get ( " LC_ALL " , " not set " ) . lower ( )
env_info = expected_lang , expected_lc_ctype , expected_lc_all
return dict ( cls ( fs_encoding , * stream_info , * env_info ) . _asdict ( ) )
@staticmethod
def _handle_output_variations ( data ) :
@ -97,64 +118,20 @@ class EncodingDetails(_EncodingDetails):
result . fail ( py_cmd )
# All subprocess outputs in this test case should be pure ASCII
adjusted_output = cls . _handle_output_variations ( result . out )
stdout_lines = adjusted_output . decode ( " ascii " ) . rstrip ( ) . splitlines ( )
stdout_lines = adjusted_output . decode ( " ascii " ) . splitlines ( )
child_encoding_details = dict ( cls ( * stdout_lines ) . _asdict ( ) )
stderr_lines = result . err . decode ( " ascii " ) . rstrip ( ) . splitlines ( )
return child_encoding_details , stderr_lines
class _ChildProcessEncodingTestCase ( unittest . TestCase ) :
# Base class to check for expected encoding details in a child process
def _check_child_encoding_details ( self ,
env_vars ,
expected_fs_encoding ,
expected_stream_encoding ,
expected_warning ) :
""" Check the C locale handling for the given process environment
Parameters :
expected_fs_encoding : expected sys . getfilesystemencoding ( ) result
expected_stream_encoding : expected encoding for standard streams
expected_warning : stderr output to expect ( if any )
"""
result = EncodingDetails . get_child_details ( env_vars )
encoding_details , stderr_lines = result
self . assertEqual ( encoding_details ,
EncodingDetails . get_expected_details (
expected_fs_encoding ,
expected_stream_encoding ) )
self . assertEqual ( stderr_lines , expected_warning )
# Details of the shared library warning emitted at runtime
LIBRARY_C _LOCALE_WARNING = (
LEGACY_LOCALE_WARNING = (
" Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
" encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
" C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
" locales is recommended. "
)
@unittest.skipUnless ( sysconfig . get_config_var ( " PY_WARN_ON_C_LOCALE " ) ,
" C locale runtime warning disabled at build time " )
class LocaleWarningTests ( _ChildProcessEncodingTestCase ) :
# Test warning emitted when running in the C locale
def test_library_c_locale_warning ( self ) :
self . maxDiff = None
for locale_to_set in ( " C " , " POSIX " , " invalid.ascii " ) :
# XXX (ncoghlan): Mac OS X doesn't behave as expected in the
# POSIX locale, so we skip that for now
if sys . platform == " darwin " and locale_to_set == " POSIX " :
continue
var_dict = {
" LC_ALL " : locale_to_set
}
with self . subTest ( forced_locale = locale_to_set ) :
self . _check_child_encoding_details ( var_dict ,
C_LOCALE_FS_ENCODING ,
C_LOCALE_STREAM_ENCODING ,
[ LIBRARY_C_LOCALE_WARNING ] )
# Details of the CLI locale coercion warning emitted at runtime
CLI_COERCION_WARNING_FMT = (
" Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
@ -163,9 +140,13 @@ CLI_COERCION_WARNING_FMT = (
AVAILABLE_TARGETS = None
CLI_COERCION_TARGET = None
CLI_COERCION_WARNING = None
def setUpModule ( ) :
global AVAILABLE_TARGETS
global CLI_COERCION_TARGET
global CLI_COERCION_WARNING
if AVAILABLE_TARGETS is not None :
# initialization already done
@ -177,26 +158,57 @@ def setUpModule():
if _set_locale_in_subprocess ( target_locale ) :
AVAILABLE_TARGETS . append ( target_locale )
if AVAILABLE_TARGETS :
# Coercion is expected to use the first available target locale
CLI_COERCION_TARGET = AVAILABLE_TARGETS [ 0 ]
CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT . format ( CLI_COERCION_TARGET )
class _LocaleCoercionTargetsTestCase ( _ChildProcessEncodingTestCase ) :
# Base class for test cases that rely on coercion targets being defined
class _LocaleHandlingTestCase ( unittest . TestCase ) :
# Base class to check expected locale handling behaviour
@classmethod
def setUpClass ( cls ) :
if not AVAILABLE_TARGETS :
raise unittest . SkipTest ( " No C-with-UTF-8 locale available " )
def _check_child_encoding_details ( self ,
env_vars ,
expected_fs_encoding ,
expected_stream_encoding ,
expected_warnings ,
coercion_expected ) :
""" Check the C locale handling for the given process environment

Runs a child process via EncodingDetails.get_child_details() and compares
the details it reports, and its stderr lines, against the expectations.

Parameters :
env_vars : environment variable mapping handed to the child process
helper ; it is also passed to get_expected_details ( ) to derive
the expected environment values
expected_fs_encoding : expected sys . getfilesystemencoding ( ) result
expected_stream_encoding : expected encoding for standard streams
expected_warnings : list of stderr lines to expect ; None is treated
as an empty list ( no stderr output expected )
coercion_expected : forwarded to get_expected_details ( ) , which uses
it to choose the expected LC_CTYPE value
"""
# get_child_details() returns a (details dict, stderr lines) pair
result = EncodingDetails . get_child_details ( env_vars )
encoding_details , stderr_lines = result
# Build the expected details from the same environment the child was given
expected_details = EncodingDetails . get_expected_details (
coercion_expected ,
expected_fs_encoding ,
expected_stream_encoding ,
env_vars
)
self . assertEqual ( encoding_details , expected_details )
# None is shorthand for "no warnings expected on stderr"
if expected_warnings is None :
expected_warnings = [ ]
self . assertEqual ( stderr_lines , expected_warnings )
class LocaleConfigurationTests ( _LocaleCoercionTargetsTestCase ) :
class LocaleConfigurationTests ( _LocaleHandlingTestCase ) :
# Test explicit external configuration via the process environment
@classmethod
def setUpClass(cls):
    """Skip the whole test class when no target locale is available.

    Raises:
        unittest.SkipTest: if AVAILABLE_TARGETS is empty (or still None).
    """
    # unittest documents setUpClass() as a class method taking the class as
    # its only argument; the undecorated zero-argument form only worked when
    # looked up on the class and failed when looked up on an instance.
    #
    # This relies on setUpModule() having been run (it populates
    # AVAILABLE_TARGETS), so it can't be handled via the
    # @unittest.skipUnless decorator
    if not AVAILABLE_TARGETS:
        raise unittest.SkipTest(" No C-with-UTF-8 locale available ")
def test_external_target_locale_configuration ( self ) :
# Explicitly setting a target locale should give the same behaviour as
# is seen when implicitly coercing to that target locale
self . maxDiff = None
expected_warning = [ ]
expected_fs_encoding = " utf-8 "
expected_stream_encoding = " utf-8 "
@ -209,6 +221,7 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
for locale_to_set in AVAILABLE_TARGETS :
# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
# expected, so skip that combination for now
# See https://bugs.python.org/issue30672 for discussion
if env_var == " LANG " and locale_to_set == " UTF-8 " :
continue
@ -219,17 +232,23 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
self . _check_child_encoding_details ( var_dict ,
expected_fs_encoding ,
expected_stream_encoding ,
expected_warning )
expected_warnings = None ,
coercion_expected = False )
@test.support.cpython_only
@unittest.skipUnless ( sysconfig . get_config_var ( " PY_COERCE_C_LOCALE " ) ,
" C locale coercion disabled at build time " )
class LocaleCoercionTests ( _LocaleCoercionTargets TestCase ) :
class LocaleCoercionTests ( _LocaleHandling TestCase ) :
# Test implicit reconfiguration of the environment during CLI startup
def _check_c_locale_coercion ( self , fs_encoding , stream_encoding , coerce_c_locale ) :
def _check_c_locale_coercion ( self ,
fs_encoding , stream_encoding ,
coerce_c_locale ,
expected_warnings = None ,
coercion_expected = True ,
* * extra_vars ) :
""" Check the C locale handling for various configurations
Parameters :
@ -238,27 +257,31 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
coerce_c_locale : setting to use for PYTHONCOERCECLOCALE
None : don ' t set the variable at all
str : the value set in the child ' s environment
expected_warnings : expected warning lines on stderr
extra_vars : additional environment variables to set in subprocess
"""
# Check for expected warning on stderr if C locale is coerced
self . maxDiff = None
expected_warning = [ ]
if coerce_c_locale != " 0 " :
# Expect coercion to use the first available locale
warning_msg = CLI_COERCION_WARNING_FMT . format ( AVAILABLE_TARGETS [ 0 ] )
expected_warning . append ( warning_msg )
if not AVAILABLE_TARGETS :
# Locale coercion is disabled when there aren't any target locales
fs_encoding = C_LOCALE_FS_ENCODING
stream_encoding = C_LOCALE_STREAM_ENCODING
coercion_expected = False
if expected_warnings :
expected_warnings = [ LEGACY_LOCALE_WARNING ]
base_var_dict = {
" LANG " : " " ,
" LC_CTYPE " : " " ,
" LC_ALL " : " " ,
}
base_var_dict . update ( extra_vars )
for env_var in ( " LANG " , " LC_CTYPE " ) :
for locale_to_set in ( " " , " C " , " POSIX " , " invalid.ascii " ) :
# XXX (ncoghlan): Mac OS X does n't behave as expected in the
# XXX (ncoghlan): *BSD platforms do n't behave as expected in the
# POSIX locale, so we skip that for now
if sys . platform == " darwin " and locale_to_set == " POSIX " :
# See https://bugs.python.org/issue30672 for discussion
if locale_to_set == " POSIX " :
continue
with self . subTest ( env_var = env_var ,
nominal_locale = locale_to_set ,
@ -267,33 +290,62 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
var_dict [ env_var ] = locale_to_set
if coerce_c_locale is not None :
var_dict [ " PYTHONCOERCECLOCALE " ] = coerce_c_locale
# Check behaviour on successful coercion
self . _check_child_encoding_details ( var_dict ,
fs_encoding ,
stream_encoding ,
expected_warning )
expected_warnings ,
coercion_expected )
# NOTE(review): the doubled "test_test_" prefix looks accidental; the method
# is still collected by unittest, so the name is left as-is here - confirm
# before renaming.
def test_test_PYTHONCOERCECLOCALE_not_set ( self ) :
# This should coerce to the first available target locale by default
# (coerce_c_locale=None means PYTHONCOERCECLOCALE is not set in the child)
self . _check_c_locale_coercion ( " utf-8 " , " utf-8 " , coerce_c_locale = None )
def test_PYTHONCOERCECLOCALE_not_zero ( self ) :
# *Any* string other that "0" is considered "set" for our purposes
# *Any* string other than "0" is considered "set" for our purposes
# and hence should result in the locale coercion being enabled
for setting in ( " " , " 1 " , " true " , " false " ) :
self . _check_c_locale_coercion ( " utf-8 " , " utf-8 " , coerce_c_locale = setting )
def test_PYTHONCOERCECLOCALE_set_to_warn ( self ) :
# PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
self . _check_c_locale_coercion ( " utf-8 " , " utf-8 " ,
coerce_c_locale = " warn " ,
expected_warnings = [ CLI_COERCION_WARNING ] )
def test_PYTHONCOERCECLOCALE_set_to_zero ( self ) :
# The setting "0" should result in the locale coercion being disabled
self . _check_c_locale_coercion ( C_LOCALE_FS_ENCODING ,
C_LOCALE_STREAM_ENCODING ,
coerce_c_locale = " 0 " )
coerce_c_locale = " 0 " ,
coercion_expected = False )
# Setting LC_ALL=C shouldn't make any difference to the behaviour
self . _check_c_locale_coercion ( C_LOCALE_FS_ENCODING ,
C_LOCALE_STREAM_ENCODING ,
coerce_c_locale = " 0 " ,
LC_ALL = " C " ,
coercion_expected = False )
def test_LC_ALL_set_to_C ( self ) :
# Setting LC_ALL should render the locale coercion ineffective
self . _check_c_locale_coercion ( C_LOCALE_FS_ENCODING ,
C_LOCALE_STREAM_ENCODING ,
coerce_c_locale = None ,
LC_ALL = " C " ,
coercion_expected = False )
# And result in a warning about a lack of locale compatibility
self . _check_c_locale_coercion ( C_LOCALE_FS_ENCODING ,
C_LOCALE_STREAM_ENCODING ,
coerce_c_locale = " warn " ,
LC_ALL = " C " ,
expected_warnings = [ LEGACY_LOCALE_WARNING ] ,
coercion_expected = False )
def test_main ( ) :
test . support . run_unittest (
LocaleConfigurationTests ,
LocaleCoercionTests ,
LocaleWarningTests
LocaleCoercionTests
)
test . support . reap_children ( )