closes bpo-36861: Update Unicode database to 12.1.0. (GH-13214)

Adds ㋿.
7 years ago · 3aca40d3cb
10 changed files with 15381 additions and 15377 deletions
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -351,7 +351,7 @@ Notes:
   The numeric literals accepted include the digits ``0`` to ``9`` or any
   Unicode equivalent (code points with the ``Nd`` property).

-   See http://www.unicode.org/Public/12.0.0/ucd/extracted/DerivedNumericType.txt
+   See http://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedNumericType.txt
   for a complete list of code points with the ``Nd`` property.


--- a/Doc/library/unicodedata.rst
+++ b/Doc/library/unicodedata.rst
@@ -17,8 +17,8 @@

 This module provides access to the Unicode Character Database (UCD) which
 defines character properties for all Unicode characters. The data contained in
-this database is compiled from the `UCD version 12.0.0
-<http://www.unicode.org/Public/12.0.0/ucd>`_.
+this database is compiled from the `UCD version 12.1.0
+<http://www.unicode.org/Public/12.1.0/ucd>`_.

 The module uses the same names and symbols as defined by Unicode
 Standard Annex #44, `"Unicode Character Database"
@@ -175,6 +175,6 @@ Examples:

 .. rubric:: Footnotes

-.. [#] http://www.unicode.org/Public/12.0.0/ucd/NameAliases.txt
+.. [#] http://www.unicode.org/Public/12.1.0/ucd/NameAliases.txt

-.. [#] http://www.unicode.org/Public/12.0.0/ucd/NamedSequences.txt
+.. [#] http://www.unicode.org/Public/12.1.0/ucd/NamedSequences.txt
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -316,7 +316,7 @@ The Unicode category codes mentioned above stand for:
 * *Nd* - decimal numbers
 * *Pc* - connector punctuations
 * *Other_ID_Start* - explicit list of characters in `PropList.txt
-  <http://www.unicode.org/Public/12.0.0/ucd/PropList.txt>`_ to support backwards
+  <http://www.unicode.org/Public/12.1.0/ucd/PropList.txt>`_ to support backwards
  compatibility
 * *Other_ID_Continue* - likewise

--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -510,9 +510,8 @@ Added new clock :data:`~time.CLOCK_UPTIME_RAW` for macOS 10.12.
 unicodedata
 -----------

-* The :mod:`unicodedata` module has been upgraded to use the `Unicode 12.0.0
-  <http://blog.unicode.org/2019/03/announcing-unicode-standard-version-120.html>`_
-  release.
+* The :mod:`unicodedata` module has been upgraded to use the `Unicode 12.1.0
+  <http://blog.unicode.org/2019/05/unicode-12-1-en.html>`_ release.

 * New function :func:`~unicodedata.is_normalized` can be used to verify a string
  is in a specific normal form. (Contributed by Max Belanger and David Euresti in
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -80,7 +80,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):

    # Update this if the database changes. Make sure to do a full rebuild
    # (e.g. 'make distclean && make') to get the correct checksum.
-    expectedchecksum = '4cb02a243aed7c251067386dd738189146fddf94'
+    expectedchecksum = 'c44a49ca7c5cb6441640fe174ede604b45028652'
    def test_function_checksum(self):
        data = []
        h = hashlib.sha1()
--- a/Misc/NEWS.d/next/Core and Builtins/2019-05-08-20-42-40.bpo-36861.72mvZM.rst
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-05-08-20-42-40.bpo-36861.72mvZM.rst
@@ -0,0 +1 @@
+Update the Unicode database to version 12.1.0.
--- a/Modules/unicodedata_db.h
+++ b/Modules/unicodedata_db.h
--- a/Modules/unicodename_db.h
+++ b/Modules/unicodename_db.h
--- a/Objects/unicodetype_db.h
+++ b/Objects/unicodetype_db.h
@@ -2925,7 +2925,7 @@ static const unsigned short index2[] = {
    5, 5, 5, 5, 5, 5, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 55, 55, 55, 55, 55,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 55, 55, 55, 55, 55,
    388, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
    55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
    55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -41,7 +41,7 @@ VERSION = "3.3"
 #   * Doc/library/stdtypes.rst, and
 #   * Doc/library/unicodedata.rst
 #   * Doc/reference/lexical_analysis.rst (two occurrences)
-UNIDATA_VERSION = "12.0.0"
+UNIDATA_VERSION = "12.1.0"
 UNICODE_DATA = "UnicodeData%s.txt"
 COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
 EASTASIAN_WIDTH = "EastAsianWidth%s.txt"