bpo-33956: update vendored expat to 2.2.5 (GH-7925)

4e21100f · Benjamin Peterson · GitHub · 58ed7307 · 4e21100f · 4e21100f
Unverified commit 4e21100f, authored Jun 27, 2018 by Benjamin Peterson; committed by GitHub on Jun 27, 2018
9 changed files
--- a/Misc/NEWS.d/next/Core and Builtins/2018-06-25-20-42-44.bpo-33956.1qoTwD.rst
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-06-25-20-42-44.bpo-33956.1qoTwD.rst
+Update vendored Expat library copy to version 2.2.5.
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -1076,7 +1076,7 @@ XML_GetFeatureList(void);
 */
 #define XML_MAJOR_VERSION 2
 #define XML_MINOR_VERSION 2
-#define XML_MICRO_VERSION 4
+#define XML_MICRO_VERSION 5

 #ifdef __cplusplus
 }

--- a/Modules/expat/expat_external.h
+++ b/Modules/expat/expat_external.h
@@ -35,12 +35,8 @@

 /* External API definitions */

-/* Namespace external symbols to allow multiple libexpat version to
-   co-exist. */
-#include "pyexpatns.h"
-
 #if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
-#define XML_USE_MSC_EXTENSIONS 1
+# define XML_USE_MSC_EXTENSIONS 1
 #endif

 /* Expat tries very hard to make the API boundary very specifically
@@ -66,11 +62,11 @@
   system headers may assume the cdecl convention.
 */
 #ifndef XMLCALL
-#if defined(_MSC_VER)
-#define XMLCALL __cdecl
-#elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER)
-#define XMLCALL __attribute__((cdecl))
-#else
+# if defined(_MSC_VER)
+#  define XMLCALL __cdecl
+# elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER)
+#  define XMLCALL __attribute__((cdecl))
+# else
 /* For any platform which uses this definition and supports more than
   one calling convention, we need to extend this definition to
   declare the convention used on that platform, if it's possible to
@@ -81,41 +77,41 @@
   pre-processor and how to specify the same calling convention as the
   platform's malloc() implementation.
 */
-#define XMLCALL
-#endif
+#  define XMLCALL
+# endif
 #endif  /* not defined XMLCALL */


 #if !defined(XML_STATIC) && !defined(XMLIMPORT)
-#ifndef XML_BUILDING_EXPAT
+# ifndef XML_BUILDING_EXPAT
 /* using Expat from an application */

-#ifdef XML_USE_MSC_EXTENSIONS
-#define XMLIMPORT __declspec(dllimport)
-#endif
+#  ifdef XML_USE_MSC_EXTENSIONS
+#   define XMLIMPORT __declspec(dllimport)
+#  endif

-#endif
+# endif
 #endif  /* not defined XML_STATIC */

 #if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
-#define XMLIMPORT __attribute__ ((visibility ("default")))
+# define XMLIMPORT __attribute__ ((visibility ("default")))
 #endif

 /* If we didn't define it above, define it away: */
 #ifndef XMLIMPORT
-#define XMLIMPORT
+# define XMLIMPORT
 #endif

 #if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
-#define XML_ATTR_MALLOC __attribute__((__malloc__))
+# define XML_ATTR_MALLOC __attribute__((__malloc__))
 #else
-#define XML_ATTR_MALLOC
+# define XML_ATTR_MALLOC
 #endif

 #if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
-#define XML_ATTR_ALLOC_SIZE(x)  __attribute__((__alloc_size__(x)))
+# define XML_ATTR_ALLOC_SIZE(x)  __attribute__((__alloc_size__(x)))
 #else
-#define XML_ATTR_ALLOC_SIZE(x)
+# define XML_ATTR_ALLOC_SIZE(x)
 #endif

 #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
@@ -125,33 +121,35 @@ extern "C" {
 #endif

 #ifdef XML_UNICODE_WCHAR_T
-# define XML_UNICODE
+# ifndef XML_UNICODE
+#  define XML_UNICODE
+# endif
 # if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2)
 #  error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc"
 # endif
 #endif

 #ifdef XML_UNICODE     /* Information is UTF-16 encoded. */
-#ifdef XML_UNICODE_WCHAR_T
+# ifdef XML_UNICODE_WCHAR_T
 typedef wchar_t XML_Char;
 typedef wchar_t XML_LChar;
-#else
+# else
 typedef unsigned short XML_Char;
 typedef char XML_LChar;
-#endif /* XML_UNICODE_WCHAR_T */
+# endif /* XML_UNICODE_WCHAR_T */
 #else                  /* Information is UTF-8 encoded. */
 typedef char XML_Char;
 typedef char XML_LChar;
 #endif /* XML_UNICODE */

 #ifdef XML_LARGE_SIZE  /* Use large integers for file/stream positions. */
-#if defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400
+# if defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400
 typedef __int64 XML_Index; 
 typedef unsigned __int64 XML_Size;
-#else
+# else
 typedef long long XML_Index;
 typedef unsigned long long XML_Size;
-#endif
+# endif
 #else
 typedef long XML_Index;
 typedef unsigned long XML_Size;

--- a/Modules/expat/internal.h
+++ b/Modules/expat/internal.h
@@ -116,7 +116,7 @@ extern "C" {


 void
-align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
+_INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef);


 #ifdef __cplusplus

--- a/Modules/expat/loadlibrary.c
+++ b/Modules/expat/loadlibrary.c
@@ -84,7 +84,7 @@ HMODULE _Expat_LoadLibrary(LPCTSTR filename)
  /* Get a handle to kernel32 so we can access it's functions at runtime */
  HMODULE hKernel32 = GetModuleHandle(TEXT("kernel32"));
  if(!hKernel32)
-    return NULL;
+    return NULL;  /* LCOV_EXCL_LINE */

  /* Attempt to find LoadLibraryEx() which is only available on Windows 2000
     and above */

--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
--- a/Modules/expat/xmltok.c
+++ b/Modules/expat/xmltok.c
@@ -31,8 +31,17 @@
 */

 #include <stddef.h>
-#include <stdbool.h>
-#include <string.h>  // memcpy
+#include <string.h>  /* memcpy */
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1700)
+  /* for vs2012/11.0/1700 and earlier Visual Studio compilers */
+# define bool   int
+# define false  0
+# define true   1
+#else
+# include <stdbool.h>
+#endif
+

 #ifdef _WIN32
 #include "winconfig.h"
@@ -57,7 +66,6 @@
  { PREFIX(prologTok), PREFIX(contentTok), \
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
-  PREFIX(sameName), \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
@@ -354,7 +362,7 @@ enum {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
 };

 void
-align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
+_INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef)
 {
  const char * fromLim = *fromLimRef;
  size_t walked = 0;
@@ -405,18 +413,22 @@ utf8_toUtf8(const ENCODING *UNUSED_P(enc),
  }

  /* Avoid copying partial characters (from incomplete input). */
-  const char * const fromLimBefore = fromLim;
-  align_limit_to_full_utf8_characters(*fromP, &fromLim);
-  if (fromLim < fromLimBefore) {
-    input_incomplete = true;
+  {
+    const char * const fromLimBefore = fromLim;
+    _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
+    if (fromLim < fromLimBefore) {
+      input_incomplete = true;
+    }
  }

-  const ptrdiff_t bytesToCopy = fromLim - *fromP;
-  memcpy((void *)*toP, (const void *)*fromP, (size_t)bytesToCopy);
-  *fromP += bytesToCopy;
-  *toP += bytesToCopy;
+  {
+    const ptrdiff_t bytesToCopy = fromLim - *fromP;
+    memcpy(*toP, *fromP, bytesToCopy);
+    *fromP += bytesToCopy;
+    *toP += bytesToCopy;
+  }

-  if (output_exhausted)  // needs to go first
+  if (output_exhausted)  /* needs to go first */
    return XML_CONVERT_OUTPUT_EXHAUSTED;
  else if (input_incomplete)
    return XML_CONVERT_INPUT_INCOMPLETE;
@@ -1452,9 +1464,8 @@ unknown_toUtf8(const ENCODING *enc,
        return XML_CONVERT_OUTPUT_EXHAUSTED;
      (*fromP)++;
    }
-    do {
-      *(*toP)++ = *utf8++;
-    } while (--n != 0);
+    memcpy(*toP, utf8, n);
+    *toP += n;
  }
 }


--- a/Modules/expat/xmltok.h
+++ b/Modules/expat/xmltok.h
@@ -167,9 +167,6 @@ enum XML_Convert_Result {
 struct encoding {
  SCANNER scanners[XML_N_STATES];
  SCANNER literalScanners[XML_N_LITERAL_TYPES];
-  int (PTRCALL *sameName)(const ENCODING *,
-                          const char *,
-                          const char *);
  int (PTRCALL *nameMatchesAscii)(const ENCODING *,
                                  const char *,
                                  const char *,
@@ -260,8 +257,6 @@ struct encoding {
 #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
   XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)

-#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))
-
 #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
  (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))


--- a/Modules/expat/xmltok_impl.c
+++ b/Modules/expat/xmltok_impl.c
@@ -1653,79 +1653,6 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
  return 0;
 }

-/* This function does not appear to be called from anywhere within the
- * library code.  It is used via the macro XmlSameName(), which is
- * defined but never used.  Since it appears in the encoding function
- * table, removing it is not a thing to be undertaken lightly.  For
- * the moment, we simply exclude it from coverage tests.
- *
- * LCOV_EXCL_START
- */
-static int PTRCALL
-PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
-{
-  for (;;) {
-    switch (BYTE_TYPE(enc, ptr1)) {
-#define LEAD_CASE(n) \
-    case BT_LEAD ## n: \
-      if (*ptr1++ != *ptr2++) \
-        return 0;
-    LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
-#undef LEAD_CASE
-      /* fall through */
-      if (*ptr1++ != *ptr2++)
-        return 0;
-      break;
-    case BT_NONASCII:
-    case BT_NMSTRT:
-#ifdef XML_NS
-    case BT_COLON:
-#endif
-    case BT_HEX:
-    case BT_DIGIT:
-    case BT_NAME:
-    case BT_MINUS:
-      if (*ptr2++ != *ptr1++)
-        return 0;
-      if (MINBPC(enc) > 1) {
-        if (*ptr2++ != *ptr1++)
-          return 0;
-        if (MINBPC(enc) > 2) {
-          if (*ptr2++ != *ptr1++)
-            return 0;
-          if (MINBPC(enc) > 3) {
-            if (*ptr2++ != *ptr1++)
-              return 0;
-          }
-        }
-      }
-      break;
-    default:
-      if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
-        return 1;
-      switch (BYTE_TYPE(enc, ptr2)) {
-      case BT_LEAD2:
-      case BT_LEAD3:
-      case BT_LEAD4:
-      case BT_NONASCII:
-      case BT_NMSTRT:
-#ifdef XML_NS
-      case BT_COLON:
-#endif
-      case BT_HEX:
-      case BT_DIGIT:
-      case BT_NAME:
-      case BT_MINUS:
-        return 0;
-      default:
-        return 1;
-      }
-    }
-  }
-  /* not reached */
-}
-/* LCOV_EXCL_STOP */
-
 static int PTRCALL
 PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
                         const char *end1, const char *ptr2)
@@ -1733,7 +1660,7 @@ PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
    if (end1 - ptr1 < MINBPC(enc)) {
      /* This line cannot be executed.  THe incoming data has already
-       * been tokenized once, so incomplete characters like this have
+       * been tokenized once, so imcomplete characters like this have
       * already been eliminated from the input.  Retaining the
       * paranoia check is still valuable, however.
       */