Commit e664d7f8 authored by Pär Björklund, committed by Antoine Pitrou

bpo-30747: Attempt to fix atomic load/store (#2383)

_Py_atomic_* are currently not implemented as atomic operations
when building with MSVC. This patch attempts to implement parts
of the functionality required.
Parent dadca480
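For context, the interface being fixed is consumed through the relaxed shortcuts defined near the end of pyatomic.h. The following is a minimal usage sketch, not part of the patch: the _Py_atomic_int type and the _Py_atomic_store_relaxed/_Py_atomic_load_relaxed macros are the real names from this header, while the flag, the functions and the surrounding program are hypothetical, and it assumes a core build where pyatomic.h is available.

/* Hypothetical usage sketch; only the _Py_atomic_* names come from pyatomic.h. */
#include "pyatomic.h"

static _Py_atomic_int pending_flag = {0};

static void
set_pending(void)
{
    /* With the MSVC branch added by this patch, a relaxed store is meant to
       go through _InterlockedExchange instead of a plain assignment. */
    _Py_atomic_store_relaxed(&pending_flag, 1);
}

static int
check_pending(void)
{
    /* A relaxed load stays a plain volatile read in the MSVC branch. */
    return _Py_atomic_load_relaxed(&pending_flag);
}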
@@ -10,6 +10,12 @@
#include <stdatomic.h>
#endif
#if defined(_MSC_VER)
#include <intrin.h>
#include <immintrin.h>
#endif
/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
@@ -87,8 +93,9 @@ typedef struct _Py_atomic_int {
|| (ORDER) == __ATOMIC_CONSUME), \
__atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))
#else
/* Only support GCC (for expression statements) and x86 (for simple
* atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
_Py_memory_order_relaxed,
_Py_memory_order_acquire,
@@ -105,9 +112,6 @@ typedef struct _Py_atomic_int {
int _value;
} _Py_atomic_int;
/* Only support GCC (for expression statements) and x86 (for simple
* atomic semantics) for now */
#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
@@ -127,7 +131,7 @@ _Py_atomic_thread_fence(_Py_memory_order order)
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
(void)address; /* shut up -Wunused-parameter */
switch(order) {
case _Py_memory_order_release:
case _Py_memory_order_acq_rel:
@@ -219,7 +223,291 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
result; \
})
#else /* !gcc x86 */
#elif defined(_MSC_VER)
/* _Interlocked* functions provide a full memory barrier and are therefore
enough for acq_rel and seq_cst. If the HLE variants aren't available
in hardware they will fall back to a full memory barrier as well.
This might affect performance, but likely only in some very specific and
hard to measure scenarios.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
_Py_memory_order_relaxed,
_Py_memory_order_acquire,
_Py_memory_order_release,
_Py_memory_order_acq_rel,
_Py_memory_order_seq_cst
} _Py_memory_order;
typedef struct _Py_atomic_address {
volatile uintptr_t _value;
} _Py_atomic_address;
typedef struct _Py_atomic_int {
volatile int _value;
} _Py_atomic_int;
#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
switch (ORDER) { \
case _Py_memory_order_acquire: \
_InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
case _Py_memory_order_release: \
_InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
default: \
_InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
}
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif
#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
switch (ORDER) { \
case _Py_memory_order_acquire: \
_InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
case _Py_memory_order_release: \
_InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
default: \
_InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
}
#if defined(_M_X64)
/* This has to be an intptr_t for now.
gil_created() uses -1 as a sentinel value; if this returns
a uintptr_t it will do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
uintptr_t old;
switch (order) {
case _Py_memory_order_acquire:
{
do {
old = *value;
} while(_InterlockedCompareExchange64_HLEAcquire(value, old, old) != old);
break;
}
case _Py_memory_order_release:
{
do {
old = *value;
} while(_InterlockedCompareExchange64_HLERelease(value, old, old) != old);
break;
}
case _Py_memory_order_relaxed:
old = *value;
break;
default:
{
do {
old = *value;
} while(_InterlockedCompareExchange64(value, old, old) != old);
break;
}
}
return old;
}
#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif
inline int _Py_atomic_load_32bit(volatile int* value, int order) {
int old;
switch (order) {
case _Py_memory_order_acquire:
{
do {
old = *value;
} while(_InterlockedCompareExchange_HLEAcquire(value, old, old) != old);
break;
}
case _Py_memory_order_release:
{
do {
old = *value;
} while(_InterlockedCompareExchange_HLERelease(value, old, old) != old);
break;
}
case _Py_memory_order_relaxed:
old = *value;
break;
default:
{
do {
old = *value;
} while(_InterlockedCompareExchange(value, old, old) != old);
break;
}
}
return old;
}
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
if (sizeof(*ATOMIC_VAL._value) == 8) { \
_Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
_Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
( \
sizeof(*(ATOMIC_VAL._value)) == 8 ? \
_Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
_Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
)
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
_Py_memory_order_relaxed,
_Py_memory_order_acquire,
_Py_memory_order_release,
_Py_memory_order_acq_rel,
_Py_memory_order_seq_cst
} _Py_memory_order;
typedef struct _Py_atomic_address {
volatile uintptr_t _value;
} _Py_atomic_address;
typedef struct _Py_atomic_int {
volatile int _value;
} _Py_atomic_int;
#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
switch (ORDER) { \
case _Py_memory_order_acquire: \
_InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
case _Py_memory_order_release: \
_InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
default: \
_InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
}
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif
#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
switch (ORDER) { \
case _Py_memory_order_acquire: \
_InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
case _Py_memory_order_release: \
_InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
default: \
_InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
}
#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
gil_created() uses -1 as a sentinel value; if this returns
a uintptr_t it will do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
uintptr_t old;
switch (order) {
case _Py_memory_order_acquire:
{
do {
old = *value;
} while(_InterlockedCompareExchange64_acq(value, old, old) != old);
break;
}
case _Py_memory_order_release:
{
do {
old = *value;
} while(_InterlockedCompareExchange64_rel(value, old, old) != old);
break;
}
case _Py_memory_order_relaxed:
old = *value;
break;
default:
{
do {
old = *value;
} while(_InterlockedCompareExchange64(value, old, old) != old);
break;
}
}
return old;
}
#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif
inline int _Py_atomic_load_32bit(volatile int* value, int order) {
int old;
switch (order) {
case _Py_memory_order_acquire:
{
do {
old = *value;
} while(_InterlockedCompareExchange_acq(value, old, old) != old);
break;
}
case _Py_memory_order_release:
{
do {
old = *value;
} while(_InterlockedCompareExchange_rel(value, old, old) != old);
break;
}
case _Py_memory_order_relaxed:
old = *value;
break;
default:
{
do {
old = *value;
} while(_InterlockedCompareExchange(value, old, old) != old);
break;
}
}
return old;
}
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
if (sizeof(*ATOMIC_VAL._value) == 8) { \
_Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
_Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
( \
sizeof(*(ATOMIC_VAL._value)) == 8 ? \
_Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
_Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
)
#endif
#else /* !gcc x86 !_msc_ver */
typedef enum _Py_memory_order {
_Py_memory_order_relaxed,
_Py_memory_order_acquire,
_Py_memory_order_release,
_Py_memory_order_acq_rel,
_Py_memory_order_seq_cst
} _Py_memory_order;
typedef struct _Py_atomic_address {
uintptr_t _value;
} _Py_atomic_address;
typedef struct _Py_atomic_int {
int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
volatile accesses are atomic. This is false, so people should port
this. */
@@ -229,8 +517,6 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
((ATOMIC_VAL)->_value)
#endif /* !gcc x86 */
#endif
/* Standardized shortcuts. */
@@ -245,6 +531,5 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
_Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
_Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
#endif /* Py_BUILD_CORE */
#endif /* Py_ATOMIC_H */
Add a non-dummy implementation of _Py_atomic_store and _Py_atomic_load on
MSVC.
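The load helpers in the patch emulate a load with barrier semantics by issuing a compare-exchange that swaps the current value with itself and retrying until it succeeds; the value returned by the compare-exchange is then an atomic snapshot. Below is a standalone, MSVC-only sketch of that pattern; the function and variable names are invented for illustration and are not CPython code.

#include <stdio.h>
#include <intrin.h>

static volatile long shared_value = 42;

static long
load_with_full_barrier(volatile long *p)
{
    long old;
    do {
        old = *p;
        /* _InterlockedCompareExchange(p, old, old) stores old back only if *p
           still equals old, and returns the value it observed; if another
           thread changed *p in between, the observed value differs and we
           retry with the fresh value. */
    } while (_InterlockedCompareExchange(p, old, old) != old);
    return old;
}

int
main(void)
{
    printf("%ld\n", load_with_full_barrier(&shared_value));
    return 0;
}

The 32-bit and 64-bit variants in the patch follow this same shape, and _Py_atomic_load_explicit picks between them with a sizeof() check on the atomic struct's _value field.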