Commit e664d7f8 authored by Pär Björklund, committed by Antoine Pitrou

bpo-30747: Attempt to fix atomic load/store (#2383)

_Py_atomic_* are currently not implemented as atomic operations
when building with MSVC. This patch attempts to implement parts
of the functionality required.
Parent dadca480
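For context, the interface being fixed is consumed through the relaxed shortcuts defined near the end of pyatomic.h. The following is a minimal usage sketch, not part of the patch: the _Py_atomic_int type and the _Py_atomic_store_relaxed/_Py_atomic_load_relaxed macros are the real names from this header, while the flag, the functions and the surrounding program are hypothetical, and it assumes a core build where pyatomic.h is available.

/* Hypothetical usage sketch; only the _Py_atomic_* names come from pyatomic.h. */
#include "pyatomic.h"

static _Py_atomic_int pending_flag = {0};

static void
set_pending(void)
{
    /* With the MSVC branch added by this patch, a relaxed store is meant to
       go through _InterlockedExchange instead of a plain assignment. */
    _Py_atomic_store_relaxed(&pending_flag, 1);
}

static int
check_pending(void)
{
    /* A relaxed load stays a plain volatile read in the MSVC branch. */
    return _Py_atomic_load_relaxed(&pending_flag);
}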
@@ -10,6 +10,12 @@
#include <stdatomic.h>
#endif
#if defined(_MSC_VER)
#include <intrin.h>
#include <immintrin.h>
#endif
/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
@@ -87,8 +93,9 @@ typedef struct _Py_atomic_int {
|| (ORDER) == __ATOMIC_CONSUME), \
__atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))
#else
/* Only support GCC (for expression statements) and x86 (for simple
* atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
_Py_memory_order_relaxed,
_Py_memory_order_acquire,
@@ -105,9 +112,6 @@ typedef struct _Py_atomic_int {
int _value;
} _Py_atomic_int;
/* Only support GCC (for expression statements) and x86 (for simple
* atomic semantics) for now */
#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
@@ -127,7 +131,7 @@ _Py_atomic_thread_fence(_Py_memory_order order)
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
(void)address; /* shut up -Wunused-parameter */
switch(order) {
case _Py_memory_order_release:
case _Py_memory_order_acq_rel:
@@ -219,7 +223,291 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
result; \
})
#else /* !gcc x86 */
#elif defined(_MSC_VER)
/* _Interlocked* functions provide a full memory barrier and are therefore
enough for acq_rel and seq_cst. If the HLE variants aren't available
in hardware they will fall back to a full memory barrier as well.
This might affect performance, but likely only in some very specific and
hard to measure scenarios.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
_Py_memory_order_relaxed,
_Py_memory_order_acquire,
_Py_memory_order_release,
_Py_memory_order_acq_rel,
_Py_memory_order_seq_cst
} _Py_memory_order;
typedef struct _Py_atomic_address {
volatile uintptr_t _value;
} _Py_atomic_address;
typedef struct _Py_atomic_int {
volatile int _value;
} _Py_atomic_int;
#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
switch (ORDER) { \
case _Py_memory_order_acquire: \
_InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
case _Py_memory_order_release: \
_InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
default: \
_InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
}
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif
#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
switch (ORDER) { \
case _Py_memory_order_acquire: \
_InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
case _Py_memory_order_release: \
_InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
default: \
_InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
}
#if defined(_M_X64)
/* This has to be an intptr_t for now.
gil_created() uses -1 as a sentinel value; if this returns
a uintptr_t it will do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
uintptr_t old;
switch (order) {
case _Py_memory_order_acquire:
{
do {
old = *value;
} while(_InterlockedCompareExchange64_HLEAcquire(value, old, old) != old);
break;
}
case _Py_memory_order_release:
{
do {
old = *value;
} while(_InterlockedCompareExchange64_HLERelease(value, old, old) != old);
break;
}
case _Py_memory_order_relaxed:
old = *value;
break;
default:
{
do {
old = *value;
} while(_InterlockedCompareExchange64(value, old, old) != old);
break;
}
}
return old;
}
#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif
inline int _Py_atomic_load_32bit(volatile int* value, int order) {
int old;
switch (order) {
case _Py_memory_order_acquire:
{
do {
old = *value;
} while(_InterlockedCompareExchange_HLEAcquire(value, old, old) != old);
break;
}
case _Py_memory_order_release:
{
do {
old = *value;
} while(_InterlockedCompareExchange_HLERelease(value, old, old) != old);
break;
}
case _Py_memory_order_relaxed:
old = *value;
break;
default:
{
do {
old = *value;
} while(_InterlockedCompareExchange(value, old, old) != old);
break;
}
}
return old;
}
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
if (sizeof(*ATOMIC_VAL._value) == 8) { \
_Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
_Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
( \
sizeof(*(ATOMIC_VAL._value)) == 8 ? \
_Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
_Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
)
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
_Py_memory_order_relaxed,
_Py_memory_order_acquire,
_Py_memory_order_release,
_Py_memory_order_acq_rel,
_Py_memory_order_seq_cst
} _Py_memory_order;
typedef struct _Py_atomic_address {
volatile uintptr_t _value;
} _Py_atomic_address;
typedef struct _Py_atomic_int {
volatile int _value;
} _Py_atomic_int;
#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
switch (ORDER) { \
case _Py_memory_order_acquire: \
_InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
case _Py_memory_order_release: \
_InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
default: \
_InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
break; \
}
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif
#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
switch (ORDER) { \
case _Py_memory_order_acquire: \
_InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
case _Py_memory_order_release: \
_InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
default: \
_InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
break; \
}
#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
gil_created() uses -1 as a sentinel value; if this returns
a uintptr_t it will do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
uintptr_t old;
switch (order) {
case _Py_memory_order_acquire:
{
do {
old = *value;
} while(_InterlockedCompareExchange64_acq(value, old, old) != old);
break;
}
case _Py_memory_order_release:
{
do {
old = *value;
} while(_InterlockedCompareExchange64_rel(value, old, old) != old);
break;
}
case _Py_memory_order_relaxed:
old = *value;
break;
default:
{
do {
old = *value;
} while(_InterlockedCompareExchange64(value, old, old) != old);
break;
}
}
return old;
}
#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif
inline int _Py_atomic_load_32bit(volatile int* value, int order) {
int old;
switch (order) {
case _Py_memory_order_acquire:
{
do {
old = *value;
} while(_InterlockedCompareExchange_acq(value, old, old) != old);
break;
}
case _Py_memory_order_release:
{
do {
old = *value;
} while(_InterlockedCompareExchange_rel(value, old, old) != old);
break;
}
case _Py_memory_order_relaxed:
old = *value;
break;
default:
{
do {
old = *value;
} while(_InterlockedCompareExchange(value, old, old) != old);
break;
}
}
return old;
}
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
if (sizeof(*ATOMIC_VAL._value) == 8) { \
_Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
_Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
( \
sizeof(*(ATOMIC_VAL._value)) == 8 ? \
_Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
_Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
)
#endif
#else /* !gcc x86 !_msc_ver */
typedef enum _Py_memory_order {
_Py_memory_order_relaxed,
_Py_memory_order_acquire,
_Py_memory_order_release,
_Py_memory_order_acq_rel,
_Py_memory_order_seq_cst
} _Py_memory_order;
typedef struct _Py_atomic_address {
uintptr_t _value;
} _Py_atomic_address;
typedef struct _Py_atomic_int {
int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
volatile accesses are atomic. This is false, so people should port
this. */
@@ -229,8 +517,6 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
((ATOMIC_VAL)->_value)
#endif /* !gcc x86 */
#endif
/* Standardized shortcuts. */
@@ -245,6 +531,5 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
_Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
_Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
#endif /* Py_BUILD_CORE */
#endif /* Py_ATOMIC_H */
Add a non-dummy implementation of _Py_atomic_store and _Py_atomic_load on
MSVC.
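The load helpers in the patch emulate a load with barrier semantics by issuing a compare-exchange that swaps the current value with itself and retrying until it succeeds; the value returned by the compare-exchange is then an atomic snapshot. Below is a standalone, MSVC-only sketch of that pattern; the function and variable names are invented for illustration and are not CPython code.

#include <stdio.h>
#include <intrin.h>

static volatile long shared_value = 42;

static long
load_with_full_barrier(volatile long *p)
{
    long old;
    do {
        old = *p;
        /* _InterlockedCompareExchange(p, old, old) stores old back only if *p
           still equals old, and returns the value it observed; if another
           thread changed *p in between, the observed value differs and we
           retry with the fresh value. */
    } while (_InterlockedCompareExchange(p, old, old) != old);
    return old;
}

int
main(void)
{
    printf("%ld\n", load_with_full_barrier(&shared_value));
    return 0;
}

The 32-bit and 64-bit variants in the patch follow this same shape, and _Py_atomic_load_explicit picks between them with a sizeof() check on the atomic struct's _value field.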