Cleanup and refactor our atomic library.
This commit: * Removes most of the dirty internal details from the public atomic_ops.h file, and moves them into the private /intern subdirectory. * Removes unused 'architectures' (__APPLE__ and jemalloc). * Splits each implementation into its own file. * Uses C99's limits.h system header to determine pointer and int size, instead of a fixed, hardcoded list of architectures. * Introduces new 'faked' atomic operations for floats. Note that we may add many more real and 'faked' atomic operations over integers and floats (multiplication, division, bit shifts, bitwise booleans, etc.) as needs arise. Reviewers: sergey, campbellbarton Differential Revision: https://developer.blender.org/D1982
This commit is contained in:
@@ -1,11 +1,11 @@
|
|||||||
/*
|
/*
|
||||||
* Adopted from jemalloc with this license:
|
* Original code from jemalloc with this license:
|
||||||
*
|
*
|
||||||
* Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
|
* Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
||||||
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
||||||
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
* 1. Redistributions of source code must retain the above copyright notice(s),
|
* 1. Redistributions of source code must retain the above copyright notice(s),
|
||||||
@@ -13,7 +13,7 @@
|
|||||||
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
||||||
* this list of conditions and the following disclaimer in the documentation
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
* and/or other materials provided with the distribution.
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
||||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
@@ -24,64 +24,59 @@
|
|||||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* ***** BEGIN GPL LICENSE BLOCK *****
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*
|
||||||
|
* The Original Code is Copyright (C) 2016 Blender Foundation.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* The Original Code is: adapted from jemalloc.
|
||||||
|
*
|
||||||
|
* ***** END GPL LICENSE BLOCK *****
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \file atomic_ops.h
|
||||||
|
* \ingroup Atomic
|
||||||
|
*
|
||||||
|
* \author Copyright (C) 2016 Blender Foundation, adapted from jemalloc.
|
||||||
|
* \brief Provides wrapper around system-specific atomic primitives, and some extensions (faked-atomic operations
|
||||||
|
* over float numbers).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __ATOMIC_OPS_H__
|
#ifndef __ATOMIC_OPS_H__
|
||||||
#define __ATOMIC_OPS_H__
|
#define __ATOMIC_OPS_H__
|
||||||
|
|
||||||
#include <assert.h>
|
#if defined(__arm__)
|
||||||
|
|
||||||
#if defined (__APPLE__)
|
|
||||||
# include <libkern/OSAtomic.h>
|
|
||||||
#elif defined(_MSC_VER)
|
|
||||||
# define NOGDI
|
|
||||||
# ifndef NOMINMAX
|
|
||||||
# define NOMINMAX
|
|
||||||
# endif
|
|
||||||
# define WIN32_LEAN_AND_MEAN
|
|
||||||
# include <windows.h>
|
|
||||||
#elif defined(__arm__)
|
|
||||||
/* Attempt to fix compilation error on Debian armel kernel.
|
/* Attempt to fix compilation error on Debian armel kernel.
|
||||||
* arm7 architecture does have both 32 and 64bit atomics, however
|
* arm7 architecture does have both 32 and 64bit atomics, however
|
||||||
* its gcc doesn't have __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n defined.
|
* its gcc doesn't have __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n defined.
|
||||||
*/
|
*/
|
||||||
# define JE_FORCE_SYNC_COMPARE_AND_SWAP_1
|
# define JE_FORCE_SYNC_COMPARE_AND_SWAP_1
|
||||||
# define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
|
|
||||||
# define JE_FORCE_SYNC_COMPARE_AND_SWAP_4
|
# define JE_FORCE_SYNC_COMPARE_AND_SWAP_4
|
||||||
|
# define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* needed for int types */
|
#include "intern/atomic_ops_utils.h"
|
||||||
#include "../../source/blender/blenlib/BLI_sys_types.h"
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
/* little macro so inline keyword works */
|
/******************************************************************************/
|
||||||
#if defined(_MSC_VER)
|
|
||||||
# define ATOMIC_INLINE static __forceinline
|
|
||||||
#else
|
|
||||||
# if (defined(__APPLE__) && defined(__ppc__))
|
|
||||||
/* static inline __attribute__ here breaks osx ppc gcc42 build */
|
|
||||||
# define ATOMIC_INLINE static __attribute__((always_inline))
|
|
||||||
# else
|
|
||||||
# define ATOMIC_INLINE static inline __attribute__((always_inline))
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* This is becoming a bit nastier that it was originally foreseen,
|
|
||||||
* consider using autoconfig detection instead.
|
|
||||||
*/
|
|
||||||
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__s390x__) || defined(__powerpc64__) || defined(__aarch64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__alpha__) || defined(__mips64)
|
|
||||||
# define LG_SIZEOF_PTR 3
|
|
||||||
# define LG_SIZEOF_INT 2
|
|
||||||
#else
|
|
||||||
# define LG_SIZEOF_PTR 2
|
|
||||||
# define LG_SIZEOF_INT 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/************************/
|
|
||||||
/* Function prototypes. */
|
/* Function prototypes. */
|
||||||
|
|
||||||
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
|
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
|
||||||
ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
|
ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
|
||||||
ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
|
ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
|
||||||
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
|
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new);
|
||||||
@@ -102,420 +97,22 @@ ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x);
|
|||||||
ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x);
|
ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x);
|
||||||
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
|
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new);
|
||||||
|
|
||||||
/******************************************************************************/
|
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,
|
||||||
/* 64-bit operations. */
|
* which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads
|
||||||
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
|
* working on the same pointer at the same time is very low). */
|
||||||
# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
|
ATOMIC_INLINE float atomic_add_fl(float *p, const float x);
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
return __sync_add_and_fetch(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
return __sync_sub_and_fetch(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
|
|
||||||
{
|
|
||||||
return __sync_val_compare_and_swap(v, old, _new);
|
|
||||||
}
|
|
||||||
#elif (defined(_MSC_VER))
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
|
|
||||||
{
|
|
||||||
return InterlockedCompareExchange64((int64_t *)v, _new, old);
|
|
||||||
}
|
|
||||||
#elif (defined(__APPLE__))
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
return (uint64_t)OSAtomicAdd64((int64_t)x, (int64_t *)p);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
return (uint64_t)OSAtomicAdd64(-((int64_t)x), (int64_t *)p);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
|
|
||||||
{
|
|
||||||
uint64_t init_val = *v;
|
|
||||||
OSAtomicCompareAndSwap64((int64_t)old, (int64_t)_new, (int64_t *)v);
|
|
||||||
return init_val;
|
|
||||||
}
|
|
||||||
# elif (defined(__amd64__) || defined(__x86_64__))
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
asm volatile (
|
|
||||||
"lock; xaddq %0, %1;"
|
|
||||||
: "+r" (x), "=m" (*p) /* Outputs. */
|
|
||||||
: "m" (*p) /* Inputs. */
|
|
||||||
);
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
x = (uint64_t)(-(int64_t)x);
|
|
||||||
asm volatile (
|
|
||||||
"lock; xaddq %0, %1;"
|
|
||||||
: "+r" (x), "=m" (*p) /* Outputs. */
|
|
||||||
: "m" (*p) /* Inputs. */
|
|
||||||
);
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
|
|
||||||
{
|
|
||||||
uint64_t ret;
|
|
||||||
asm volatile (
|
|
||||||
"lock; cmpxchgq %2,%1"
|
|
||||||
: "=a" (ret), "+m" (*v)
|
|
||||||
: "r" (_new), "0" (old)
|
|
||||||
: "memory");
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
# elif (defined(JEMALLOC_ATOMIC9))
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* atomic_fetchadd_64() doesn't exist, but we only ever use this
|
|
||||||
* function on LP64 systems, so atomic_fetchadd_long() will do.
|
|
||||||
*/
|
|
||||||
assert(sizeof(uint64_t) == sizeof(unsigned long));
|
|
||||||
|
|
||||||
return atomic_fetchadd_long(p, (unsigned long)x) + x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
assert(sizeof(uint64_t) == sizeof(unsigned long));
|
|
||||||
|
|
||||||
return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
|
|
||||||
{
|
|
||||||
assert(sizeof(uint64_t) == sizeof(unsigned long));
|
|
||||||
|
|
||||||
return atomic_cmpset_long(v, old, _new);
|
|
||||||
}
|
|
||||||
# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_add_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
return __sync_add_and_fetch(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_sub_uint64(uint64_t *p, uint64_t x)
|
|
||||||
{
|
|
||||||
return __sync_sub_and_fetch(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint64_t
|
|
||||||
atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
|
|
||||||
{
|
|
||||||
return __sync_val_compare_and_swap(v, old, _new);
|
|
||||||
}
|
|
||||||
# else
|
|
||||||
# error "Missing implementation for 64-bit atomic operations"
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/* 32-bit operations. */
|
/* Include system-dependent implementations. */
|
||||||
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return __sync_add_and_fetch(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
/* Note that we are using _unix flavor as fallback here (it will raise precompiler errors as needed). */
|
||||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
#if defined(_MSC_VER)
|
||||||
{
|
# include "intern/atomic_ops_msvc.h"
|
||||||
return __sync_sub_and_fetch(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
|
|
||||||
{
|
|
||||||
return __sync_val_compare_and_swap(v, old, _new);
|
|
||||||
}
|
|
||||||
#elif (defined(_MSC_VER))
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return InterlockedExchangeAdd(p, x) + x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
|
|
||||||
{
|
|
||||||
return InterlockedCompareExchange((long *)v, _new, old);
|
|
||||||
}
|
|
||||||
#elif (defined(__APPLE__))
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return (uint32_t)OSAtomicAdd32((int32_t)x, (int32_t *)p);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return (uint32_t)OSAtomicAdd32(-((int32_t)x), (int32_t *)p);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
|
|
||||||
{
|
|
||||||
uint32_t init_val = *v;
|
|
||||||
OSAtomicCompareAndSwap32((int32_t)old, (int32_t)_new, (int32_t *)v);
|
|
||||||
return init_val;
|
|
||||||
}
|
|
||||||
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
asm volatile (
|
|
||||||
"lock; xaddl %0, %1;"
|
|
||||||
: "+r" (x), "=m" (*p) /* Outputs. */
|
|
||||||
: "m" (*p) /* Inputs. */
|
|
||||||
);
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
x = (uint32_t)(-(int32_t)x);
|
|
||||||
asm volatile (
|
|
||||||
"lock; xaddl %0, %1;"
|
|
||||||
: "+r" (x), "=m" (*p) /* Outputs. */
|
|
||||||
: "m" (*p) /* Inputs. */
|
|
||||||
);
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
|
|
||||||
{
|
|
||||||
uint32_t ret;
|
|
||||||
asm volatile (
|
|
||||||
"lock; cmpxchgl %2,%1"
|
|
||||||
: "=a" (ret), "+m" (*v)
|
|
||||||
: "r" (_new), "0" (old)
|
|
||||||
: "memory");
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
#elif (defined(JEMALLOC_ATOMIC9))
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return atomic_fetchadd_32(p, x) + x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x;
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
|
|
||||||
{
|
|
||||||
return atomic_cmpset_32(v, old, _new);
|
|
||||||
}
|
|
||||||
#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_add_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return __sync_add_and_fetch(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_sub_uint32(uint32_t *p, uint32_t x)
|
|
||||||
{
|
|
||||||
return __sync_sub_and_fetch(p, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE uint32_t
|
|
||||||
atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
|
|
||||||
{
|
|
||||||
return __sync_val_compare_and_swap(v, old, _new);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
# error "Missing implementation for 32-bit atomic operations"
|
# include "intern/atomic_ops_unix.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/******************************************************************************/
|
/* Include 'fake' atomic extensions, built over real atomic primitives. */
|
||||||
/* 8-bit operations. */
|
#include "intern/atomic_ops_ext.h"
|
||||||
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
|
|
||||||
ATOMIC_INLINE uint8_t
|
|
||||||
atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
|
|
||||||
{
|
|
||||||
return __sync_fetch_and_or(p, b);
|
|
||||||
}
|
|
||||||
ATOMIC_INLINE uint8_t
|
|
||||||
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
|
|
||||||
{
|
|
||||||
return __sync_fetch_and_and(p, b);
|
|
||||||
}
|
|
||||||
#elif (defined(_MSC_VER))
|
|
||||||
#include <intrin.h>
|
|
||||||
#pragma intrinsic(_InterlockedAnd8)
|
|
||||||
ATOMIC_INLINE uint8_t
|
|
||||||
atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
|
|
||||||
{
|
|
||||||
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
|
|
||||||
return InterlockedOr8((char *)p, (char)b);
|
|
||||||
#else
|
|
||||||
return _InterlockedOr8((char *)p, (char)b);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
ATOMIC_INLINE uint8_t
|
|
||||||
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
|
|
||||||
{
|
|
||||||
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
|
|
||||||
return InterlockedAnd8((char *)p, (char)b);
|
|
||||||
#else
|
|
||||||
return _InterlockedAnd8((char *)p, (char)b);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1)
|
|
||||||
ATOMIC_INLINE uint8_t
|
|
||||||
atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
|
|
||||||
{
|
|
||||||
return __sync_fetch_and_or(p, b);
|
|
||||||
}
|
|
||||||
ATOMIC_INLINE uint8_t
|
|
||||||
atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
|
|
||||||
{
|
|
||||||
return __sync_fetch_and_and(p, b);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
# error "Missing implementation for 8-bit atomic operations"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
/* size_t operations. */
|
|
||||||
ATOMIC_INLINE size_t
|
|
||||||
atomic_add_z(size_t *p, size_t x)
|
|
||||||
{
|
|
||||||
assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
|
|
||||||
|
|
||||||
#if (LG_SIZEOF_PTR == 3)
|
|
||||||
return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
|
|
||||||
#elif (LG_SIZEOF_PTR == 2)
|
|
||||||
return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE size_t
|
|
||||||
atomic_sub_z(size_t *p, size_t x)
|
|
||||||
{
|
|
||||||
assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
|
|
||||||
|
|
||||||
#if (LG_SIZEOF_PTR == 3)
|
|
||||||
return (size_t)atomic_add_uint64((uint64_t *)p,
|
|
||||||
(uint64_t)-((int64_t)x));
|
|
||||||
#elif (LG_SIZEOF_PTR == 2)
|
|
||||||
return (size_t)atomic_add_uint32((uint32_t *)p,
|
|
||||||
(uint32_t)-((int32_t)x));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE size_t
|
|
||||||
atomic_cas_z(size_t *v, size_t old, size_t _new)
|
|
||||||
{
|
|
||||||
assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR);
|
|
||||||
|
|
||||||
#if (LG_SIZEOF_PTR == 3)
|
|
||||||
return (size_t)atomic_cas_uint64((uint64_t *)v,
|
|
||||||
(uint64_t)old,
|
|
||||||
(uint64_t)_new);
|
|
||||||
#elif (LG_SIZEOF_PTR == 2)
|
|
||||||
return (size_t)atomic_cas_uint32((uint32_t *)v,
|
|
||||||
(uint32_t)old,
|
|
||||||
(uint32_t)_new);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
/* unsigned operations. */
|
|
||||||
ATOMIC_INLINE unsigned
|
|
||||||
atomic_add_u(unsigned *p, unsigned x)
|
|
||||||
{
|
|
||||||
assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
|
|
||||||
|
|
||||||
#if (LG_SIZEOF_INT == 3)
|
|
||||||
return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
|
|
||||||
#elif (LG_SIZEOF_INT == 2)
|
|
||||||
return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE unsigned
|
|
||||||
atomic_sub_u(unsigned *p, unsigned x)
|
|
||||||
{
|
|
||||||
assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
|
|
||||||
|
|
||||||
#if (LG_SIZEOF_INT == 3)
|
|
||||||
return (unsigned)atomic_add_uint64((uint64_t *)p,
|
|
||||||
(uint64_t)-((int64_t)x));
|
|
||||||
#elif (LG_SIZEOF_INT == 2)
|
|
||||||
return (unsigned)atomic_add_uint32((uint32_t *)p,
|
|
||||||
(uint32_t)-((int32_t)x));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
ATOMIC_INLINE unsigned
|
|
||||||
atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
|
|
||||||
{
|
|
||||||
assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT);
|
|
||||||
|
|
||||||
#if (LG_SIZEOF_PTR == 3)
|
|
||||||
return (unsigned)atomic_cas_uint64((uint64_t *)v,
|
|
||||||
(uint64_t)old,
|
|
||||||
(uint64_t)_new);
|
|
||||||
#elif (LG_SIZEOF_PTR == 2)
|
|
||||||
return (unsigned)atomic_cas_uint32((uint32_t *)v,
|
|
||||||
(uint32_t)old,
|
|
||||||
(uint32_t)_new);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* __ATOMIC_OPS_H__ */
|
#endif /* __ATOMIC_OPS_H__ */
|
||||||
|
146
intern/atomic/intern/atomic_ops_ext.h
Normal file
146
intern/atomic/intern/atomic_ops_ext.h
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
/*
|
||||||
|
* Original code from jemalloc with this license:
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
||||||
|
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice(s),
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
|
* EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* ***** BEGIN GPL LICENSE BLOCK *****
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*
|
||||||
|
* The Original Code is Copyright (C) 2016 Blender Foundation.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* The Original Code is: adapted from jemalloc.
|
||||||
|
*
|
||||||
|
* ***** END GPL LICENSE BLOCK *****
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ATOMIC_OPS_EXT_H__
|
||||||
|
#define __ATOMIC_OPS_EXT_H__
|
||||||
|
|
||||||
|
#include <string.h>

#include "atomic_ops_utils.h"
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* size_t operations. */
|
||||||
|
/**
 * Add \a x to the size_t stored at \a p.
 *
 * Forwards to the fixed-width add (atomic_add_uint64 or atomic_add_uint32)
 * whose width matches LG_SIZEOF_PTR, and returns that call's result.
 */
ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x)
{
	/* The pointer cast below is only meaningful if size_t has the selected width. */
	assert(sizeof(size_t) == LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 8)
	uint64_t *target = (uint64_t *)p;
	return (size_t)atomic_add_uint64(target, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 4)
	uint32_t *target = (uint32_t *)p;
	return (size_t)atomic_add_uint32(target, (uint32_t)x);
#endif
}
|
||||||
|
|
||||||
|
/**
 * Subtract \a x from the size_t stored at \a p.
 *
 * Implemented as an addition of the two's complement of \a x through the
 * fixed-width add (atomic_add_uint64 or atomic_add_uint32) whose width matches
 * LG_SIZEOF_PTR; returns that call's result.
 */
ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x)
{
	/* The pointer cast below is only meaningful if size_t has the selected width. */
	assert(sizeof(size_t) == LG_SIZEOF_PTR);

	/* Negate in unsigned arithmetic, where wrap-around is well defined.
	 * Negating after a cast to a signed type overflows (undefined behavior)
	 * when x is exactly the most negative representable value. */
#if (LG_SIZEOF_PTR == 8)
	return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0 - (uint64_t)x);
#elif (LG_SIZEOF_PTR == 4)
	return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)(0u - (uint32_t)x));
#endif
}
|
||||||
|
|
||||||
|
/**
 * Compare-and-swap on the size_t stored at \a v, with \a old as the expected
 * value and \a _new as the replacement.
 *
 * Forwards to the fixed-width CAS (atomic_cas_uint64 or atomic_cas_uint32)
 * whose width matches LG_SIZEOF_PTR, and returns that call's result.
 */
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
{
	/* The pointer cast below is only meaningful if size_t has the selected width. */
	assert(sizeof(size_t) == LG_SIZEOF_PTR);

#if (LG_SIZEOF_PTR == 8)
	uint64_t *target = (uint64_t *)v;
	return (size_t)atomic_cas_uint64(target, (uint64_t)old, (uint64_t)_new);
#elif (LG_SIZEOF_PTR == 4)
	uint32_t *target = (uint32_t *)v;
	return (size_t)atomic_cas_uint32(target, (uint32_t)old, (uint32_t)_new);
#endif
}
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* unsigned operations. */
|
||||||
|
ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x)
|
||||||
|
{
|
||||||
|
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||||
|
|
||||||
|
#if (LG_SIZEOF_INT == 8)
|
||||||
|
return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x);
|
||||||
|
#elif (LG_SIZEOF_INT == 4)
|
||||||
|
return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x)
|
||||||
|
{
|
||||||
|
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||||
|
|
||||||
|
#if (LG_SIZEOF_INT == 8)
|
||||||
|
return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
|
||||||
|
#elif (LG_SIZEOF_INT == 4)
|
||||||
|
return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new)
|
||||||
|
{
|
||||||
|
assert(sizeof(unsigned) == LG_SIZEOF_INT);
|
||||||
|
|
||||||
|
#if (LG_SIZEOF_INT == 8)
|
||||||
|
return (unsigned)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new);
|
||||||
|
#elif (LG_SIZEOF_INT == 4)
|
||||||
|
return (unsigned)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* float operations. */
|
||||||
|
|
||||||
|
/**
 * 'Faked' atomic float addition: adds \a x to \a *p and returns the sum that
 * this call stored.
 *
 * Works as a compare-and-swap retry loop over the 32-bit pattern of the float:
 * the sum is computed from a snapshot of \a *p and only written back if \a *p
 * still holds the snapshot's bits, otherwise the whole step is retried.
 * Bit patterns (not float values) are compared, so the loop condition does not
 * depend on floating-point equality rules.
 */
ATOMIC_INLINE float atomic_add_fl(float *p, const float x)
{
	assert(sizeof(float) == sizeof(uint32_t));

	float oldval, newval;
	uint32_t oldbits, newbits, prevbits;

	do {  /* Note that since collisions are unlikely, loop will nearly always run once. */
		oldval = *p;
		newval = oldval + x;

		/* Extract the bit patterns with memcpy(): reading a float object through
		 * a uint32_t pointer violates the strict aliasing rules. */
		memcpy(&oldbits, &oldval, sizeof(oldbits));
		memcpy(&newbits, &newval, sizeof(newbits));

		/* The CAS result is compared against the snapshot to detect a concurrent change. */
		prevbits = atomic_cas_uint32((uint32_t *)p, oldbits, newbits);
	} while (UNLIKELY(prevbits != oldbits));

	return newval;
}
|
||||||
|
|
||||||
|
#endif /* __ATOMIC_OPS_EXT_H__ */
|
102
intern/atomic/intern/atomic_ops_msvc.h
Normal file
102
intern/atomic/intern/atomic_ops_msvc.h
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
* Adopted from jemalloc with this license:
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
||||||
|
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
||||||
|
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice(s),
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
|
* EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ATOMIC_OPS_MSVC_H__
|
||||||
|
#define __ATOMIC_OPS_MSVC_H__
|
||||||
|
|
||||||
|
#include "atomic_ops_utils.h"
|
||||||
|
|
||||||
|
/* Trim down what <windows.h> pulls in. Every switch is guarded on its own:
 * build systems commonly pass these on the command line (e.g. /DNOGDI), and
 * an unguarded `#define` would then be an incompatible macro redefinition. */
#ifndef NOGDI
#  define NOGDI
#endif
#ifndef NOMINMAX
#  define NOMINMAX
#endif
#ifndef WIN32_LEAN_AND_MEAN
#  define WIN32_LEAN_AND_MEAN
#endif
|
||||||
|
|
||||||
|
#include <windows.h>
|
||||||
|
#include <intrin.h>
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* 64-bit operations. */
|
||||||
|
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
|
||||||
|
/* Atomically add `x` to `*p` and return the resulting (post-addition) value. */
ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x)
{
	/* The addend is added once more to the intrinsic's result to obtain the
	 * returned value. */
	const int64_t before = InterlockedExchangeAdd64((int64_t *)p, (int64_t)x);
	return before + x;
}
|
||||||
|
|
||||||
|
/* Atomically subtract `x` from `*p` and return the resulting (post-subtraction)
 * value. */
ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x)
{
	/* Subtraction is performed as addition of the two's complement. The
	 * negation is done on the unsigned value, where wrap-around is well
	 * defined; negating the signed value would overflow for x == 2^63. */
	const int64_t addend = (int64_t)((uint64_t)0 - x);
	return InterlockedExchangeAdd64((int64_t *)p, addend) - x;
}
|
||||||
|
|
||||||
|
/* 64-bit compare-and-swap on `*v`: `old` is the comparand and `_new` the
 * replacement value. Returns whatever the Win32 intrinsic reports. */
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
	/* Note the Win32 argument order: (destination, exchange, comparand). */
	int64_t *const target = (int64_t *)v;
	return InterlockedCompareExchange64(target, _new, old);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* 32-bit operations. */
|
||||||
|
/* Atomically add `x` to `*p` and return the resulting (post-addition) value. */
ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x)
{
	/* InterlockedExchangeAdd() operates on LONG; cast explicitly (as
	 * atomic_cas_uint32() does) instead of passing an incompatible pointer
	 * type. The addend is added once more to the intrinsic's result to obtain
	 * the returned value. */
	return InterlockedExchangeAdd((long *)p, (long)x) + x;
}
|
||||||
|
|
||||||
|
/* Atomically subtract `x` from `*p` and return the resulting (post-subtraction)
 * value. */
ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x)
{
	/* Subtraction is performed as addition of the two's complement, computed
	 * in unsigned arithmetic so that x == 2^31 does not overflow a signed
	 * negation. Pointer and addend are cast to the LONG type the intrinsic
	 * expects, as atomic_cas_uint32() does. */
	const long addend = (long)((uint32_t)0 - x);
	return InterlockedExchangeAdd((long *)p, addend) - x;
}
|
||||||
|
|
||||||
|
/* 32-bit compare-and-swap on `*v`: `old` is the comparand and `_new` the
 * replacement value. Returns whatever the Win32 intrinsic reports. */
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
	/* Note the Win32 argument order: (destination, exchange, comparand). */
	long *const target = (long *)v;
	return InterlockedCompareExchange(target, _new, old);
}
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* 8-bit operations. */
|
||||||
|
|
||||||
|
#pragma intrinsic(_InterlockedAnd8)
|
||||||
|
/* Atomic bitwise AND of the byte at `p` with `b`; returns the intrinsic's
 * result converted to uint8_t. */
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
	/* The intrinsics work on plain `char`. */
	char *const target = (char *)p;
	const char mask = (char)b;
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
	return InterlockedAnd8(target, mask);
#else
	/* 32-bit builds call the underscore-prefixed compiler intrinsic. */
	return _InterlockedAnd8(target, mask);
#endif
}
|
||||||
|
|
||||||
|
#pragma intrinsic(_InterlockedOr8)
|
||||||
|
/* Atomic bitwise OR of the byte at `p` with `b`; returns the intrinsic's
 * result converted to uint8_t. */
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
	/* The intrinsics work on plain `char`. */
	char *const target = (char *)p;
	const char bits = (char)b;
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
	return InterlockedOr8(target, bits);
#else
	/* 32-bit builds call the underscore-prefixed compiler intrinsic. */
	return _InterlockedOr8(target, bits);
#endif
}
|
||||||
|
|
||||||
|
#endif /* __ATOMIC_OPS_MSVC_H__ */
|
180
intern/atomic/intern/atomic_ops_unix.h
Normal file
180
intern/atomic/intern/atomic_ops_unix.h
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
/*
|
||||||
|
* Original code from jemalloc with this license:
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
||||||
|
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice(s),
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
|
* EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* ***** BEGIN GPL LICENSE BLOCK *****
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*
|
||||||
|
* The Original Code is Copyright (C) 2016 Blender Foundation.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* The Original Code is: adapted from jemalloc.
|
||||||
|
*
|
||||||
|
* ***** END GPL LICENSE BLOCK *****
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ATOMIC_OPS_GCC_H__
|
||||||
|
#define __ATOMIC_OPS_GCC_H__
|
||||||
|
|
||||||
|
#include "atomic_ops_utils.h"
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* 64-bit operations. */
|
||||||
|
#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8)
|
||||||
|
# if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
|
||||||
|
/* Atomically add `x` to `*p`; returns the result of the GCC
 * add-and-fetch builtin. */
ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x)
{
	const uint64_t updated = __sync_add_and_fetch(p, x);
	return updated;
}
|
||||||
|
|
||||||
|
/* Atomically subtract `x` from `*p`; returns the result of the GCC
 * sub-and-fetch builtin. */
ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x)
{
	const uint64_t updated = __sync_sub_and_fetch(p, x);
	return updated;
}
|
||||||
|
|
||||||
|
/* 64-bit compare-and-swap on `*v` with comparand `old` and replacement `_new`;
 * returns the result of the GCC value-returning CAS builtin. */
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
	const uint64_t witnessed = __sync_val_compare_and_swap(v, old, _new);
	return witnessed;
}
|
||||||
|
# elif (defined(__amd64__) || defined(__x86_64__))
|
||||||
|
/* Atomically add `x` to `*p` (x86-64 inline assembly fallback).
 * Returns the value of `*p` AFTER the addition, like the
 * __sync_add_and_fetch() based variant of this function. */
ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x)
{
	/* `xadd` swaps its operands before storing the sum, so the register
	 * operand comes back holding the value `*p` had BEFORE the addition.
	 * Work on a copy so the addend is still available afterwards. */
	uint64_t prev = x;
	__asm__ __volatile__ (
	    "lock; xaddq %0, %1;"
	    : "+r" (prev), "=m" (*p)  /* Outputs. */
	    : "m" (*p)                /* Inputs. */
	    );
	return prev + x;
}
|
||||||
|
|
||||||
|
/* Atomically subtract `x` from `*p` (x86-64 inline assembly fallback).
 * Returns the value of `*p` AFTER the subtraction, like the
 * __sync_sub_and_fetch() based variant of this function. */
ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x)
{
	/* Subtract by adding the two's complement. Negating in unsigned
	 * arithmetic is well defined for every value of `x`. */
	uint64_t prev = (uint64_t)0 - x;
	__asm__ __volatile__ (
	    "lock; xaddq %0, %1;"
	    : "+r" (prev), "=m" (*p)  /* Outputs. */
	    : "m" (*p)                /* Inputs. */
	    );
	/* `xadd` left the pre-operation value of `*p` in `prev`. */
	return prev - x;
}
|
||||||
|
|
||||||
|
/* 64-bit compare-and-swap on `*v` (x86-64 inline assembly fallback), with
 * comparand `old` and replacement `_new`. Returns the content of the "a"
 * register after `cmpxchg`. */
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
	uint64_t witnessed;
	asm volatile (
	    "lock; cmpxchgq %2,%1"
	    : "=a" (witnessed), "+m" (*v)  /* Outputs. */
	    : "r" (_new), "0" (old)        /* Inputs; `old` shares operand 0's register. */
	    : "memory");
	return witnessed;
}
|
||||||
|
# else
|
||||||
|
# error "Missing implementation for 64-bit atomic operations"
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* 32-bit operations. */
|
||||||
|
#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
|
||||||
|
/* Atomically add `x` to `*p`; returns the result of the GCC
 * add-and-fetch builtin. */
ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x)
{
	const uint32_t updated = __sync_add_and_fetch(p, x);
	return updated;
}
|
||||||
|
|
||||||
|
/* Atomically subtract `x` from `*p`; returns the result of the GCC
 * sub-and-fetch builtin. */
ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x)
{
	const uint32_t updated = __sync_sub_and_fetch(p, x);
	return updated;
}
|
||||||
|
|
||||||
|
/* 32-bit compare-and-swap on `*v` with comparand `old` and replacement `_new`;
 * returns the result of the GCC value-returning CAS builtin. */
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
	const uint32_t witnessed = __sync_val_compare_and_swap(v, old, _new);
	return witnessed;
}
|
||||||
|
#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
|
||||||
|
/* Atomically add `x` to `*p` (x86 inline assembly fallback).
 * Returns the value of `*p` AFTER the addition, like the
 * __sync_add_and_fetch() based variant of this function. */
ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x)
{
	/* `xadd` swaps its operands before storing the sum, so the register
	 * operand comes back holding the value `*p` had BEFORE the addition.
	 * Work on a copy so the addend is still available afterwards. */
	uint32_t prev = x;
	__asm__ __volatile__ (
	    "lock; xaddl %0, %1;"
	    : "+r" (prev), "=m" (*p)  /* Outputs. */
	    : "m" (*p)                /* Inputs. */
	    );
	return prev + x;
}
|
||||||
|
|
||||||
|
/* Atomically subtract `x` from `*p` (x86 inline assembly fallback).
 * Returns the value of `*p` AFTER the subtraction, like the
 * __sync_sub_and_fetch() based variant of this function. */
ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x)
{
	/* Subtract by adding the two's complement. Negating in unsigned
	 * arithmetic is well defined for every value of `x`. */
	uint32_t prev = (uint32_t)0 - x;
	__asm__ __volatile__ (
	    "lock; xaddl %0, %1;"
	    : "+r" (prev), "=m" (*p)  /* Outputs. */
	    : "m" (*p)                /* Inputs. */
	    );
	/* `xadd` left the pre-operation value of `*p` in `prev`. */
	return prev - x;
}
|
||||||
|
|
||||||
|
/* 32-bit compare-and-swap on `*v` (x86 inline assembly fallback), with
 * comparand `old` and replacement `_new`. Returns the content of the "a"
 * register after `cmpxchg`. */
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
	uint32_t witnessed;
	asm volatile (
	    "lock; cmpxchgl %2,%1"
	    : "=a" (witnessed), "+m" (*v)  /* Outputs. */
	    : "r" (_new), "0" (old)        /* Inputs; `old` shares operand 0's register. */
	    : "memory");
	return witnessed;
}
|
||||||
|
#else
|
||||||
|
# error "Missing implementation for 32-bit atomic operations"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/******************************************************************************/
|
||||||
|
/* 8-bit operations. */
|
||||||
|
#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1))
|
||||||
|
/* Atomic bitwise AND of the byte at `p` with `b`; returns the result of the
 * GCC fetch-and-and builtin. */
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
	const uint8_t fetched = __sync_fetch_and_and(p, b);
	return fetched;
}
|
||||||
|
/* Atomic bitwise OR of the byte at `p` with `b`; returns the result of the
 * GCC fetch-and-or builtin. */
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
	const uint8_t fetched = __sync_fetch_and_or(p, b);
	return fetched;
}
|
||||||
|
#else
|
||||||
|
# error "Missing implementation for 8-bit atomic operations"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* __ATOMIC_OPS_GCC_H__ */
|
110
intern/atomic/intern/atomic_ops_utils.h
Normal file
110
intern/atomic/intern/atomic_ops_utils.h
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
/*
|
||||||
|
* Original code from jemalloc with this license:
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
|
||||||
|
* Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice(s),
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
|
||||||
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
|
* EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||||
|
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* ***** BEGIN GPL LICENSE BLOCK *****
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*
|
||||||
|
* The Original Code is Copyright (C) 2016 Blender Foundation.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* The Original Code is: adapted from jemalloc.
|
||||||
|
*
|
||||||
|
* ***** END GPL LICENSE BLOCK *****
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ATOMIC_OPS_UTILS_H__
|
||||||
|
#define __ATOMIC_OPS_UTILS_H__
|
||||||
|
|
||||||
|
/* needed for int types */
|
||||||
|
#include "../../../source/blender/blenlib/BLI_sys_types.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
/* ATOMIC_INLINE: storage-class/inline specifier used for every atomic helper,
 * selected per compiler. */
#if defined(_MSC_VER)
#  define ATOMIC_INLINE static __forceinline
#elif (defined(__APPLE__) && defined(__ppc__))
/* static inline __attribute__ here breaks osx ppc gcc42 build */
#  define ATOMIC_INLINE static __attribute__((always_inline))
#else
#  define ATOMIC_INLINE static inline __attribute__((always_inline))
#endif
|
||||||
|
|
||||||
|
/* Branch prediction hints. With GCC-compatible compilers the condition is
 * normalized to 0/1 and passed to __builtin_expect(); elsewhere the macros
 * expand to the bare condition.
 *
 * Each macro carries its own guard, so a translation unit that already
 * defines only one of the two still ends up with both, and none is redefined. */
#ifdef __GNUC__
#  ifndef LIKELY
#    define LIKELY(x)       __builtin_expect(!!(x), 1)
#  endif
#  ifndef UNLIKELY
#    define UNLIKELY(x)     __builtin_expect(!!(x), 0)
#  endif
#else
#  ifndef LIKELY
#    define LIKELY(x)       (x)
#  endif
#  ifndef UNLIKELY
#    define UNLIKELY(x)     (x)
#  endif
#endif
|
||||||
|
|
||||||
|
/* LG_SIZEOF_PTR: pointer size in BYTES (4 or 8). Despite the `LG_` prefix
 * this is not a logarithm. Derived from UINTPTR_MAX when available,
 * otherwise from __WORDSIZE. */
#if defined(UINTPTR_MAX)
#  if (UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF)
#    define LG_SIZEOF_PTR 8
#  elif (UINTPTR_MAX == 0xFFFFFFFF)
#    define LG_SIZEOF_PTR 4
#  endif
#elif defined(__WORDSIZE)  /* Fallback for older glibc and cpp */
#  if (__WORDSIZE == 64)
#    define LG_SIZEOF_PTR 8
#  elif (__WORDSIZE == 32)
#    define LG_SIZEOF_PTR 4
#  endif
#endif

/* Neither probe matched a supported width: refuse to build. */
#if !defined(LG_SIZEOF_PTR)
#  error "Cannot find pointer size"
#endif
|
||||||
|
|
||||||
|
/* LG_SIZEOF_INT: size of `int` in BYTES (4 or 8). Despite the `LG_` prefix
 * this is not a logarithm. Derived from UINT_MAX (<limits.h>). */
#if (UINT_MAX == 0xFFFFFFFFFFFFFFFF)
#  define LG_SIZEOF_INT 8
#elif (UINT_MAX == 0xFFFFFFFF)
#  define LG_SIZEOF_INT 4
#else
#  error "Cannot find int size"
#endif
|
||||||
|
|
||||||
|
#endif /* __ATOMIC_OPS_UTILS_H__ */
|
@@ -979,16 +979,7 @@ static void pbvh_update_normals_accum_task_cb(void *userdata, const int n)
|
|||||||
* Not exact equivalent though, since atomicity is only ensured for one component
|
* Not exact equivalent though, since atomicity is only ensured for one component
|
||||||
* of the vector at a time, but here it shall not make any sensible difference. */
|
* of the vector at a time, but here it shall not make any sensible difference. */
|
||||||
for (int k = 3; k--; ) {
|
for (int k = 3; k--; ) {
|
||||||
/* Atomic float addition.
|
atomic_add_fl(&vnors[v][k], fn[k]);
|
||||||
* Note that since collision are unlikely, loop will nearly always run once. */
|
|
||||||
float oldval, newval;
|
|
||||||
uint32_t prevval;
|
|
||||||
do {
|
|
||||||
oldval = vnors[v][k];
|
|
||||||
newval = oldval + fn[k];
|
|
||||||
prevval = atomic_cas_uint32(
|
|
||||||
(uint32_t *)&vnors[v][k], *(uint32_t *)(&oldval), *(uint32_t *)(&newval));
|
|
||||||
} while (UNLIKELY(prevval != *(uint32_t *)(&oldval)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user