A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
-a-a-a-axsemaphore( uint32_t initial = 0 ) noexcept;
-a-a-a-axsemaphore( const xsemaphore & ) = delete;
-a-a-a-a~xsemaphore();
-a-a-a-axsemaphore &operator =( const xsemaphore & ) = delete;
-a-a-a-avoid acquire() noexcept;
-a-a-a-avoid release( uint32_t n = 1 ) noexcept;
private:
-a-a-a-astatic constexpr unsigned
-a-a-a-a-a-a-a MASK_BITS = 21,
-a-a-a-a-a-a-a NOTIFY_BASE = MASK_BITS,
-a-a-a-a-a-a-a WAIT_BASE = 2 * MASK_BITS;
-a-a-a-astatic constexpr uint64_t
-a-a-a-a-a-a-a MASK21 = 0x1FFFFF,
-a-a-a-a-a-a-a COUNT_VALUE = 1,
-a-a-a-a-a-a-a NOTIFY_VALUE = 1ull << NOTIFY_BASE,
-a-a-a-a-a-a-a WAIT_VALUE = 1ull << WAIT_BASE,
-a-a-a-a-a-a-a NOTIFY_MASK = MASK21 << NOTIFY_BASE,
-a-a-a-a-a-a-a WAIT_MASK = MASK21 << WAIT_BASE;
-a-a-a-astatic constexpr std::memory_order
-a-a-a-a-a-a-a ACQ = std::memory_order_acquire,
-a-a-a-a-a-a-a REL = std::memory_order_release,
-a-a-a-a-a-a-a RLX = std::memory_order_relaxed;
-a-a-a-astd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
-a-a-a-aassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
-a-a-a-am_counters( [&]-a-a-a { return initial <= MASK21 ? initial : MASK21; }
() )
{
}
void xsemaphore::acquire() noexcept
{
-a-a-a-auint64_t ref = m_counters.load( RLX ), niu;
-a-a-a-afor( ; ; )
-a-a-a-a-a-a-a if( (ref & MASK21) )
-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - COUNT_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a continue;
-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a if( (ref & WAIT_MASK) == WAIT_MASK )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a-a-a-a-a niu = ref + WAIT_VALUE;
-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a ref = niu;
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a break;
-a-a-a-a-a-a-a-a-a-a-a }
-a-a-a-a-a-a-a }
-a-a-a-afor( ; ; )
-a-a-a-a{
-a-a-a-a-a-a-a while( (ref & NOTIFY_MASK) )
-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a m_counters.wait( ref, RLX );
-a-a-a-a-a-a-a ref = m_counters.load( RLX );
-a-a-a-a}
}
void xsemaphore::release( uint32_t n ) noexcept
{
-a-a-a-aif( !n )
-a-a-a-a-a-a-a return;
-a-a-a-auint64_t ref = m_counters.load( RLX ), niu, notifies;
-a-a-a-aint64_t ahead;
-a-a-a-ado
-a-a-a-a{
-a-a-a-a-a-a-a uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
-a-a-a-a-a-a-a ahead = n - waiters;
-a-a-a-a-a-a-a notifies = ahead >= 0 ? waiters : n;
-a-a-a-a-a-a-a uint64_t beyond = ahead >= 0 ? ahead : 0;
-a-a-a-a-a-a-a if( (ref & MASK21) + beyond > MASK21 )
-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a niu = ref + beyond;
-a-a-a-a-a-a-a if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a niu += notifies << NOTIFY_BASE;
-a-a-a-a-a-a-a niu -= notifies << WAIT_BASE;
-a-a-a-a} while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
-a-a-a-aif( ahead >= 0 )
-a-a-a-a-a-a-a m_counters.notify_all();
-a-a-a-aelse
-a-a-a-a-a-a-a for( ; notifies; m_counters.notify_one(), --notifies );
}
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
-a-a-a-a-axsemaphore( uint32_t initial = 0 ) noexcept;
-a-a-a-a-axsemaphore( const xsemaphore & ) = delete;
-a-a-a-a-a~xsemaphore();
-a-a-a-a-axsemaphore &operator =( const xsemaphore & ) = delete;
-a-a-a-a-avoid acquire() noexcept;
-a-a-a-a-avoid release( uint32_t n = 1 ) noexcept;
private:
-a-a-a-a-astatic constexpr unsigned
-a-a-a-a-a-a-a-a MASK_BITS = 21,
-a-a-a-a-a-a-a-a NOTIFY_BASE = MASK_BITS,
-a-a-a-a-a-a-a-a WAIT_BASE = 2 * MASK_BITS;
-a-a-a-a-astatic constexpr uint64_t
-a-a-a-a-a-a-a-a MASK21 = 0x1FFFFF,
-a-a-a-a-a-a-a-a COUNT_VALUE = 1,
-a-a-a-a-a-a-a-a NOTIFY_VALUE = 1ull << NOTIFY_BASE,
-a-a-a-a-a-a-a-a WAIT_VALUE = 1ull << WAIT_BASE,
-a-a-a-a-a-a-a-a NOTIFY_MASK = MASK21 << NOTIFY_BASE,
-a-a-a-a-a-a-a-a WAIT_MASK = MASK21 << WAIT_BASE;
-a-a-a-a-astatic constexpr std::memory_order
-a-a-a-a-a-a-a-a ACQ = std::memory_order_acquire,
-a-a-a-a-a-a-a-a REL = std::memory_order_release,
-a-a-a-a-a-a-a-a RLX = std::memory_order_relaxed;
-a-a-a-a-astd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
-a-a-a-a-aassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
-a-a-a-a-am_counters( [&]-a-a-a { return initial <= MASK21 ? initial :
MASK21; } () )
{
}
void xsemaphore::acquire() noexcept
{
-a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu;
-a-a-a-a-afor( ; ; )
-a-a-a-a-a-a-a-a if( (ref & MASK21) )
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - >> COUNT_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a continue;
-a-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a-a if( (ref & WAIT_MASK) == WAIT_MASK )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a-a-a-a-a-a niu = ref + WAIT_VALUE;
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, niu, RLX,
RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a ref = niu;
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a break;
-a-a-a-a-a-a-a-a-a-a-a-a }
-a-a-a-a-a-a-a-a }
-a-a-a-a-afor( ; ; )
-a-a-a-a-a{
-a-a-a-a-a-a-a-a while( (ref & NOTIFY_MASK) )
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - >> NOTIFY_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a-a m_counters.wait( ref, RLX );
-a-a-a-a-a-a-a-a ref = m_counters.load( RLX );
-a-a-a-a-a}
}
void xsemaphore::release( uint32_t n ) noexcept
{
-a-a-a-a-aif( !n )
-a-a-a-a-a-a-a-a return;
-a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu, notifies;
-a-a-a-a-aint64_t ahead;
-a-a-a-a-ado
-a-a-a-a-a{
-a-a-a-a-a-a-a-a uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
-a-a-a-a-a-a-a-a ahead = n - waiters;
-a-a-a-a-a-a-a-a notifies = ahead >= 0 ? waiters : n;
-a-a-a-a-a-a-a-a uint64_t beyond = ahead >= 0 ? ahead : 0;
-a-a-a-a-a-a-a-a if( (ref & MASK21) + beyond > MASK21 )
-a-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a-a niu = ref + beyond;
-a-a-a-a-a-a-a-a if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 ) >> -a-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a-a niu += notifies << NOTIFY_BASE;
-a-a-a-a-a-a-a-a niu -= notifies << WAIT_BASE;
-a-a-a-a-a} while( !m_counters.compare_exchange_strong( ref, niu, REL,
RLX ) );
-a-a-a-a-aif( ahead >= 0 )
-a-a-a-a-a-a-a-a m_counters.notify_all();
-a-a-a-a-aelse
-a-a-a-a-a-a-a-a for( ; notifies; m_counters.notify_one(), --notifies );
}
Again, "using namespace std" is imprecise programming.
Need to look at it when I have some more time. But, I still like the benaphore. Simple, works, elegant. No CAS loops in sight.
Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:
Need to look at it when I have some more time. But, I still like the
benaphore. Simple, works, elegant. No CAS loops in sight.
A benaphore isn't a semaphnore.
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:
Need to look at it when I have some more time. But, I still like the
benaphore. Simple, works, elegant. No CAS loops in sight.
A benaphore isn't a semaphnore.
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a semaphore. You, idiot? Humm...
A futex'd counting semaphore that doesn't suffer stolen wakeups[...]
(as with most implementations).
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
[...]
Are you trying to stress-test the CPU's branch prediction? All of those loops...
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
[...]
Are you trying to stress-test the CPU's branch prediction? All of
those loops...
A futex'd semaphore's performance isn't determined by the branch
prediction but by the speed of the cacheline-transfer between the
coress; this could be really slow. And sleeping inside the kernel
and being awakened by an intra processor interrupt is even two
magitudes slower.
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The logic
is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on LL/SC
logic, it ruins the loopless factor... Its simple. Your CAS infested
thing is not so simple... I understand it, but wow.
Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The
logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on
LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
infested thing is not so simple... I understand it, but wow.
I know what a Benaphore is; it is not a semaphore but it has a
semaphore.
On 1/25/2026 9:26 PM, Bonita Montero wrote:
Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups >>>>>>>> (as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The
logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on
LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
infested thing is not so simple... I understand it, but wow.
I know what a Benaphore is; it is not a semaphore but it has a
semaphore.
A Benaphore is a semaphore.
class fast_semaphore
{
public:
-a-a-a fast_semaphore(int count) noexcept
-a-a-a : m_count(count), m_semaphore(0) {}
-a-a-a void post()
-a-a-a {
-a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_release);
-a-a-a-a-a-a-a int count = m_count.fetch_add(1, std::memory_order_relaxed);
-a-a-a-a-a-a-a if (count < 0)
-a-a-a-a-a-a-a-a-a-a-a m_semaphore.post();
-a-a-a }
-a-a-a void wait()
-a-a-a {
-a-a-a-a-a-a-a int count = m_count.fetch_sub(1, std::memory_order_relaxed);
-a-a-a-a-a-a-a if (count < 1)
-a-a-a-a-a-a-a-a-a-a-a m_semaphore.wait();
-a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_acquire);
-a-a-a }
private:
-a-a-a std::atomic m_count;
-a-a-a semaphore m_semaphore;
};
Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
On 1/25/2026 9:26 PM, Bonita Montero wrote:
Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups >>>>>>>>> (as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The
logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based
on LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
infested thing is not so simple... I understand it, but wow.
I know what a Benaphore is; it is not a semaphore but it has a
semaphore.
A Benaphore is a semaphore.
A benaphore can allow only one thread to run. A semaphore can trigger
an arbitrary number of threads to run. So they're completely different.
class fast_semaphore
{
public:
-a-a-a-a fast_semaphore(int count) noexcept
-a-a-a-a : m_count(count), m_semaphore(0) {}
-a-a-a-a void post()
-a-a-a-a {
-a-a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_release);
-a-a-a-a-a-a-a-a int count = m_count.fetch_add(1, std::memory_order_relaxed);
-a-a-a-a-a-a-a-a if (count < 0)
-a-a-a-a-a-a-a-a-a-a-a-a m_semaphore.post();
-a-a-a-a }
-a-a-a-a void wait()
-a-a-a-a {
-a-a-a-a-a-a-a-a int count = m_count.fetch_sub(1, std::memory_order_relaxed);
-a-a-a-a-a-a-a-a if (count < 1)
-a-a-a-a-a-a-a-a-a-a-a-a m_semaphore.wait();
-a-a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_acquire);
-a-a-a-a }
private:
-a-a-a-a std::atomic m_count;
-a-a-a-a semaphore m_semaphore;
};
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
-a-a-a-a-axsemaphore( uint32_t initial = 0 ) noexcept;
-a-a-a-a-axsemaphore( const xsemaphore & ) = delete;
-a-a-a-a-a~xsemaphore();
-a-a-a-a-axsemaphore &operator =( const xsemaphore & ) = delete;
-a-a-a-a-avoid acquire() noexcept;
-a-a-a-a-avoid release( uint32_t n = 1 ) noexcept;
private:
-a-a-a-a-astatic constexpr unsigned
-a-a-a-a-a-a-a-a MASK_BITS = 21,
-a-a-a-a-a-a-a-a NOTIFY_BASE = MASK_BITS,
-a-a-a-a-a-a-a-a WAIT_BASE = 2 * MASK_BITS;
-a-a-a-a-astatic constexpr uint64_t
-a-a-a-a-a-a-a-a MASK21 = 0x1FFFFF,
-a-a-a-a-a-a-a-a COUNT_VALUE = 1,
-a-a-a-a-a-a-a-a NOTIFY_VALUE = 1ull << NOTIFY_BASE,
-a-a-a-a-a-a-a-a WAIT_VALUE = 1ull << WAIT_BASE,
-a-a-a-a-a-a-a-a NOTIFY_MASK = MASK21 << NOTIFY_BASE,
-a-a-a-a-a-a-a-a WAIT_MASK = MASK21 << WAIT_BASE;
-a-a-a-a-astatic constexpr std::memory_order
-a-a-a-a-a-a-a-a ACQ = std::memory_order_acquire,
-a-a-a-a-a-a-a-a REL = std::memory_order_release,
-a-a-a-a-a-a-a-a RLX = std::memory_order_relaxed;
-a-a-a-a-astd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
-a-a-a-a-aassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
-a-a-a-a-am_counters( [&]-a-a-a { return initial <= MASK21 ? initial :
MASK21; } () )
{
}
void xsemaphore::acquire() noexcept
{
-a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu;
-a-a-a-a-afor( ; ; )
-a-a-a-a-a-a-a-a if( (ref & MASK21) )
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - >> COUNT_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a continue;
-a-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a-a if( (ref & WAIT_MASK) == WAIT_MASK )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a abort();
Oh shit.
-a-a-a-a-a-a-a-a-a-a-a-a niu = ref + WAIT_VALUE;
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, niu, RLX,
RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a ref = niu;
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a break;
-a-a-a-a-a-a-a-a-a-a-a-a }
-a-a-a-a-a-a-a-a }
-a-a-a-a-afor( ; ; )
-a-a-a-a-a{
-a-a-a-a-a-a-a-a while( (ref & NOTIFY_MASK) )
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - >> NOTIFY_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a-a m_counters.wait( ref, RLX );
-a-a-a-a-a-a-a-a ref = m_counters.load( RLX );
-a-a-a-a-a}
}
void xsemaphore::release( uint32_t n ) noexcept
{
-a-a-a-a-aif( !n )
-a-a-a-a-a-a-a-a return;
-a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu, notifies;
-a-a-a-a-aint64_t ahead;
-a-a-a-a-ado
-a-a-a-a-a{
-a-a-a-a-a-a-a-a uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
-a-a-a-a-a-a-a-a ahead = n - waiters;
-a-a-a-a-a-a-a-a notifies = ahead >= 0 ? waiters : n;
-a-a-a-a-a-a-a-a uint64_t beyond = ahead >= 0 ? ahead : 0;
-a-a-a-a-a-a-a-a if( (ref & MASK21) + beyond > MASK21 )
-a-a-a-a-a-a-a-a-a-a-a-a abort();
^^^^^^^^^^^^^^^^
Gotta love the abort here... ;^o
-a-a-a-a-a-a-a-a niu = ref + beyond;
-a-a-a-a-a-a-a-a if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 ) >> -a-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a-a niu += notifies << NOTIFY_BASE;
-a-a-a-a-a-a-a-a niu -= notifies << WAIT_BASE;
-a-a-a-a-a} while( !m_counters.compare_exchange_strong( ref, niu, REL,
RLX ) );
-a-a-a-a-aif( ahead >= 0 )
-a-a-a-a-a-a-a-a m_counters.notify_all();
-a-a-a-a-aelse
-a-a-a-a-a-a-a-a for( ; notifies; m_counters.notify_one(), --notifies );
}
Sigh.
I think you might be missing the forest for the trees?
Am 25.01.2026 um 23:39 schrieb Chris M. Thomasson:
On 1/25/2026 1:17 AM, Bonita Montero wrote:
Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
[...]
Are you trying to stress-test the CPU's branch prediction? All of
those loops...
A futex'd semaphore's performance isn't determined by the branch
prediction but by the speed of the cacheline-transfer between the
coress; this could be really slow. And sleeping inside the kernel
and being awakened by an intra processor interrupt is even two
magitudes slower.
I know how the futex works. Your loop here is interesting to me:
for( ; notifies; m_counters.notify_one(), --notifies );
This when there are less notfifies than thee are waiting threads.
Am 27.01.2026 um 22:21 schrieb Chris M. Thomasson:
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
-a-a-a-a-axsemaphore( uint32_t initial = 0 ) noexcept;
-a-a-a-a-axsemaphore( const xsemaphore & ) = delete;
-a-a-a-a-a~xsemaphore();
-a-a-a-a-axsemaphore &operator =( const xsemaphore & ) = delete;
-a-a-a-a-avoid acquire() noexcept;
-a-a-a-a-avoid release( uint32_t n = 1 ) noexcept;
private:
-a-a-a-a-astatic constexpr unsigned
-a-a-a-a-a-a-a-a MASK_BITS = 21,
-a-a-a-a-a-a-a-a NOTIFY_BASE = MASK_BITS,
-a-a-a-a-a-a-a-a WAIT_BASE = 2 * MASK_BITS;
-a-a-a-a-astatic constexpr uint64_t
-a-a-a-a-a-a-a-a MASK21 = 0x1FFFFF,
-a-a-a-a-a-a-a-a COUNT_VALUE = 1,
-a-a-a-a-a-a-a-a NOTIFY_VALUE = 1ull << NOTIFY_BASE,
-a-a-a-a-a-a-a-a WAIT_VALUE = 1ull << WAIT_BASE,
-a-a-a-a-a-a-a-a NOTIFY_MASK = MASK21 << NOTIFY_BASE,
-a-a-a-a-a-a-a-a WAIT_MASK = MASK21 << WAIT_BASE;
-a-a-a-a-astatic constexpr std::memory_order
-a-a-a-a-a-a-a-a ACQ = std::memory_order_acquire,
-a-a-a-a-a-a-a-a REL = std::memory_order_release,
-a-a-a-a-a-a-a-a RLX = std::memory_order_relaxed;
-a-a-a-a-astd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
-a-a-a-a-aassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
-a-a-a-a-am_counters( [&]-a-a-a { return initial <= MASK21 ? initial :
MASK21; } () )
{
}
void xsemaphore::acquire() noexcept
{
-a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu;
-a-a-a-a-afor( ; ; )
-a-a-a-a-a-a-a-a if( (ref & MASK21) )
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref -
COUNT_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a continue;
-a-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a-a if( (ref & WAIT_MASK) == WAIT_MASK )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a abort();
Oh shit.
-a-a-a-a-a-a-a-a-a-a-a-a niu = ref + WAIT_VALUE;
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, niu, RLX,
RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a ref = niu;
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a break;
-a-a-a-a-a-a-a-a-a-a-a-a }
-a-a-a-a-a-a-a-a }
-a-a-a-a-afor( ; ; )
-a-a-a-a-a{
-a-a-a-a-a-a-a-a while( (ref & NOTIFY_MASK) )
-a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref -
NOTIFY_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a-a m_counters.wait( ref, RLX );
-a-a-a-a-a-a-a-a ref = m_counters.load( RLX );
-a-a-a-a-a}
}
void xsemaphore::release( uint32_t n ) noexcept
{
-a-a-a-a-aif( !n )
-a-a-a-a-a-a-a-a return;
-a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu, notifies;
-a-a-a-a-aint64_t ahead;
-a-a-a-a-ado
-a-a-a-a-a{
-a-a-a-a-a-a-a-a uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
-a-a-a-a-a-a-a-a ahead = n - waiters;
-a-a-a-a-a-a-a-a notifies = ahead >= 0 ? waiters : n;
-a-a-a-a-a-a-a-a uint64_t beyond = ahead >= 0 ? ahead : 0;
-a-a-a-a-a-a-a-a if( (ref & MASK21) + beyond > MASK21 )
-a-a-a-a-a-a-a-a-a-a-a-a abort();
^^^^^^^^^^^^^^^^
Gotta love the abort here... ;^o
It's ab abort beyond 2 ^ 21 threads.
-a-a-a-a-a-a-a-a niu = ref + beyond;
-a-a-a-a-a-a-a-a if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 ) >>> -a-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a-a niu += notifies << NOTIFY_BASE;
-a-a-a-a-a-a-a-a niu -= notifies << WAIT_BASE;
-a-a-a-a-a} while( !m_counters.compare_exchange_strong( ref, niu, REL,
RLX ) );
-a-a-a-a-aif( ahead >= 0 )
-a-a-a-a-a-a-a-a m_counters.notify_all();
-a-a-a-a-aelse
-a-a-a-a-a-a-a-a for( ; notifies; m_counters.notify_one(), --notifies ); >>> }
Sigh.
Am 27.01.2026 um 22:15 schrieb Chris M. Thomasson:
I think you might be missing the forest for the trees?
Calling a benaphore a semaphore is not precise as necessary.
Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
On 1/25/2026 9:26 PM, Bonita Montero wrote:
Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups >>>>>>>>> (as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The
logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based
on LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
infested thing is not so simple... I understand it, but wow.
I know what a Benaphore is; it is not a semaphore but it has a
semaphore.
A Benaphore is a semaphore.
A benaphore can allow only one thread to run. A semaphore can trigger
an arbitrary number of threads to run. So they're completely different.
class fast_semaphore
{
public:
-a-a-a-a fast_semaphore(int count) noexcept
-a-a-a-a : m_count(count), m_semaphore(0) {}
-a-a-a-a void post()
-a-a-a-a {
-a-a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_release);
-a-a-a-a-a-a-a-a int count = m_count.fetch_add(1, std::memory_order_relaxed);
-a-a-a-a-a-a-a-a if (count < 0)
-a-a-a-a-a-a-a-a-a-a-a-a m_semaphore.post();
-a-a-a-a }
-a-a-a-a void wait()
-a-a-a-a {
-a-a-a-a-a-a-a-a int count = m_count.fetch_sub(1, std::memory_order_relaxed);
-a-a-a-a-a-a-a-a if (count < 1)
-a-a-a-a-a-a-a-a-a-a-a-a m_semaphore.wait();
-a-a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_acquire);
-a-a-a-a }
private:
-a-a-a-a std::atomic m_count;
-a-a-a-a semaphore m_semaphore;
};
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
-a-a-a-axsemaphore( uint32_t initial = 0 ) noexcept;
-a-a-a-axsemaphore( const xsemaphore & ) = delete;
-a-a-a-a~xsemaphore();
-a-a-a-axsemaphore &operator =( const xsemaphore & ) = delete;
-a-a-a-avoid acquire() noexcept;
-a-a-a-avoid release( uint32_t n = 1 ) noexcept;
private:
-a-a-a-astatic constexpr unsigned
-a-a-a-a-a-a-a MASK_BITS = 21,
-a-a-a-a-a-a-a NOTIFY_BASE = MASK_BITS,
-a-a-a-a-a-a-a WAIT_BASE = 2 * MASK_BITS;
-a-a-a-astatic constexpr uint64_t
-a-a-a-a-a-a-a MASK21 = 0x1FFFFF,
-a-a-a-a-a-a-a COUNT_VALUE = 1,
-a-a-a-a-a-a-a NOTIFY_VALUE = 1ull << NOTIFY_BASE,
-a-a-a-a-a-a-a WAIT_VALUE = 1ull << WAIT_BASE,
-a-a-a-a-a-a-a NOTIFY_MASK = MASK21 << NOTIFY_BASE,
-a-a-a-a-a-a-a WAIT_MASK = MASK21 << WAIT_BASE;
-a-a-a-astatic constexpr std::memory_order
-a-a-a-a-a-a-a ACQ = std::memory_order_acquire,
-a-a-a-a-a-a-a REL = std::memory_order_release,
-a-a-a-a-a-a-a RLX = std::memory_order_relaxed;
-a-a-a-astd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
-a-a-a-aassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
-a-a-a-am_counters( [&]-a-a-a { return initial <= MASK21 ? initial : MASK21; }
() )
{
}
void xsemaphore::acquire() noexcept
{
-a-a-a-auint64_t ref = m_counters.load( RLX ), niu;
-a-a-a-afor( ; ; )
-a-a-a-a-a-a-a if( (ref & MASK21) )
-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - COUNT_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a continue;
-a-a-a-a-a-a-a else
-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a if( (ref & WAIT_MASK) == WAIT_MASK )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a-a-a-a-a niu = ref + WAIT_VALUE;
-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a {
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a ref = niu;
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a break;
-a-a-a-a-a-a-a-a-a-a-a }
-a-a-a-a-a-a-a }
-a-a-a-afor( ; ; )
-a-a-a-a{
-a-a-a-a-a-a-a while( (ref & NOTIFY_MASK) )
-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
-a-a-a-a-a-a-a m_counters.wait( ref, RLX );
-a-a-a-a-a-a-a ref = m_counters.load( RLX );
-a-a-a-a}
}
void xsemaphore::release( uint32_t n ) noexcept
{
-a-a-a-aif( !n )
-a-a-a-a-a-a-a return;
-a-a-a-auint64_t ref = m_counters.load( RLX ), niu, notifies;
-a-a-a-aint64_t ahead;
-a-a-a-ado
-a-a-a-a{
-a-a-a-a-a-a-a uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
-a-a-a-a-a-a-a ahead = n - waiters;
-a-a-a-a-a-a-a notifies = ahead >= 0 ? waiters : n;
-a-a-a-a-a-a-a uint64_t beyond = ahead >= 0 ? ahead : 0;
-a-a-a-a-a-a-a if( (ref & MASK21) + beyond > MASK21 )
-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a niu = ref + beyond;
-a-a-a-a-a-a-a if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
-a-a-a-a-a-a-a-a-a-a-a abort();
-a-a-a-a-a-a-a niu += notifies << NOTIFY_BASE;
-a-a-a-a-a-a-a niu -= notifies << WAIT_BASE;
-a-a-a-a} while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
-a-a-a-aif( ahead >= 0 )
-a-a-a-a-a-a-a m_counters.notify_all();
-a-a-a-aelse
-a-a-a-a-a-a-a for( ; notifies; m_counters.notify_one(), --notifies );
}
On 1/25/2026 1:17 AM, Bonita Montero wrote:
Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
[...]
Are you trying to stress-test the CPU's branch prediction? All of
those loops...
A futex'd semaphore's performance isn't determined by the branch
prediction but by the speed of the cacheline-transfer between the
coress; this could be really slow. And sleeping inside the kernel
and being awakened by an intra processor interrupt is even two
magitudes slower.
I know how the futex works. Your loop here is interesting to me:
for( ; notifies; m_counters.notify_one(), --notifies );
| Sysop: | Amessyroom |
|---|---|
| Location: | Fayetteville, NC |
| Users: | 59 |
| Nodes: | 6 (1 / 5) |
| Uptime: | 16:03:06 |
| Calls: | 810 |
| Calls today: | 1 |
| Files: | 1,287 |
| D/L today: |
10 files (21,017K bytes) |
| Messages: | 193,341 |