• ChatGPT calls my code "subtile"

    From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Fri Jan 23 19:06:21 2026
    From Newsgroup: comp.lang.c++

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    // Counting semaphore whose entire state is packed into one 64-bit atomic.
    //
    // m_counters holds three 21-bit fields, from least to most significant:
    //   count  (bits  0..20) - permits that acquire() can take without blocking
    //   notify (bits 21..41) - wake-up grants handed out by release() that a
    //                          registered waiter still has to consume
    //   wait   (bits 42..62) - threads that registered themselves in acquire()
    // Copying is disabled.
    struct xsemaphore
    {
        // Creates the semaphore with `initial` permits.
        xsemaphore( uint32_t initial = 0 ) noexcept;
        ~xsemaphore();
        xsemaphore( const xsemaphore & ) = delete;
        xsemaphore &operator =( const xsemaphore & ) = delete;

        // Takes one permit, blocking while none is available.
        void acquire() noexcept;
        // Hands out n permits (default: one).
        void release( uint32_t n = 1 ) noexcept;

    private:
        // Width of each field and the bit offsets of the notify and wait fields.
        static constexpr unsigned MASK_BITS = 21;
        static constexpr unsigned NOTIFY_BASE = MASK_BITS;
        static constexpr unsigned WAIT_BASE = 2 * MASK_BITS;

        // Mask for one right-aligned field (0x1FFFFF).
        static constexpr uint64_t MASK21 = (1ull << MASK_BITS) - 1;
        // Increment of exactly one unit in each field.
        static constexpr uint64_t COUNT_VALUE = 1;
        static constexpr uint64_t NOTIFY_VALUE = 1ull << NOTIFY_BASE;
        static constexpr uint64_t WAIT_VALUE = 1ull << WAIT_BASE;
        // In-place masks for the notify and wait fields.
        static constexpr uint64_t NOTIFY_MASK = MASK21 << NOTIFY_BASE;
        static constexpr uint64_t WAIT_MASK = MASK21 << WAIT_BASE;

        // Short aliases for the memory orders used by the implementation.
        static constexpr std::memory_order ACQ = std::memory_order_acquire;
        static constexpr std::memory_order REL = std::memory_order_release;
        static constexpr std::memory_order RLX = std::memory_order_relaxed;

        // Packed count / notify / wait fields.
        std::atomic_uint64_t m_counters;
    };

    // Destructor. When XSEMAPHORE_TRICKY is defined, asserts that the wait
    // field of m_counters is zero, i.e. no thread is still registered as a
    // waiter while the semaphore is being destroyed.
    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
        assert( ( m_counters.load() & WAIT_MASK ) == 0 );
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    // Initialises the count field with `initial`, saturated to MASK21 (the
    // largest value a 21-bit field can hold); the notify and wait fields
    // start at zero.
    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
        m_counters( initial > MASK21 ? MASK21 : uint64_t( initial ) )
    {
    }

    // Takes one permit from the semaphore.
    //
    // Fast path: if the count field (low 21 bits) is non-zero, decrement it
    // with a CAS and return.
    // Slow path: add one to the wait field, then block on m_counters until the
    // notify field is non-zero and this thread manages to decrement it.
    // Calls abort() if the wait field is already at its maximum (MASK21).
    void xsemaphore::acquire() noexcept
    {
    uint64_t ref = m_counters.load( RLX ), niu;
    // Phase 1: either take a permit directly or register as a waiter.
    // Whenever a compare_exchange_strong fails it refreshes ref with the
    // current value, so each iteration works on an up-to-date snapshot.
    for( ; ; )
    if( (ref & MASK21) )
    // Count is non-zero: try to take one permit (acquire ordering on success).
    if( m_counters.compare_exchange_strong( ref, ref - COUNT_VALUE, ACQ,
    RLX ) )
    return;
    else
    // This else belongs to the inner if: the CAS lost a race, retry.
    continue;
    else
    {
    // Count is zero: register in the wait field, unless it would overflow.
    if( (ref & WAIT_MASK) == WAIT_MASK )
    abort();
    niu = ref + WAIT_VALUE;
    if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
    {
    // Registered; continue with the value that was just stored.
    ref = niu;
    break;
    }
    }
    // Phase 2: wait for a grant in the notify field and consume it.
    for( ; ; )
    {
    // While grants are visible, try to claim one; a failed CAS reloads ref
    // and the loop condition is re-evaluated.
    while( (ref & NOTIFY_MASK) )
    if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ,
    RLX ) )
    return;
    // No grant in the snapshot: block until m_counters no longer equals ref,
    // then take a fresh snapshot.
    m_counters.wait( ref, RLX );
    ref = m_counters.load( RLX );
    }
    }

    // Hands out n permits; does nothing for n == 0.
    //
    // Up to n currently registered waiters are moved from the wait field to
    // the notify field; any permits left over are added to the count field.
    // The update is a single CAS with release ordering, followed by the
    // matching notify calls on m_counters.
    // Calls abort() if the count field or the notify field would exceed MASK21.
    void xsemaphore::release( uint32_t n ) noexcept
    {
    if( !n )
    return;
    uint64_t ref = m_counters.load( RLX ), niu, notifies;
    int64_t ahead;
    do
    {
    // Number of threads currently registered in the wait field.
    uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    // n minus waiters, evaluated in unsigned arithmetic and then converted to
    // int64_t; negative when there are more waiters than permits (relies on
    // the usual modular / two's-complement conversion).
    ahead = n - waiters;
    // Waiters to wake: min( n, waiters ).
    notifies = ahead >= 0 ? waiters : n;
    // Permits not absorbed by waiters; these go into the count field.
    uint64_t beyond = ahead >= 0 ? ahead : 0;
    if( (ref & MASK21) + beyond > MASK21 )
    abort();
    niu = ref + beyond;
    if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
    abort();
    // Move `notifies` units from the wait field to the notify field.
    niu += notifies << NOTIFY_BASE;
    niu -= notifies << WAIT_BASE;
    // A failed CAS reloads ref, and everything above is recomputed.
    } while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
    // ahead >= 0: every waiter counted in the snapshot got a grant, wake all.
    if( ahead >= 0 )
    m_counters.notify_all();
    else
    // Fewer permits than waiters: issue one notify_one per grant.
    for( ; notifies; m_counters.notify_one(), --notifies );
    }

    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Fri Jan 23 18:08:58 2026
    From Newsgroup: comp.lang.c++

    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).


    [...]

    Need to look at it when I have some more time. But, I still like the benaphore. Simple, works, elegant. No CAS loops in sight.
    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Lynn McGuire@lynnmcguire5@gmail.com to comp.lang.c++ on Fri Jan 23 20:53:52 2026
    From Newsgroup: comp.lang.c++

    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
    -a-a-a-axsemaphore( uint32_t initial = 0 ) noexcept;
    -a-a-a-axsemaphore( const xsemaphore & ) = delete;
    -a-a-a-a~xsemaphore();
    -a-a-a-axsemaphore &operator =( const xsemaphore & ) = delete;
    -a-a-a-avoid acquire() noexcept;
    -a-a-a-avoid release( uint32_t n = 1 ) noexcept;
    private:
    -a-a-a-astatic constexpr unsigned
    -a-a-a-a-a-a-a MASK_BITS = 21,
    -a-a-a-a-a-a-a NOTIFY_BASE = MASK_BITS,
    -a-a-a-a-a-a-a WAIT_BASE = 2 * MASK_BITS;
    -a-a-a-astatic constexpr uint64_t
    -a-a-a-a-a-a-a MASK21 = 0x1FFFFF,
    -a-a-a-a-a-a-a COUNT_VALUE = 1,
    -a-a-a-a-a-a-a NOTIFY_VALUE = 1ull << NOTIFY_BASE,
    -a-a-a-a-a-a-a WAIT_VALUE = 1ull << WAIT_BASE,
    -a-a-a-a-a-a-a NOTIFY_MASK = MASK21 << NOTIFY_BASE,
    -a-a-a-a-a-a-a WAIT_MASK = MASK21 << WAIT_BASE;
    -a-a-a-astatic constexpr std::memory_order
    -a-a-a-a-a-a-a ACQ = std::memory_order_acquire,
    -a-a-a-a-a-a-a REL = std::memory_order_release,
    -a-a-a-a-a-a-a RLX = std::memory_order_relaxed;
    -a-a-a-astd::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
    -a-a-a-aassert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
    -a-a-a-am_counters( [&]-a-a-a { return initial <= MASK21 ? initial : MASK21; }
    () )
    {
    }

    void xsemaphore::acquire() noexcept
    {
    -a-a-a-auint64_t ref = m_counters.load( RLX ), niu;
    -a-a-a-afor( ; ; )
    -a-a-a-a-a-a-a if( (ref & MASK21) )
    -a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - COUNT_VALUE, ACQ, RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
    -a-a-a-a-a-a-a-a-a-a-a else
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a continue;
    -a-a-a-a-a-a-a else
    -a-a-a-a-a-a-a {
    -a-a-a-a-a-a-a-a-a-a-a if( (ref & WAIT_MASK) == WAIT_MASK )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a abort();
    -a-a-a-a-a-a-a-a-a-a-a niu = ref + WAIT_VALUE;
    -a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a {
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a ref = niu;
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a break;
    -a-a-a-a-a-a-a-a-a-a-a }
    -a-a-a-a-a-a-a }
    -a-a-a-afor( ; ; )
    -a-a-a-a{
    -a-a-a-a-a-a-a while( (ref & NOTIFY_MASK) )
    -a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
    -a-a-a-a-a-a-a m_counters.wait( ref, RLX );
    -a-a-a-a-a-a-a ref = m_counters.load( RLX );
    -a-a-a-a}
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
    -a-a-a-aif( !n )
    -a-a-a-a-a-a-a return;
    -a-a-a-auint64_t ref = m_counters.load( RLX ), niu, notifies;
    -a-a-a-aint64_t ahead;
    -a-a-a-ado
    -a-a-a-a{
    -a-a-a-a-a-a-a uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    -a-a-a-a-a-a-a ahead = n - waiters;
    -a-a-a-a-a-a-a notifies = ahead >= 0 ? waiters : n;
    -a-a-a-a-a-a-a uint64_t beyond = ahead >= 0 ? ahead : 0;
    -a-a-a-a-a-a-a if( (ref & MASK21) + beyond > MASK21 )
    -a-a-a-a-a-a-a-a-a-a-a abort();
    -a-a-a-a-a-a-a niu = ref + beyond;
    -a-a-a-a-a-a-a if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
    -a-a-a-a-a-a-a-a-a-a-a abort();
    -a-a-a-a-a-a-a niu += notifies << NOTIFY_BASE;
    -a-a-a-a-a-a-a niu -= notifies << WAIT_BASE;
    -a-a-a-a} while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
    -a-a-a-aif( ahead >= 0 )
    -a-a-a-a-a-a-a m_counters.notify_all();
    -a-a-a-aelse
    -a-a-a-a-a-a-a for( ; notifies; m_counters.notify_one(), --notifies );
    }

    Again, "using namespace std" is imprecise programming.

    And an amazing lack of comments.

    Lynn

    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Sat Jan 24 08:19:10 2026
    From Newsgroup: comp.lang.c++

    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
    -a-a-a-a-axsemaphore( uint32_t initial = 0 ) noexcept;
    -a-a-a-a-axsemaphore( const xsemaphore & ) = delete;
    -a-a-a-a-a~xsemaphore();
    -a-a-a-a-axsemaphore &operator =( const xsemaphore & ) = delete;
    -a-a-a-a-avoid acquire() noexcept;
    -a-a-a-a-avoid release( uint32_t n = 1 ) noexcept;
    private:
    -a-a-a-a-astatic constexpr unsigned
    -a-a-a-a-a-a-a-a MASK_BITS = 21,
    -a-a-a-a-a-a-a-a NOTIFY_BASE = MASK_BITS,
    -a-a-a-a-a-a-a-a WAIT_BASE = 2 * MASK_BITS;
    -a-a-a-a-astatic constexpr uint64_t
    -a-a-a-a-a-a-a-a MASK21 = 0x1FFFFF,
    -a-a-a-a-a-a-a-a COUNT_VALUE = 1,
    -a-a-a-a-a-a-a-a NOTIFY_VALUE = 1ull << NOTIFY_BASE,
    -a-a-a-a-a-a-a-a WAIT_VALUE = 1ull << WAIT_BASE,
    -a-a-a-a-a-a-a-a NOTIFY_MASK = MASK21 << NOTIFY_BASE,
    -a-a-a-a-a-a-a-a WAIT_MASK = MASK21 << WAIT_BASE;
    -a-a-a-a-astatic constexpr std::memory_order
    -a-a-a-a-a-a-a-a ACQ = std::memory_order_acquire,
    -a-a-a-a-a-a-a-a REL = std::memory_order_release,
    -a-a-a-a-a-a-a-a RLX = std::memory_order_relaxed;
    -a-a-a-a-astd::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
    -a-a-a-a-aassert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
    -a-a-a-a-am_counters( [&]-a-a-a { return initial <= MASK21 ? initial :
    MASK21; } () )
    {
    }

    void xsemaphore::acquire() noexcept
    {
    -a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu;
    -a-a-a-a-afor( ; ; )
    -a-a-a-a-a-a-a-a if( (ref & MASK21) )
    -a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - >> COUNT_VALUE, ACQ, RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
    -a-a-a-a-a-a-a-a-a-a-a-a else
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a continue;
    -a-a-a-a-a-a-a-a else
    -a-a-a-a-a-a-a-a {
    -a-a-a-a-a-a-a-a-a-a-a-a if( (ref & WAIT_MASK) == WAIT_MASK )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a abort();
    -a-a-a-a-a-a-a-a-a-a-a-a niu = ref + WAIT_VALUE;
    -a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, niu, RLX,
    RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a-a {
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a ref = niu;
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a break;
    -a-a-a-a-a-a-a-a-a-a-a-a }
    -a-a-a-a-a-a-a-a }
    -a-a-a-a-afor( ; ; )
    -a-a-a-a-a{
    -a-a-a-a-a-a-a-a while( (ref & NOTIFY_MASK) )
    -a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - >> NOTIFY_VALUE, ACQ, RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
    -a-a-a-a-a-a-a-a m_counters.wait( ref, RLX );
    -a-a-a-a-a-a-a-a ref = m_counters.load( RLX );
    -a-a-a-a-a}
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
    -a-a-a-a-aif( !n )
    -a-a-a-a-a-a-a-a return;
    -a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu, notifies;
    -a-a-a-a-aint64_t ahead;
    -a-a-a-a-ado
    -a-a-a-a-a{
    -a-a-a-a-a-a-a-a uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    -a-a-a-a-a-a-a-a ahead = n - waiters;
    -a-a-a-a-a-a-a-a notifies = ahead >= 0 ? waiters : n;
    -a-a-a-a-a-a-a-a uint64_t beyond = ahead >= 0 ? ahead : 0;
    -a-a-a-a-a-a-a-a if( (ref & MASK21) + beyond > MASK21 )
    -a-a-a-a-a-a-a-a-a-a-a-a abort();
    -a-a-a-a-a-a-a-a niu = ref + beyond;
    -a-a-a-a-a-a-a-a if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 ) >> -a-a-a-a-a-a-a-a-a-a-a-a abort();
    -a-a-a-a-a-a-a-a niu += notifies << NOTIFY_BASE;
    -a-a-a-a-a-a-a-a niu -= notifies << WAIT_BASE;
    -a-a-a-a-a} while( !m_counters.compare_exchange_strong( ref, niu, REL,
    RLX ) );
    -a-a-a-a-aif( ahead >= 0 )
    -a-a-a-a-a-a-a-a m_counters.notify_all();
    -a-a-a-a-aelse
    -a-a-a-a-a-a-a-a for( ; notifies; m_counters.notify_one(), --notifies );
    }

    Again, "using namespace std" is imprecise programming.

    Idiot.
    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Sat Jan 24 08:19:45 2026
    From Newsgroup: comp.lang.c++

    Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:

    Need to look at it when I have some more time. But, I still like the benaphore. Simple, works, elegant. No CAS loops in sight.

    A benaphore isn't a semaphore.

    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Sat Jan 24 13:35:35 2026
    From Newsgroup: comp.lang.c++

    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:

    Need to look at it when I have some more time. But, I still like the
    benaphore. Simple, works, elegant. No CAS loops in sight.

    A benaphore isn't a semaphore.

    Are you sure about that? Have you even looked at it?
    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Sat Jan 24 13:36:32 2026
    From Newsgroup: comp.lang.c++

    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a semaphore.
    You, idiot? Humm...
    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Sat Jan 24 23:29:41 2026
    From Newsgroup: comp.lang.c++

    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:

    Need to look at it when I have some more time. But, I still like the
    benaphore. Simple, works, elegant. No CAS loops in sight.

    A benaphore isn't a semaphore.


    Fwiw, here is a version from a while back:

    https://vorbrodt.blog/2019/02/05/fast-semaphore/

    Now for the slow path, a slow semaphore would work fine. Use a platform semaphore (sem_t, HANDLE, etc.) in the slow path.
    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Sun Jan 25 08:55:34 2026
    From Newsgroup: comp.lang.c++

    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.
    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Sat Jan 24 23:59:27 2026
    From Newsgroup: comp.lang.c++

    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]

    Are you trying to stress-test the CPU's branch prediction? All of those loops...
    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Sun Jan 25 10:17:04 2026
    From Newsgroup: comp.lang.c++

    Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:

    On 1/23/2026 10:06 AM, Bonita Montero wrote:

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    [...]

    Are you trying to stress-test the CPU's branch prediction? All of those loops...

    A futex'd semaphore's performance isn't determined by the branch
    prediction but by the speed of the cacheline-transfer between the
    cores; this could be really slow. And sleeping inside the kernel
    and being awakened by an inter-processor interrupt is even two
    orders of magnitude slower.

    --- Synchronet 3.21a-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Sun Jan 25 12:34:01 2026
    From Newsgroup: comp.lang.c++

    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The logic
    is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on LL/SC
    logic, it ruins the loopless factor... It's simple. Your CAS-infested
    thing is not so simple... I understand it, but wow.

    Benaphore:
    __________________
    // Counting semaphore with an atomic fast path in front of a slower
    // semaphore. `semaphore` is declared elsewhere (not shown here); it is
    // presumably a blocking platform semaphore - confirm against its definition.
    //
    // m_count is the number of available permits; the code treats a negative
    // value as "threads are (about to be) blocked on m_semaphore".
    class fast_semaphore
    {
    public:
        // Starts with `count` permits; the slow-path semaphore starts at 0.
        fast_semaphore(int count) noexcept
        : m_count(count), m_semaphore(0) {}

        // Releases one permit. The release fence precedes the relaxed
        // increment; m_semaphore is posted only when the previous value was
        // negative.
        void post()
        {
            std::atomic_thread_fence(std::memory_order_release);
            int count = m_count.fetch_add(1, std::memory_order_relaxed);
            if (count < 0)
                m_semaphore.post();
        }

        // Takes one permit. Blocks on m_semaphore when the previous value was
        // below 1, then issues an acquire fence on both paths.
        void wait()
        {
            int count = m_count.fetch_sub(1, std::memory_order_relaxed);
            if (count < 1)
                m_semaphore.wait();
            std::atomic_thread_fence(std::memory_order_acquire);
        }

    private:
        // The template argument is spelled out: class template argument
        // deduction is not available for non-static data members, so a bare
        // `std::atomic m_count;` does not compile.
        std::atomic<int> m_count;
        semaphore m_semaphore;
    };
    __________________

    Pretty simple. Actually, I "think" on the wait the membar can be removed
    on the slow path because the m_semaphore.wait() should have acquire
    implied.

    // Variant of fast_semaphore::wait() that issues the acquire fence only on
    // the fast path (no blocking on m_semaphore).
    void wait()
    {
    // Take a permit; `count` is the value before the decrement.
    int count = m_count.fetch_sub(1, std::memory_order_relaxed);

    if (count < 1)
    {
    // Slow path: no permit was available, block on the underlying semaphore.
    m_semaphore.wait();
    // NOTE(review): no fence here on the assumption that m_semaphore.wait()
    // already has acquire semantics - confirm for the semaphore type in use.
    }

    else {
    // Fast path: pairs with the release fence in post().
    std::atomic_thread_fence(std::memory_order_acquire);
    }
    }


    Humm. should work okay. Not sure how much it buys us, but, well, there
    it is. ;^D

    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Sun Jan 25 14:39:06 2026
    From Newsgroup: comp.lang.c++

    On 1/25/2026 1:17 AM, Bonita Montero wrote:
    Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:

    On 1/23/2026 10:06 AM, Bonita Montero wrote:

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    [...]

    Are you trying to stress-test the CPU's branch prediction? All of
    those loops...

    A futex'd semaphore's performance isn't determined by the branch
    prediction but by the speed of the cacheline-transfer between the
    cores; this could be really slow. And sleeping inside the kernel
    and being awakened by an inter-processor interrupt is even two
    orders of magnitude slower.


    I know how the futex works. Your loop here is interesting to me:

    for( ; notifies; m_counters.notify_one(), --notifies );
    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Mon Jan 26 06:26:52 2026
    From Newsgroup: comp.lang.c++

    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The logic
    is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on LL/SC
    logic, it ruins the loopless factor... Its simple. Your CAS infested
    thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.
    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Mon Jan 26 23:57:23 2026
    From Newsgroup: comp.lang.c++

    On 1/25/2026 9:26 PM, Bonita Montero wrote:
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The
    logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on
    LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
    infested thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    A Benaphore is a semaphore.

    // Counting semaphore with an atomic fast path in front of a slower
    // semaphore. `semaphore` is declared elsewhere (not shown here); it is
    // presumably a blocking platform semaphore - confirm against its definition.
    //
    // m_count is the number of available permits; the code treats a negative
    // value as "threads are (about to be) blocked on m_semaphore".
    class fast_semaphore
    {
    public:
        // Starts with `count` permits; the slow-path semaphore starts at 0.
        fast_semaphore(int count) noexcept
        : m_count(count), m_semaphore(0) {}

        // Releases one permit. The release fence precedes the relaxed
        // increment; m_semaphore is posted only when the previous value was
        // negative.
        void post()
        {
            std::atomic_thread_fence(std::memory_order_release);
            int count = m_count.fetch_add(1, std::memory_order_relaxed);
            if (count < 0)
                m_semaphore.post();
        }

        // Takes one permit. Blocks on m_semaphore when the previous value was
        // below 1, then issues an acquire fence on both paths.
        void wait()
        {
            int count = m_count.fetch_sub(1, std::memory_order_relaxed);
            if (count < 1)
                m_semaphore.wait();
            std::atomic_thread_fence(std::memory_order_acquire);
        }

    private:
        // The template argument is spelled out: class template argument
        // deduction is not available for non-static data members, so a bare
        // `std::atomic m_count;` does not compile.
        std::atomic<int> m_count;
        semaphore m_semaphore;
    };
    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Tue Jan 27 09:33:01 2026
    From Newsgroup: comp.lang.c++

    Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
    On 1/25/2026 9:26 PM, Bonita Montero wrote:
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The
    logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on
    LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
    infested thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    A Benaphore is a semaphore.

    A benaphore can allow only one thread to run. A semaphore can trigger
    an arbitrary number of threads to run. So they're completely different.


    class fast_semaphore
    {
    public:
    -a-a-a fast_semaphore(int count) noexcept
    -a-a-a : m_count(count), m_semaphore(0) {}

    -a-a-a void post()
    -a-a-a {
    -a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_release);
    -a-a-a-a-a-a-a int count = m_count.fetch_add(1, std::memory_order_relaxed);
    -a-a-a-a-a-a-a if (count < 0)
    -a-a-a-a-a-a-a-a-a-a-a m_semaphore.post();
    -a-a-a }

    -a-a-a void wait()
    -a-a-a {
    -a-a-a-a-a-a-a int count = m_count.fetch_sub(1, std::memory_order_relaxed);
    -a-a-a-a-a-a-a if (count < 1)
    -a-a-a-a-a-a-a-a-a-a-a m_semaphore.wait();
    -a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_acquire);
    -a-a-a }

    private:
    -a-a-a std::atomic m_count;
    -a-a-a semaphore m_semaphore;
    };

    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Tue Jan 27 13:15:14 2026
    From Newsgroup: comp.lang.c++

    On 1/27/2026 12:33 AM, Bonita Montero wrote:
    Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
    On 1/25/2026 9:26 PM, Bonita Montero wrote:
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The
    logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based
    on LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
    infested thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    A Benaphore is a semaphore.

    A benaphore can allow only one thread to run. A semaphore can trigger
    an arbitrary number of threads to run. So they're completely different.

    I think you might be missing the forest for the trees?




    class fast_semaphore
    {
    public:
    -a-a-a-a fast_semaphore(int count) noexcept
    -a-a-a-a : m_count(count), m_semaphore(0) {}

    -a-a-a-a void post()
    -a-a-a-a {
    -a-a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_release);
    -a-a-a-a-a-a-a-a int count = m_count.fetch_add(1, std::memory_order_relaxed);
    -a-a-a-a-a-a-a-a if (count < 0)
    -a-a-a-a-a-a-a-a-a-a-a-a m_semaphore.post();
    -a-a-a-a }

    -a-a-a-a void wait()
    -a-a-a-a {
    -a-a-a-a-a-a-a-a int count = m_count.fetch_sub(1, std::memory_order_relaxed);
    -a-a-a-a-a-a-a-a if (count < 1)
    -a-a-a-a-a-a-a-a-a-a-a-a m_semaphore.wait();
    -a-a-a-a-a-a-a-a std::atomic_thread_fence(std::memory_order_acquire);
    -a-a-a-a }

    private:
    -a-a-a-a std::atomic m_count;
    -a-a-a-a semaphore m_semaphore;
    };


    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Wed Jan 28 09:08:27 2026
    From Newsgroup: comp.lang.c++

    Am 27.01.2026 um 22:21 schrieb Chris M. Thomasson:
    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
    -a-a-a-a-axsemaphore( uint32_t initial = 0 ) noexcept;
    -a-a-a-a-axsemaphore( const xsemaphore & ) = delete;
    -a-a-a-a-a~xsemaphore();
    -a-a-a-a-axsemaphore &operator =( const xsemaphore & ) = delete;
    -a-a-a-a-avoid acquire() noexcept;
    -a-a-a-a-avoid release( uint32_t n = 1 ) noexcept;
    private:
    -a-a-a-a-astatic constexpr unsigned
    -a-a-a-a-a-a-a-a MASK_BITS = 21,
    -a-a-a-a-a-a-a-a NOTIFY_BASE = MASK_BITS,
    -a-a-a-a-a-a-a-a WAIT_BASE = 2 * MASK_BITS;
    -a-a-a-a-astatic constexpr uint64_t
    -a-a-a-a-a-a-a-a MASK21 = 0x1FFFFF,
    -a-a-a-a-a-a-a-a COUNT_VALUE = 1,
    -a-a-a-a-a-a-a-a NOTIFY_VALUE = 1ull << NOTIFY_BASE,
    -a-a-a-a-a-a-a-a WAIT_VALUE = 1ull << WAIT_BASE,
    -a-a-a-a-a-a-a-a NOTIFY_MASK = MASK21 << NOTIFY_BASE,
    -a-a-a-a-a-a-a-a WAIT_MASK = MASK21 << WAIT_BASE;
    -a-a-a-a-astatic constexpr std::memory_order
    -a-a-a-a-a-a-a-a ACQ = std::memory_order_acquire,
    -a-a-a-a-a-a-a-a REL = std::memory_order_release,
    -a-a-a-a-a-a-a-a RLX = std::memory_order_relaxed;
    -a-a-a-a-astd::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
    -a-a-a-a-aassert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
    -a-a-a-a-am_counters( [&]-a-a-a { return initial <= MASK21 ? initial :
    MASK21; } () )
    {
    }

    void xsemaphore::acquire() noexcept
    {
    -a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu;
    -a-a-a-a-afor( ; ; )
    -a-a-a-a-a-a-a-a if( (ref & MASK21) )
    -a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - >> COUNT_VALUE, ACQ, RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
    -a-a-a-a-a-a-a-a-a-a-a-a else
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a continue;
    -a-a-a-a-a-a-a-a else
    -a-a-a-a-a-a-a-a {
    -a-a-a-a-a-a-a-a-a-a-a-a if( (ref & WAIT_MASK) == WAIT_MASK )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a abort();

    Oh shit.


    -a-a-a-a-a-a-a-a-a-a-a-a niu = ref + WAIT_VALUE;
    -a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, niu, RLX,
    RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a-a {
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a ref = niu;
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a break;
    -a-a-a-a-a-a-a-a-a-a-a-a }
    -a-a-a-a-a-a-a-a }
    -a-a-a-a-afor( ; ; )
    -a-a-a-a-a{
    -a-a-a-a-a-a-a-a while( (ref & NOTIFY_MASK) )
    -a-a-a-a-a-a-a-a-a-a-a-a if( m_counters.compare_exchange_strong( ref, ref - >> NOTIFY_VALUE, ACQ, RLX ) )
    -a-a-a-a-a-a-a-a-a-a-a-a-a-a-a-a return;
    -a-a-a-a-a-a-a-a m_counters.wait( ref, RLX );
    -a-a-a-a-a-a-a-a ref = m_counters.load( RLX );
    -a-a-a-a-a}
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
    -a-a-a-a-aif( !n )
    -a-a-a-a-a-a-a-a return;
    -a-a-a-a-auint64_t ref = m_counters.load( RLX ), niu, notifies;
    -a-a-a-a-aint64_t ahead;
    -a-a-a-a-ado
    -a-a-a-a-a{
    -a-a-a-a-a-a-a-a uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    -a-a-a-a-a-a-a-a ahead = n - waiters;
    -a-a-a-a-a-a-a-a notifies = ahead >= 0 ? waiters : n;
    -a-a-a-a-a-a-a-a uint64_t beyond = ahead >= 0 ? ahead : 0;
    -a-a-a-a-a-a-a-a if( (ref & MASK21) + beyond > MASK21 )
    -a-a-a-a-a-a-a-a-a-a-a-a abort();

    ^^^^^^^^^^^^^^^^


    Gotta love the abort here... ;^o

    It's an abort beyond 2^21 threads.



    -a-a-a-a-a-a-a-a niu = ref + beyond;
    -a-a-a-a-a-a-a-a if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 ) >> -a-a-a-a-a-a-a-a-a-a-a-a abort();
    -a-a-a-a-a-a-a-a niu += notifies << NOTIFY_BASE;
    -a-a-a-a-a-a-a-a niu -= notifies << WAIT_BASE;
    -a-a-a-a-a} while( !m_counters.compare_exchange_strong( ref, niu, REL,
    RLX ) );
    -a-a-a-a-aif( ahead >= 0 )
    -a-a-a-a-a-a-a-a m_counters.notify_all();
    -a-a-a-a-aelse
    -a-a-a-a-a-a-a-a for( ; notifies; m_counters.notify_one(), --notifies );
    }


    Sigh.

    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Wed Jan 28 14:46:17 2026
    From Newsgroup: comp.lang.c++

    Am 27.01.2026 um 22:15 schrieb Chris M. Thomasson:

    I think you might be missing the forest for the trees?

    Calling a benaphore a semaphore is not as precise as necessary.
    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Wed Jan 28 12:49:44 2026
    From Newsgroup: comp.lang.c++

    On 1/28/2026 2:57 AM, Bonita Montero wrote:
    Am 25.01.2026 um 23:39 schrieb Chris M. Thomasson:
    On 1/25/2026 1:17 AM, Bonita Montero wrote:
    Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:

    On 1/23/2026 10:06 AM, Bonita Montero wrote:

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    [...]

    Are you trying to stress-test the CPU's branch prediction? All of
    those loops...

    A futex'd semaphore's performance isn't determined by the branch
    prediction but by the speed of the cacheline-transfer between the
    cores; this could be really slow. And sleeping inside the kernel
    and being awakened by an inter-processor interrupt is even two
    orders of magnitude slower.


    I know how the futex works. Your loop here is interesting to me:

    for( ; notifies; m_counters.notify_one(), --notifies );

    This is when there are fewer notifies than there are waiting threads.

    Hummm... Your logic is WAY over engineered?
    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Wed Jan 28 12:50:20 2026
    From Newsgroup: comp.lang.c++

    On 1/28/2026 12:08 AM, Bonita Montero wrote:
    Am 27.01.2026 um 22:21 schrieb Chris M. Thomasson:
    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
        xsemaphore( uint32_t initial = 0 ) noexcept;
        xsemaphore( const xsemaphore & ) = delete;
        ~xsemaphore();
        xsemaphore &operator =( const xsemaphore & ) = delete;
        void acquire() noexcept;
        void release( uint32_t n = 1 ) noexcept;
    private:
        static constexpr unsigned
            MASK_BITS = 21,
            NOTIFY_BASE = MASK_BITS,
            WAIT_BASE = 2 * MASK_BITS;
        static constexpr uint64_t
            MASK21 = 0x1FFFFF,
            COUNT_VALUE = 1,
            NOTIFY_VALUE = 1ull << NOTIFY_BASE,
            WAIT_VALUE = 1ull << WAIT_BASE,
            NOTIFY_MASK = MASK21 << NOTIFY_BASE,
            WAIT_MASK = MASK21 << WAIT_BASE;
        static constexpr std::memory_order
            ACQ = std::memory_order_acquire,
            REL = std::memory_order_release,
            RLX = std::memory_order_relaxed;
        std::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
        assert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
        m_counters( [&] { return initial <= MASK21 ? initial : MASK21; }() )
    {
    }

    void xsemaphore::acquire() noexcept
    {
        uint64_t ref = m_counters.load( RLX ), niu;
        for( ; ; )
            if( (ref & MASK21) )
                if( m_counters.compare_exchange_strong( ref, ref - COUNT_VALUE, ACQ, RLX ) )
                    return;
                else
                    continue;
            else
            {
                if( (ref & WAIT_MASK) == WAIT_MASK )
                    abort();

    Oh shit.


                niu = ref + WAIT_VALUE;
                if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
                {
                    ref = niu;
                    break;
                }
            }
        for( ; ; )
        {
            while( (ref & NOTIFY_MASK) )
                if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
                    return;
            m_counters.wait( ref, RLX );
            ref = m_counters.load( RLX );
        }
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
        if( !n )
            return;
        uint64_t ref = m_counters.load( RLX ), niu, notifies;
        int64_t ahead;
        do
        {
            uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
            ahead = n - waiters;
            notifies = ahead >= 0 ? waiters : n;
            uint64_t beyond = ahead >= 0 ? ahead : 0;
            if( (ref & MASK21) + beyond > MASK21 )
                abort();

    ^^^^^^^^^^^^^^^^


    Gotta love the abort here... ;^o

    It's an abort beyond 2^21 threads.

    Shit happens... ;^o





            niu = ref + beyond;
            if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
                abort();
            niu += notifies << NOTIFY_BASE;
            niu -= notifies << WAIT_BASE;
        } while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
        if( ahead >= 0 )
            m_counters.notify_all();
        else
            for( ; notifies; m_counters.notify_one(), --notifies );
    }


    Sigh.


    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Wed Jan 28 12:53:01 2026
    From Newsgroup: comp.lang.c++

    On 1/28/2026 5:46 AM, Bonita Montero wrote:
    Am 27.01.2026 um 22:15 schrieb Chris M. Thomasson:

    I think you might be missing the forest for the trees?

    Calling a benaphore a semaphore is not as precise as necessary.

    How about a fast-pathed semaphore? ;^)

    Even in the post n case:
    ____________
    void post_n(int n)
    {
    std::atomic_thread_fence(std::memory_order_release);
    int old_count = m_count.fetch_add(n, std::memory_order_relaxed);

    // If old_count was negative, there were waiters.
    if (old_count < 0)
    {
    // Calculate how many actually need a signal.
    // If we have 5 waiters (count == -5) and we post 10,
    // we only signal 5.
    int to_signal = std::min(-old_count, n);
    m_semaphore.post_n(to_signal);
    }
    }
    ____________

    Loopless, no CAS, just accounting.
    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Tue Jan 27 12:50:06 2026
    From Newsgroup: comp.lang.c++

    On 1/27/2026 12:33 AM, Bonita Montero wrote:
    Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
    On 1/25/2026 9:26 PM, Bonita Montero wrote:
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The
    logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based
    on LL/SC logic, it ruins the loopless factor... It's simple. Your CAS
    infested thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    A Benaphore is a semaphore.

    A benaphore can allow only one thread to run. A semaphore can trigger
    an arbitrary number of threads to run. So they're completely different.

    You're being rather pedantic... ;^) post and wait are standard semaphore
    operations. post_n can be implemented. It's all in the accounting. So, my
    example does not have, say, post_n, but it can be implemented. Without
    using any CAS and/or loops. Way better than other semaphore impls I have
    seen...

    Just thinking off the top of my head, it might be something like, typing
    in the newsreader sorry for any typos. I have old code on some hd's in storage. Joe Seigh had some fun logic back in comp.programming.thread a
    long time ago:
    ____________
    void post_n(int n)
    {
    std::atomic_thread_fence(std::memory_order_release);
    int old_count = m_count.fetch_add(n, std::memory_order_relaxed);

    // If old_count was negative, there were waiters.
    if (old_count < 0)
    {
    // Calculate how many actually need a signal.
    // If we have 5 waiters (count == -5) and we post 10,
    // we only signal 5.
    int to_signal = std::min(-old_count, n);
    m_semaphore.post_n(to_signal);
    }
    }
    ____________



    class fast_semaphore
    {
    public:
        fast_semaphore(int count) noexcept
        : m_count(count), m_semaphore(0) {}

        void post()
        {
            std::atomic_thread_fence(std::memory_order_release);
            int count = m_count.fetch_add(1, std::memory_order_relaxed);
            if (count < 0)
                m_semaphore.post();
        }

        void wait()
        {
            int count = m_count.fetch_sub(1, std::memory_order_relaxed);
            if (count < 1)
                m_semaphore.wait();
            std::atomic_thread_fence(std::memory_order_acquire);
        }

    private:
        std::atomic<int> m_count;
        semaphore m_semaphore;
    };


    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Chris M. Thomasson@chris.m.thomasson.1@gmail.com to comp.lang.c++ on Tue Jan 27 13:21:15 2026
    From Newsgroup: comp.lang.c++

    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
        xsemaphore( uint32_t initial = 0 ) noexcept;
        xsemaphore( const xsemaphore & ) = delete;
        ~xsemaphore();
        xsemaphore &operator =( const xsemaphore & ) = delete;
        void acquire() noexcept;
        void release( uint32_t n = 1 ) noexcept;
    private:
        static constexpr unsigned
            MASK_BITS = 21,
            NOTIFY_BASE = MASK_BITS,
            WAIT_BASE = 2 * MASK_BITS;
        static constexpr uint64_t
            MASK21 = 0x1FFFFF,
            COUNT_VALUE = 1,
            NOTIFY_VALUE = 1ull << NOTIFY_BASE,
            WAIT_VALUE = 1ull << WAIT_BASE,
            NOTIFY_MASK = MASK21 << NOTIFY_BASE,
            WAIT_MASK = MASK21 << WAIT_BASE;
        static constexpr std::memory_order
            ACQ = std::memory_order_acquire,
            REL = std::memory_order_release,
            RLX = std::memory_order_relaxed;
        std::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
        assert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
        m_counters( [&] { return initial <= MASK21 ? initial : MASK21; }() )
    {
    }

    void xsemaphore::acquire() noexcept
    {
        uint64_t ref = m_counters.load( RLX ), niu;
        for( ; ; )
            if( (ref & MASK21) )
                if( m_counters.compare_exchange_strong( ref, ref - COUNT_VALUE, ACQ, RLX ) )
                    return;
                else
                    continue;
            else
            {
                if( (ref & WAIT_MASK) == WAIT_MASK )
                    abort();

    Oh shit.


                niu = ref + WAIT_VALUE;
                if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
                {
                    ref = niu;
                    break;
                }
            }
        for( ; ; )
        {
            while( (ref & NOTIFY_MASK) )
                if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
                    return;
            m_counters.wait( ref, RLX );
            ref = m_counters.load( RLX );
        }
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
        if( !n )
            return;
        uint64_t ref = m_counters.load( RLX ), niu, notifies;
        int64_t ahead;
        do
        {
            uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
            ahead = n - waiters;
            notifies = ahead >= 0 ? waiters : n;
            uint64_t beyond = ahead >= 0 ? ahead : 0;
            if( (ref & MASK21) + beyond > MASK21 )
                abort();

    ^^^^^^^^^^^^^^^^


    Gotta love the abort here... ;^o


            niu = ref + beyond;
            if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
                abort();
            niu += notifies << NOTIFY_BASE;
            niu -= notifies << WAIT_BASE;
        } while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
        if( ahead >= 0 )
            m_counters.notify_all();
        else
            for( ; notifies; m_counters.notify_one(), --notifies );
    }


    Sigh.
    --- Synchronet 3.21b-Linux NewsLink 1.2
  • From Bonita Montero@Bonita.Montero@gmail.com to comp.lang.c++ on Wed Jan 28 11:57:20 2026
    From Newsgroup: comp.lang.c++

    Am 25.01.2026 um 23:39 schrieb Chris M. Thomasson:
    On 1/25/2026 1:17 AM, Bonita Montero wrote:
    Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:

    On 1/23/2026 10:06 AM, Bonita Montero wrote:

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    [...]

    Are you trying to stress-test the CPU's branch prediction? All of
    those loops...

    A futex'd semaphore's performance isn't determined by the branch
    prediction but by the speed of the cacheline-transfer between the
    cores; this could be really slow. And sleeping inside the kernel
    and being awakened by an inter-processor interrupt is even two
    orders of magnitude slower.


    I know how the futex works. Your loop here is interesting to me:

    for( ; notifies; m_counters.notify_one(), --notifies );

    This is when there are fewer notifies than there are waiting threads.
    --- Synchronet 3.21b-Linux NewsLink 1.2