• Invoke any function with custom stack (irrespective of signature)

    From Frederick Virchanza Gotham@cauldwell.thomas@nospicedham.gmail.com to comp.lang.asm.x86 on Fri Jul 14 05:11:23 2023
    From Newsgroup: comp.lang.asm.x86


    My main language is C/C++ and so I'm used to seeing function signatures such as the following:
    void Func(void);
    int Func(void);
    void Func(double, int, char*);
    which would be written as follows in Visual Basic:
    Private Sub Func()
    Private Function Func() As Integer
    Private Sub Func(ByVal arg1 As Double, ByVal arg2 As Integer, ByVal arg3 As String)
    A few months ago, I devised a way in x86_64 assembler and C++ of dynamically allocating a separate stack to use for a single invocation of a function, and in the end I even got it working with exception handling. But it only worked for functions that don't take an argument and don't return a value.
    Since then, I've tried to come up with a universal solution that will work for all function signatures (even those with supernumerary parameters, or that return a very big struct by value), and I've got it working for System V x86_64.
    Here's what I currently have; the assembler is inline among the C++ code:
    https://godbolt.org/z/W4x7vbE7q
    And here it is copy-pasted:
    #include <cassert> // assert
    #include <cstddef> // size_t
    #include <memory> // unique_ptr
    #include <utility> // forward
    // Per-thread state shared between the C++ wrappers and the hand-written
    // assembler, which addresses these symbols by name (fs:<name>@tpoff).
    thread_local char *p_original;       // written by the trampoline: stack pointer saved on entry
    thread_local char *p_replacement;    // set by Invoker: start address for the copy onto the new stack
    thread_local void (*f)();            // set by Invoker: function the trampoline jumps to
    thread_local char *bottom_of_stack;  // written by Assembler_set_bottom_of_stack

    // Routines implemented in the file-scope __asm blocks of this listing.
    extern "C" void Assembler_set_bottom_of_stack() noexcept;
    extern "C" void Assembler_set_stack_pointer_and_invoke() noexcept;
    // Assembler_set_bottom_of_stack: records (rsp on entry + 16) in the
    // thread-local 'bottom_of_stack'. Only r10 is used as scratch.
    // Invoker::operator() calls it immediately before invoking the trampoline.
    __asm("Assembler_set_bottom_of_stack: \n"
    ".intel_syntax noprefix \n"
    " mov r10, rsp \n" // r10 = stack pointer on entry (it points at our return address)
    " add r10, 16 \n" // +8 return addr, +8 to be safe
    " mov QWORD PTR fs:bottom_of_stack@tpoff, r10 \n" // publish the result in the thread-local variable
    " ret \n"
    ".att_syntax");
    // Callable returned by Stacker::operator(). Constructing it stores the target
    // function and the replacement-stack address in thread-local variables;
    // calling it records the current stack position and then enters the assembler
    // trampoline through a pointer that has the target's own signature, so the
    // compiler lays out arguments and return value exactly as for a direct call.
    template<typename R, typename... Params>
    class Invoker {
        using Target = R (*)(Params...);
        using Opaque = void (*)();

        friend class Stacker;  // only Stacker may construct an Invoker

        Invoker(char *const arg_p, Target const arg_f) noexcept
        {
            p_replacement = arg_p;                // sets a thread_local variable
            f = reinterpret_cast<Opaque>(arg_f);  // sets a thread_local variable
        }

    public:
        // Runs the stored function with 'args' on the replacement stack and returns its result.
        R operator()(Params... args) // This could be static function but I like operator()
        {
            Assembler_set_bottom_of_stack();
            auto const trampoline = reinterpret_cast<Target>(Assembler_set_stack_pointer_and_invoke);
            return trampoline( std::forward<Params>(args)... );
        }
    };
    // Provides the memory used as a replacement stack for one function call.
    // Either allocates 'len' bytes itself or borrows a caller-supplied buffer;
    // operator() binds a function pointer to that memory and returns an Invoker.
    class Stacker {
        char *p;                          // address handed to Invoker: the trampoline writes its first qword here and works downwards
        std::unique_ptr<char[]> mystack;  // owning storage; stays empty for caller-supplied buffers

        // Returns the highest address q inside [base, base+len) such that
        // q % 16 == 8 and the 8 bytes at q still lie inside the buffer.
        //
        // Why 8 (mod 16): the trampoline copies (bottom_of_stack - entry rsp) bytes
        // downwards from q and enters the target with rsp at the end of that copy.
        // With an ABI-conforming caller both of those addresses are 8 (mod 16), so
        // the distance is a multiple of 16 and the target sees rsp % 16 == q % 16.
        // The System V x86_64 ABI requires rsp % 16 == 8 at function entry; the
        // previous 'base + len - 16' produced 0 for a 16-byte-aligned buffer.
        static char *entry_slot(char *const base, std::size_t const len) noexcept
        {
            static_assert( sizeof(std::size_t) >= sizeof(char*) );
            std::size_t const first = reinterpret_cast<std::size_t>(base);
            std::size_t const slot  = (((first + len) - 16u) & ~static_cast<std::size_t>(15u)) + 8u;
            return base + (slot - first);
        }

    public:
        // Allocates a private stack of 'len' bytes (len >= 128).
        Stacker(std::size_t const len) noexcept(false) // might throw bad_alloc
        {
            assert( len >= 128u );
            mystack.reset( new char[len] );
            p = entry_slot(mystack.get(), len);
        }

        // Uses the caller-owned buffer [arg, arg+len) as the stack (len >= 128).
        // The buffer must outlive every Invoker created from this object.
        Stacker(char *const arg, std::size_t const len) noexcept
        {
            assert( nullptr != arg );
            assert( len >= 128u );
            p = entry_slot(arg, len);
        }

        // Binds 'arg' to this stack; call the returned object with the function's arguments.
        template<typename R, typename... Params>
        Invoker<R,Params...> operator()( R(*const arg)(Params...) )
        {
            return Invoker<R,Params...>(this->p, arg);
        }
    };
    /* Assembler_set_stack_pointer_and_invoke: trampoline that runs the function
    stored in the thread-local 'f' on the replacement stack.
    It saves rsp in 'p_original', copies the old stack contents lying between
    the entry stack pointer and 'bottom_of_stack' downwards from
    'p_replacement', points rsp at the copy, overwrites the copied return
    address with Label_Jump_Back and jumps to 'f'. When 'f' returns, rsp is
    restored from 'p_original' and control returns to the original caller.
    In the following function written in x86_64 assembler using
    the System V calling convention, we can only clobber r10
    and r11 because all of the other caller-saved registers
    must be preserved for the 'jmp' to the target function.
    NOTE(review): rax is also overwritten below (it is the source pointer of
    the copy loop) - confirm that no callee depends on its incoming value. */ __asm("Assembler_set_stack_pointer_and_invoke:\n"
    ".intel_syntax noprefix \n"
    // Step 1: Save the original stack pointer (it still points at our return address)
    " mov QWORD PTR fs:p_original@tpoff, rsp \n"
    // Step 2: Load the three pointers used by the copy loop
    " push r15 \n" // save to restore later
    " mov r15, rsp \n" // pointer to the r15 we just pushed onto stack
    " add r15, 8 \n" // sets 'r15' to top of old stack (the value rsp had on entry)
    " mov r10, QWORD PTR fs:p_replacement@tpoff \n" // sets 'r10' to top of new stack
    " mov rax, QWORD PTR fs:bottom_of_stack@tpoff \n" // sets 'rax' to bottom of old stack
    // Right now: R15 is the top of the old stack
    // R10 is the top of the new stack
    // RAX is the bottom of the old stack
    // We want to do:
    // while ( rax != r15 ) *r10-- = *rax--;
    // Step 3: Copy the old stack to the new stack (it might contain supernumerary arguments or a big return struct)
    // Each pass moves 8 bytes but both pointers step down by only 1 byte, so successive copies overlap.
    " jmp cond \n" // Jump to condition of 'while' loop
    "loop: \n" // ----<----<----<----<----
    " mov r11, qword ptr [rax] \n" // | load 8 bytes from the old stack
    " mov qword ptr [r10], r11 \n" // ^ store them on the new stack
    " sub r10, 1 \n" // | Loop
    " sub rax, 1 \n" // |
    "cond: \n" // ^
    " cmp rax, r15 \n" // | stop once the source pointer reaches the entry rsp
    " jne loop \n" // ---->---->---->---->----
    " pop r15 \n" // restore original value
    // Step 4: Change the stack pointer to the new stack =============================================
    " mov rsp, r10 \n" // ================================================= new stack
    // Step 5: Set the return address to after the 'jmp' instruction
    " lea r10, [Label_Jump_Back] \n"
    " add rsp, 8 \n" // This line and the next line replace the return address on the stack
    " push r10 \n" // This line and the previous line replace the return address on the stack
    // Step 6: Invoke the function
    " jmp QWORD PTR fs:f@tpoff \n" // --- Invoke the function!
    "Label_Jump_Back: \n"
    // Note: The label has already been popped off the stack by the callee
    // Step 7: Restore the original stack pointer
    " mov rsp, QWORD PTR fs:p_original@tpoff \n"
    // Step 8: Jump back to the original address (the caller's return address is still on the old stack)
    " ret \n"
    ".att_syntax");
    // =================== And now the test code ===============================================
    #include <iostream> // cout, endl
    using std::cout, std::endl;
    // Test aggregate: the thread describes it as "a very big struct" returned by value.
    struct VeryBigStruct {
        double a[3];
        int b[3];
        double c[3];
        int d[3];
        double e[3];
        int f[3];
    };

    // Test function with ten int parameters that returns a VeryBigStruct.
    // Returns a struct whose f[2] holds the sum of all ten arguments; every
    // other member is zero.
    VeryBigStruct Func2(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int a9, int a10)
    {
        VeryBigStruct vbs{};  // value-initialise: previously all members except f[2] were returned uninitialised
        vbs.f[2] = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10;
        return vbs;
    }
    // Demo: calls Func2 once directly and once on a freshly allocated stack,
    // printing f[2] of the returned struct each time.
    int main()
    {
        constexpr std::size_t stack_bytes = 1048576000u;  // size of the temporary replacement stack

        std::cout << "first line in main\n";

        // Reference call on the normal stack.
        std::cout << "Retval: " << Func2(1,2,3,4,5,6,7,8,9,10).f[2] << std::endl;

        // Same call on the replacement stack; the Stacker temporary lives until the end of the statement.
        std::cout << "Retval: " << Stacker(stack_bytes)(Func2)(1,2,3,4,5,6,7,8,9,10).f[2] << std::endl;

        std::cout << "last line in main\n";
    }
    --- Synchronet 3.21d-Linux NewsLink 1.2
  • From Frederick Virchanza Gotham@cauldwell.thomas@nospicedham.gmail.com to comp.lang.asm.x86 on Sat Jul 15 06:35:23 2023
    From Newsgroup: comp.lang.asm.x86

    On Friday, July 14, 2023 at 1:12:54 PM UTC+1, Frederick Virchanza Gotham wrote:

    A few months ago, I devised a way in x86_64 assembler and C++ of dynamically allocating a separate stack to use for a single invocation of a function, and in the end I even got it working with exception handling. But it only worked for functions that don't take an argument and don't return a value.

    Since then, I've tried to come up with a universal solution that will work for all function signatures (even those with supernumerary parameters, or that return a very big struct by value), and I've got it working for System V x86_64.
    I'm trying to get it working with exception handling, but it's segfaulting. I've tried copying the entire stack but it's still segfaulting inside libgcc inside the function "uw_update_context_1" when it tries to copy the context struct. Anybody know what's wrong with the following?
    https://godbolt.org/z/vv7hTGWdr
    And here it is copy-pasted:
    #include <cassert> // assert
    #include <cstddef> // size_t
    #include <climits> // ULONG_LONG_MAX
    #include <cstdlib> // strtoull
    #include <cstring> // strstr
    #include <cstdint> // UINTPTR_MAX
    #include <memory> // unique_ptr
    #include <utility> // forward
    #include <exception> // exception_ptr, current_exception
    #include <stdexcept> // runtime_error
    #include <type_traits> // is_rvalue_reference, is_trivially_destructible
    #include <iostream> // cout, endl ============================= REMOVE THIS
    using std::cerr, std::cout, std::endl;
    #include <unistd.h> // lseek, read, close
    #include <fcntl.h> // open
    // Helper for GetStackBottom: 'line' is one NUL-terminated line of a maps file
    // ("start-end perms ..."). Returns the 'end' address, or nullptr when the
    // line has no '-' or no hexadecimal digits after it.
    static char *ParseMapsRangeEnd(char const *const line) noexcept
    {
        char const *const dash = std::strchr(line, '-');
        if ( nullptr == dash ) return nullptr;
        static_assert( ULLONG_MAX >= UINTPTR_MAX );
        char *tail = nullptr;
        unsigned long long const addr = std::strtoull(dash + 1, &tail, 16);
        if ( tail == dash + 1 ) return nullptr;
        return reinterpret_cast<char*>(addr);
    }

    // Returns the end address (one past the highest byte) of the mapping that
    // /proc/thread-self/maps labels "[stack]", or nullptr if the file cannot be
    // opened/read or no such line exists.
    //
    // The file is read in chunks until end-of-file and examined line by line, so
    // the result does not depend on how much a single read() returns or on how
    // large the file is. All working buffers are local, so concurrent calls from
    // different threads do not share state (the cached descriptor is per-thread).
    //
    // NOTE(review): on current kernels only the initial thread's stack carries the
    // "[stack]" label - confirm the intended behaviour when called from another thread.
    char *GetStackBottom(void) noexcept
    {
        // Opened once per thread and kept open; rewound on every call.
        static thread_local int fd = -1;
        if ( fd < 0 )
        {
            fd = ::open("/proc/thread-self/maps", O_RDONLY | O_CLOEXEC);
            if ( fd < 0 ) return nullptr;
            // std::atexit( [](){ ::close(fd); } ); - Won't work
        }
        if ( 0 != ::lseek(fd, 0, SEEK_SET) ) return nullptr;

        static constexpr char marker[] = "[stack]";
        constexpr std::size_t marker_len = sizeof marker - 1u;

        char line[256];          // the line currently being assembled (the "[stack]" line is short)
        std::size_t used = 0u;   // number of characters stored in 'line'
        bool truncated = false;  // set when the current line did not fit; such a line is never a match

        // True when the assembled line ends with the marker.
        auto const is_stack_line = [&]() noexcept -> bool
        {
            return !truncated
                && used >= marker_len
                && 0 == std::memcmp(line + used - marker_len, marker, marker_len);
        };

        char chunk[4096];
        for (;;)
        {
            auto const got = ::read(fd, chunk, sizeof chunk);
            if ( got < 0 ) return nullptr;
            if ( 0 == got ) break;  // end of file

            for ( char const *it = chunk; it != chunk + got; ++it )
            {
                if ( '\n' != *it )
                {
                    if ( used < sizeof line - 1u ) line[used++] = *it;
                    else truncated = true;
                    continue;
                }
                line[used] = '\0';
                if ( is_stack_line() ) return ParseMapsRangeEnd(line);
                used = 0u;
                truncated = false;
            }
        }

        // A final line that is not newline-terminated.
        line[used] = '\0';
        return is_stack_line() ? ParseMapsRangeEnd(line) : nullptr;
    }
    // Per-thread state shared between the C++ wrappers and the hand-written
    // assembler, which addresses some of these symbols by name (fs:<name>@tpoff).
    thread_local char *p_original;       // written by the trampoline: stack pointer saved on entry
    thread_local char *p_replacement;    // set by Invoker: start address for the copy onto the new stack
    thread_local void (*f)();            // set by Invoker: function the trampoline jumps to
    thread_local void (*g)();            // set by Invoker: the user's function, called from the wrapper stored in 'f'
    thread_local char *bottom_of_stack;  // highest old-stack address the trampoline copies from
    thread_local std::exception_ptr e;   // exception captured on the replacement stack, if any

    // Routines implemented in the file-scope __asm blocks of this listing.
    extern "C" void Assembler_set_bottom_of_stack() noexcept;
    extern "C" void Assembler_set_stack_pointer_and_invoke() noexcept;
    // Assembler_set_bottom_of_stack: records (rsp on entry + 16) in the
    // thread-local 'bottom_of_stack'. Only r10 is used as scratch.
    // Not called by the C++ code in this listing: Invoker::operator() derives
    // 'bottom_of_stack' from GetStackBottom() instead.
    __asm("Assembler_set_bottom_of_stack: \n"
    ".intel_syntax noprefix \n"
    " mov r10, rsp \n" // r10 = stack pointer on entry (it points at our return address)
    " add r10, 16 \n" // +8 return addr, +8 to be safe
    " mov QWORD PTR fs:bottom_of_stack@tpoff, r10 \n" // publish the result in the thread-local variable
    " ret \n"
    ".att_syntax");
    // Produces a prvalue of type remove_cvref_t<T> without running any code that
    // initialises it: an empty captureless lambda is converted to void(*)(),
    // reinterpreted as a pointer to a function returning that type, and called.
    // Used by Invoker::exception_capable as the return value on the path where
    // the wrapped function threw.
    // NOTE(review): calling a function through a pointer to a different function
    // type is undefined behaviour in standard C++; the value obtained is
    // presumably whatever the calling convention leaves in the return location.
    // The 'requires' clause restricts T to trivially destructible types.
    template<typename T> requires std::is_trivially_destructible_v<std::remove_cvref_t<T> >
    std::remove_cvref_t<T> dummy_prvalue(void) noexcept
    {
    typedef std::remove_cvref_t<T> TT;
    void (*const tmp)(void) = [](){}; // empty lambda as a plain function pointer
    TT (*const funcptr)(void) = reinterpret_cast<TT(*)(void)>(tmp); // pretend it returns TT
    return funcptr(); // guaranteed elision of move/copy operations here
    }
    // Callable returned by Stacker::operator(). Constructing it stores, in
    // thread-local variables, the replacement-stack address, the user's function
    // ('g') and a noexcept wrapper around it ('f'). Calling it enters the
    // assembler trampoline through a pointer with the user's signature; the
    // wrapper runs on the replacement stack, captures any exception there, and
    // operator() rethrows it once execution is back on the original stack.
    //
    // R may be void. Otherwise the return statement on the exception path
    // relies on dummy_prvalue<R>, which is constrained to trivially
    // destructible types.
    template<typename R, typename... Params>
    class Invoker {
        // Runs on the replacement stack. Never lets an exception escape: the
        // exception is stored in the thread-local 'e' instead.
        static R exception_capable(Params... args) noexcept
        {
            std::cerr << "Entered exception_capable\n";
            R (*const funcptr)(Params...) = reinterpret_cast<R(*)(Params...)>(g);
            try
            {
                std::cerr << "Entered try-block\n";
                return funcptr( std::forward<Params>(args)... );
            }
            catch (...)
            {
                std::cerr << "Exception thrown!\n";
                e = std::current_exception();
            }
            // Only reached after an exception: the caller discards this value and rethrows.
            if constexpr ( std::is_void_v<R> ) return;
            else return dummy_prvalue<R>();
        }

        // Rethrows the exception captured by exception_capable, if there is one.
        // The thread-local is cleared first so it does not keep the exception alive.
        static void rethrow_pending(void)
        {
            if ( nullptr == e ) return;
            std::exception_ptr const pending = std::exchange(e, nullptr);
            std::cerr << "About to rethrow!\n";
            std::rethrow_exception(pending);
        }

        Invoker(char *const arg_p, R(*const arg_f)(Params...)) noexcept
        {
            p_replacement = arg_p; // sets a thread_local variable
            g = reinterpret_cast<void (*)(void)>(arg_f); // sets a thread_local variable
            f = reinterpret_cast<void (*)(void)>(exception_capable); // sets a thread_local variable
        }
    public:
        // Runs the stored function with 'args' on the replacement stack.
        // Returns its result; rethrows whatever it threw.
        // Throws std::runtime_error if the bottom of the current stack cannot be
        // determined (previously a null result was used in pointer arithmetic and
        // handed to the copy loop).
        R operator()(Params... args) noexcept(false) // This could be static function but I like operator()
        {
            char *const stack_bottom = GetStackBottom();
            if ( nullptr == stack_bottom )
                throw std::runtime_error("Invoker: cannot determine the bottom of the current stack");
            bottom_of_stack = stack_bottom - 8u;  // address of the last qword inside the mapping
            e = nullptr;
            R (*const funcptr)(Params...) = reinterpret_cast<R(*)(Params...)>(Assembler_set_stack_pointer_and_invoke);
            if constexpr ( std::is_void_v<R> )
            {
                funcptr( std::forward<Params>(args)... );
                rethrow_pending();
            }
            else
            {
                R retval = funcptr( std::forward<Params>(args)... );
                rethrow_pending();
                return retval;
            }
        }
        friend class Stacker;
    };
    // Provides the memory used as a replacement stack for one function call.
    // Either allocates 'len' bytes itself or borrows a caller-supplied buffer;
    // operator() binds a function pointer to that memory and returns an Invoker.
    class Stacker {
        char *p;                          // address handed to Invoker: the trampoline writes its first qword here and works downwards
        std::unique_ptr<char[]> mystack;  // owning storage; stays empty for caller-supplied buffers

        // Returns the highest address q inside [base, base+len) such that
        // q % 16 == 8 and the 8 bytes at q still lie inside the buffer.
        //
        // Why 8 (mod 16): the trampoline copies (bottom_of_stack - entry rsp) bytes
        // downwards from q and enters the target with rsp at the end of that copy.
        // bottom_of_stack is a page-aligned address minus 8 and the trampoline's
        // entry rsp is 8 (mod 16) for an ABI-conforming caller, so the distance is
        // a multiple of 16 and the target sees rsp % 16 == q % 16. The System V
        // x86_64 ABI requires rsp % 16 == 8 at function entry; the previous
        // 'base + len - 16' produced 0 for a 16-byte-aligned buffer, which breaks
        // code that uses aligned SSE accesses on its stack frame.
        static char *entry_slot(char *const base, std::size_t const len) noexcept
        {
            static_assert( sizeof(std::size_t) >= sizeof(char*) );
            std::size_t const first = reinterpret_cast<std::size_t>(base);
            std::size_t const slot  = (((first + len) - 16u) & ~static_cast<std::size_t>(15u)) + 8u;
            return base + (slot - first);
        }

    public:
        // Allocates a private stack of 'len' bytes (len >= 128).
        Stacker(std::size_t const len) noexcept(false) // might throw bad_alloc
        {
            assert( len >= 128u );
            mystack.reset( new char[len] );
            p = entry_slot(mystack.get(), len);
        }

        // Uses the caller-owned buffer [arg, arg+len) as the stack (len >= 128).
        // The buffer must outlive every Invoker created from this object.
        Stacker(char *const arg, std::size_t const len) noexcept
        {
            assert( nullptr != arg );
            assert( len >= 128u );
            p = entry_slot(arg, len);
        }

        // Binds 'arg' to this stack; call the returned object with the function's arguments.
        template<typename R, typename... Params>
        Invoker<R,Params...> operator()( R(*const arg)(Params...) ) noexcept
        {
            return Invoker<R,Params...>(this->p, arg);
        }
    };
    /* Assembler_set_stack_pointer_and_invoke: trampoline that runs the function
    stored in the thread-local 'f' on the replacement stack.
    It saves rsp in 'p_original', copies the old stack contents lying between
    the entry stack pointer and 'bottom_of_stack' downwards from
    'p_replacement', points rsp at the copy, overwrites the copied return
    address with Label_Jump_Back and jumps to 'f'. When 'f' returns, rsp is
    restored from 'p_original' and control returns to the original caller.
    In the following function written in x86_64 assembler using
    the System V calling convention, we can only clobber r10
    and r11 because all of the other caller-saved registers
    must be preserved for the 'jmp' to the target function.
    NOTE(review): rax is also overwritten below (it is the source pointer of
    the copy loop) - confirm that no callee depends on its incoming value. */ __asm("Assembler_set_stack_pointer_and_invoke:\n"
    ".intel_syntax noprefix \n"
    // Step 1: Save the original stack pointer (it still points at our return address)
    " mov QWORD PTR fs:p_original@tpoff, rsp \n"
    // Step 2: Load the three pointers used by the copy loop
    " push r15 \n" // save to restore later
    " mov r15, rsp \n" // pointer to the r15 we just pushed onto stack
    " add r15, 8 \n" // sets 'r15' to top of old stack (the value rsp had on entry)
    " mov r10, QWORD PTR fs:p_replacement@tpoff \n" // sets 'r10' to top of new stack
    " mov rax, QWORD PTR fs:bottom_of_stack@tpoff \n" // sets 'rax' to bottom of old stack
    // Right now: R15 is the top of the old stack
    // R10 is the top of the new stack
    // RAX is the bottom of the old stack
    // We want to do:
    // while ( rax != r15 ) *r10-- = *rax--;
    // Step 3: Copy the old stack to the new stack (it might contain supernumerary arguments or a big return struct)
    // Each pass moves 8 bytes but both pointers step down by only 1 byte, so successive copies overlap.
    " jmp cond \n" // Jump to condition of 'while' loop
    "loop: \n" // ----<----<----<----<----
    " mov r11, qword ptr [rax] \n" // | load 8 bytes from the old stack
    " mov qword ptr [r10], r11 \n" // ^ store them on the new stack
    " sub r10, 1 \n" // | Loop
    " sub rax, 1 \n" // |
    "cond: \n" // ^
    " cmp rax, r15 \n" // | stop once the source pointer reaches the entry rsp
    " jne loop \n" // ---->---->---->---->----
    " pop r15 \n" // restore original value
    // Step 4: Change the stack pointer to the new stack =============================================
    " mov rsp, r10 \n" // ================================================= new stack
    // Step 5: Set the return address to after the 'jmp' instruction
    " lea r10, [Label_Jump_Back] \n"
    " add rsp, 8 \n" // This line and the next line replace the return address on the stack
    " push r10 \n" // This line and the previous line replace the return address on the stack
    // Step 6: Invoke the function
    " jmp QWORD PTR fs:f@tpoff \n" // --- Invoke the function!
    "Label_Jump_Back: \n"
    // Note: The label has already been popped off the stack by the callee
    // Step 7: Restore the original stack pointer
    " mov rsp, QWORD PTR fs:p_original@tpoff \n"
    // Step 8: Jump back to the original address (the caller's return address is still on the old stack)
    " ret \n"
    ".att_syntax");
    // =================== And now the test code ===============================================
    #include <iostream> // cout, endl
    using std::cout, std::endl;
    // Test aggregate: the thread describes it as "a very big struct" returned by value.
    struct VeryBigStruct {
        double a[3];
        int b[3];
        double c[3];
        int d[3];
        double e[3];
        int f[3];
    };

    // Test function for the exception path: announces itself on cerr, fills in
    // f[2] with the sum of its ten arguments, and then always throws int 3.
    VeryBigStruct Func2(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int a9, int a10)
    {
        std::cerr << "Entered Func2\n";

        int const total = a1+a2+a3+a4+a5+a6+a7+a8+a9+a10;
        VeryBigStruct vbs;
        vbs.f[2] = total;

        throw 3;     // every call leaves here
        return vbs;  // never reached
    }
    // Demo for the exception path: calls the throwing Func2 once directly and
    // once on a freshly allocated stack, catching the int in both cases.
    int main()
    {
        constexpr std::size_t stack_bytes = 1048576000u;  // size of the temporary replacement stack

        // Shared handler body for both catch clauses.
        auto const report_caught = [](int const n)
        {
            std::cout << "Caught an int: " << n << std::endl;
        };

        std::cout << "first line in main\n";

    #if 1
        // Reference call on the normal stack.
        try
        {
            std::cout << "Retval: " << Func2(1,2,7,4,5,6,7,8,9,10).f[2] << std::endl;
        }
        catch (int const n)
        {
            report_caught(n);
        }
    #endif

        // Same call on the replacement stack; the Stacker temporary lives until the end of the statement.
        try
        {
            std::cout << "Retval: " << Stacker(stack_bytes)(Func2)(1,2,7,4,5,6,7,8,9,10).f[2] << std::endl;
        }
        catch (int const n)
        {
            report_caught(n);
        }

        std::cout << "last line in main\n";
    }
    --- Synchronet 3.21d-Linux NewsLink 1.2