From 020cd360e1e4406c3dae93004b45104a2324d3c7 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Wed, 12 Oct 2011 17:40:03 -0700 Subject: [PATCH] modify x64 assembly and so forth --- src/rt/arch/x64/_context.S | 129 +++++++++++++++++++++--------------- src/rt/arch/x64/ccall.S | 13 ++-- src/rt/arch/x64/context.cpp | 49 ++------------ src/rt/arch/x64/context.h | 14 ++-- 4 files changed, 93 insertions(+), 112 deletions(-) diff --git a/src/rt/arch/x64/_context.S b/src/rt/arch/x64/_context.S index 561a250cedb..7c689a88bf7 100644 --- a/src/rt/arch/x64/_context.S +++ b/src/rt/arch/x64/_context.S @@ -1,11 +1,39 @@ .text /* +According to ABI documentation found at +http://www.x86-64.org/documentation.html +and Microsoft discussion at +http://msdn.microsoft.com/en-US/library/9z1stfyw%28v=VS.80%29.aspx. + +BOTH CALLING CONVENTIONS + Callee save registers: - ebp, ebx, esi, edi + R12--R15, RDI, RSI, RBX, RBP, RSP + XMM0--XMM5 Caller save registers: - eax, ecx, edx + RAX, RCX, RDX, R8--R11 + XMM6--XMM15 + Floating point stack + +MAC/AMD CALLING CONVENTIONS + +Integer arguments go in registers: + rdi, rsi, rdx, rcx, r8, r9 + +User flags have no specified role and are not preserved + across calls, with the exception of DF in %rFLAGS, + which must be clear (set to "forward" direction) + on function entry and return. + +MICROSOFT CALLING CONVENTIONS + +Return value: RAX + +First four arguments: + RCX, RDX, R8, R9 + XMM0, XMM1, XMM2, XMM3 */ /* @@ -17,60 +45,55 @@ Caller save registers: // swap_registers(registers_t *oregs, registers_t *regs) .globl swap_registers swap_registers: - // save the old context - movl 4(%esp), %eax - //movl %eax, 0(%eax) - movl %ebx, 4(%eax) - movl %ecx, 8(%eax) - movl %edx, 12(%eax) - movl %ebp, 16(%eax) - movl %esi, 20(%eax) - movl %edi, 24(%eax) - //movl %cs, 32(%eax) - //movl %ds, 34(%eax) - //movl %ss, 36(%eax) - //movl %es, 38(%eax) - //movl %fs, 40(%eax) - //movl %gs, 42(%eax) + // n.b. 
when we enter, the return address is at the top of + // the stack (i.e., 0(%RSP)). We + // simply save all NV registers into oregs. + // We then restore all NV registers from regs. This restores + // the old stack pointer, which should include the proper + // return address. We can therefore just return normally to + // jump back into the old code. + + // Save instruction pointer: + popq %rax + movq %rax, 104(%rdi) - // save the flags - pushf - popl %ecx - movl %ecx, 44(%eax) + // Save non-volatile integer registers: + // (including RSP) + movq %rbx, 0(%rdi) + movq %rsp, 8(%rdi) + movq %rbp, 16(%rdi) + movq %r12, 24(%rdi) + movq %r13, 32(%rdi) + movq %r14, 40(%rdi) + movq %r15, 48(%rdi) - // save the return address as the instruction pointer - // and save the stack pointer of the caller - popl %ecx - movl %esp, 28(%eax) - movl %ecx, 48(%eax) + // Save non-volatile XMM registers (low 64 bits, one 8-byte slot each): + movq %xmm0, 56(%rdi) + movq %xmm1, 64(%rdi) + movq %xmm2, 72(%rdi) + movq %xmm3, 80(%rdi) + movq %xmm4, 88(%rdi) + movq %xmm5, 96(%rdi) - // restore the new context - movl 4(%esp), %eax + // Restore non-volatile integer registers: + // (including RSP) + movq 0(%rsi), %rbx + movq 8(%rsi), %rsp + movq 16(%rsi), %rbp + movq 24(%rsi), %r12 + movq 32(%rsi), %r13 + movq 40(%rsi), %r14 + movq 48(%rsi), %r15 - movl 4(%eax), %ebx - // save ecx for later... - movl 12(%eax), %edx - movl 16(%eax), %ebp - movl 20(%eax), %esi - movl 24(%eax), %edi - movl 28(%eax), %esp - // We can't actually change this... - //movl 32(%eax), %cs - //movl 34(%eax), %ds - //movl 36(%eax), %ss - //movl 38(%eax), %es - //movl 40(%eax), %fs - //movl 42(%eax), %gs - - // restore the flags - movl 44(%eax), %ecx - push %ecx - popf - - // ok, now we can restore ecx - movl 8(%eax), %ecx - - // Return! 
- jmp *48(%eax) + // Restore non-volatile XMM registers (low 64 bits, one 8-byte slot each): + movq 56(%rsi), %xmm0 + movq 64(%rsi), %xmm1 + movq 72(%rsi), %xmm2 + movq 80(%rsi), %xmm3 + movq 88(%rsi), %xmm4 + movq 96(%rsi), %xmm5 + // Jump to the instruction pointer + // found in regs: + jmp *104(%rsi) diff --git a/src/rt/arch/x64/ccall.S b/src/rt/arch/x64/ccall.S index 44d0c217736..dd1efda289c 100644 --- a/src/rt/arch/x64/ccall.S +++ b/src/rt/arch/x64/ccall.S @@ -11,12 +11,11 @@ _upcall_call_c_stack: .globl upcall_call_c_stack upcall_call_c_stack: #endif - pushl %ebp - movl %esp,%ebp // save esp - movl 8(%esp),%eax // eax = callee - movl 12(%esp),%esp // switch stack - calll *%eax - movl %ebp,%esp // would like to use "leave" but it's slower - popl %ebp + pushq %rbp + movq %rsp,%rbp // save rsp + movq %rsi,%rsp // switch stack + callq *%rdi + movq %rbp,%rsp // would like to use "leave" but it's slower + popq %rbp ret diff --git a/src/rt/arch/x64/context.cpp b/src/rt/arch/x64/context.cpp index f5fa3777eec..0d108549396 100644 --- a/src/rt/arch/x64/context.cpp +++ b/src/rt/arch/x64/context.cpp @@ -6,9 +6,9 @@ #include #include -extern "C" uint32_t CDECL swap_registers(registers_t *oregs, - registers_t *regs) - asm ("swap_registers"); +extern "C" void CDECL swap_registers(registers_t *oregs, + registers_t *regs) +asm ("swap_registers"); context::context() { @@ -26,48 +26,13 @@ void context::call(void *f, void *arg, void *stack) { swap(*this); // set up the trampoline frame - uint32_t *sp = (uint32_t *)stack; + uint64_t *sp = (uint64_t *)stack; // Shift the stack pointer so the alignment works out right. sp = align_down(sp) - 3; - *--sp = (uint32_t)arg; + *--sp = (uint64_t)arg; *--sp = 0xdeadbeef; - regs.esp = (uint32_t)sp; - regs.eip = (uint32_t)f; + regs.regs[1] = (uint64_t)sp; + regs.ip = (uint64_t)f; } - -#if 0 -// This is some useful code to check how the registers struct got -// layed out in memory. 
-int main() { - registers_t regs; - - printf("Register offsets\n"); - -#define REG(r) \ - printf(" %6s: +%ld\n", #r, (intptr_t)&regs.r - (intptr_t)&regs); - - REG(eax); - REG(ebx); - REG(ecx); - REG(edx); - REG(ebp); - REG(esi); - REG(edi); - REG(esp); - - REG(cs); - REG(ds); - REG(ss); - REG(es); - REG(fs); - REG(gs); - - REG(eflags); - - REG(eip); - - return 0; -} -#endif diff --git a/src/rt/arch/x64/context.h b/src/rt/arch/x64/context.h index 131a994ba45..8f0478d6b74 100644 --- a/src/rt/arch/x64/context.h +++ b/src/rt/arch/x64/context.h @@ -16,19 +16,13 @@ T align_down(T sp) { // There is no platform we care about that needs more than a // 16-byte alignment. - return (T)((uint32_t)sp & ~(16 - 1)); + return (T)((uint64_t)sp & ~(16 - 1)); } struct registers_t { - // general purpose registers - uint32_t eax, ebx, ecx, edx, ebp, esi, edi, esp; - - // segment registers - uint16_t cs, ds, ss, es, fs, gs; - - uint32_t eflags; - - uint32_t eip; + uint64_t regs[7]; // Space for the non-volatile regs: rbx, rsp, rbp, r12:r15 + uint64_t xmms[6]; // Space for the saved regs: xmm0:xmm5 + uint64_t ip; }; class context {