/* powerpc-sys5.s -- assembly support. */ /* * QuickThreads -- Threads-building toolkit. * Copyright (c) 1993 by David Keppel * * Permission to use, copy, modify and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that the above copyright notice and this notice * appear in all copies. This software is provided as a * proof-of-concept and for demonstration purposes; there is no * representation about the suitability of this software for any * purpose. * PowerPC-System V thread switching module. * * This software is largely based on the original PowerPC-Linux porting * developed by Ken Aaker * * Marco Bucci * December 2002 * */ /* * * PowerPC Register convections: * * r0 volatile * r1 SP * r2 system reserved * r3-r4 volatile for parameter passing and function return * r5-r10 volatile for parameter passing * r11-r12 volatile * r13-r14 non volatile registers * f0 volatile * f1 volatile for parameter passing and function return * f2-f13 volatile for parameter passing * f14-f31 non volatile * * cr2-cr4 non volatile * * * See on the heather file for more documentation. * * * * IMPLEMENTATION NOTES * * * 1) Condition register saving * On most machines, the condition code register is caller-save. * On the PPC, the condition code register is callee-save, so the * thread context switch must preserve it. * * * 2) Floating point registers saving * On resuming a thread, floating point registers are or not restored just * depending on which block routine suspended the thread (i.e. regardless * whether "qt_block", "qt_blocki" or "qt_abort" is used to resume it). * This behaviour is obtained by implementing "qt_block" by means af a nested * call to "qt_blocki". As a result, the blocking of a thread always goes * and returns through "qt_blocki and, if a thread was blocked by "qt_block", * its execution resumes from the floating point restoring code on exit * of "qt_block". * * Thanks to David Keppel that explained me this "simple" trick. * * * 3) C languace code debugging * The original version of this software was developed and debugged under * MacOS X using the Metrowerks Code Warrior PPC integrated assembler. * It could be still used with a C inline assembler by means of a suitable * file to include it. * In order to avoid "copy and paste" bugs, and make easyer the maintaining, * I made the minimal changes, so you can find some strange code as: * * #if 0 * .if 0 * C code here * .endif * #endif * * This is just to embed some C code that is needed by the Code Warrior * integrated assembler. * * * 4) Assembly constants generation * Constants used in the assembly code are generated by running * the C code in the sequel (commented). It uses the C macros declared in * the C heather in order to guarantee that the C interface and the assebly * code are "aligned". I avoided the use of an assebler preprocessor since * they are not so standard and moreover using macro espressions makes the * assembly debugging more difficult. * * #include #include "powerpc_sys5.h" int main() { using namespace std; int i; cout << ".set LR_SAVE, " << PPC_LR_SAVE << endl; cout << ".set BLOCKI_FSIZE, " << QUICKTHREADS_BLOCKI_FRAME_SIZE << endl; cout << ".set BLOCKI_CR_SAVE, " << QUICKTHREADS_BLOCKI_CR_SAVE << endl; cout << ".set BLOCK_FSIZE, " << QUICKTHREADS_BLOCK_FRAME_SIZE << endl; cout << endl; for(i=0; i<12; i++) cout << ".set PAR_" << i << ", " << PPC_PAR(i) << endl; cout << endl; i = 13; cout << ".set GPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCKI_GPR_SAVE(i) << endl; cout << endl; for(i=31; i>13; i--) cout << ".set FPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCK_FPR_SAVE(i) << endl; cout << endl; cout << ".set VARGS_BKOFF, " << QUICKTHREADS_VARGS_BKOFF << endl; cout << endl << endl << endl; for(i=31; i>13; i--) cout << "\tstfd\tf" << i << ",FPR_SAVE_" << i << "(%r1)" << endl; cout << endl; for(i=31; i>13; i--) cout << "\tlfd \tf" << i << ",FPR_SAVE_" << i << "(%r1)" << endl; cout << endl << endl << endl; return 0; } * * * */ #if 0 .text .align 4 .globl qt_block .globl _qt_block .globl qt_blocki .globl _qt_blocki .globl qt_abort .globl _qt_abort .globl qt_start .globl _qt_start .globl qt_vstart .globl _qt_vstart .set LR_SAVE, 4 .set BLOCKI_FSIZE, 96 .set BLOCKI_CR_SAVE, 8 /* CR is saved into the callee's stack frame */ .set BLOCK_FSIZE, 160 .set PAR_0, 8 .set PAR_1, 12 .set PAR_2, 16 .set PAR_3, 20 .set PAR_4, 24 .set PAR_5, 28 .set PAR_6, 32 .set PAR_7, 36 .set PAR_8, 40 .set PAR_9, 44 .set PAR_10, 48 .set PAR_11, 52 .set GPR_SAVE_13, 20 .set FPR_SAVE_31, 152 .set FPR_SAVE_30, 144 .set FPR_SAVE_29, 136 .set FPR_SAVE_28, 128 .set FPR_SAVE_27, 120 .set FPR_SAVE_26, 112 .set FPR_SAVE_25, 104 .set FPR_SAVE_24, 96 .set FPR_SAVE_23, 88 .set FPR_SAVE_22, 80 .set FPR_SAVE_21, 72 .set FPR_SAVE_20, 64 .set FPR_SAVE_19, 56 .set FPR_SAVE_18, 48 .set FPR_SAVE_17, 40 .set FPR_SAVE_16, 32 .set FPR_SAVE_15, 24 .set FPR_SAVE_14, 16 /* various offsets used by "qt_varg" */ .set P_T, PAR_0 .set P_STARTUP, PAR_1 .set P_USERF, PAR_2 .set P_CLEANUP, PAR_3 /* the offset used to move back the linkage area to be adiacent to * the variant argument list before calling "userf(...). * Skip "t", "startup", "userf", "cleanup" and first * 8 parameters (since they are passed via registers) */ .set VARGS_BKOFF, 48 /* location where "t" and "cleanup" are saved (with respect of * the stack frame base) */ .set P_T_SAVE, -4 .set P_CLEANUP_SAVE, -8 #endif /* Block the current thread saving all integer non volatile registers and * start a new thread. */ #if 0 .if 0 #endif void *qt_blocki (void *helper, void *a0, void *a1, void *newthread); asm void *qt_blocki (void *helper, void *a0, void *a1, void *newthread) { #if 0 .endif #endif #if 0 qt_blocki: _qt_blocki: #endif /* prolog code */ stwu %r1,-BLOCKI_FSIZE(%r1) /* allocate the stack frame */ mflr %r0 /* return addr in r0 */ mfcr %r11 /* CR in r11 */ stw %r0,LR_SAVE+BLOCKI_FSIZE(%r1) /* save return addr in the stack */ stw %r11,BLOCKI_CR_SAVE(%r1) /* save CR in the stack */ stmw %r13,GPR_SAVE_13(%r1) /* save non-volatile reg */ /* call helper(qt_t *old, void *a0, void *a1) */ mtlr %r3 /* "helper" addr in the link reg */ mr %r3,%r1 /* current thread (i.e. the SP) in arg "old" */ mr %r1,%r6 /* swap to the new thread (i.e. to its SP) */ blrl /* jump to "helper" */ /* the "helper" return value is returned (since r3 is not changed) */ /* epilog code: return to the new thread's "qt_blocki" caller */ lmw %r13,GPR_SAVE_13(%r1) /* restore non-volatile reg */ lwz %r0,LR_SAVE+BLOCKI_FSIZE(%r1) /* recover return addr */ lwz %r11,BLOCKI_CR_SAVE(%r1) /* recover CR */ mtlr %r0 /* return address in the link reg */ mtcr %r11 /* restore CR */ addi %r1,%r1,BLOCKI_FSIZE /* free the stack frame */ blr /* return */ #if 0 .if 0 #endif } #if 0 .endif #endif /* Abort the current thread and start a new thread. */ #if 0 .if 0 #endif void qt_abort (void *helper, void *a0, void *a1, void *newthread); asm void qt_abort (void *helper, void *a0, void *a1, void *newthread) { #if 0 .endif #endif #if 0 qt_abort: _qt_abort: #endif /* prolog code */ /* there is no prolog. It will never come back */ /* call helper(qt_t *old, void *a0, void *a1) */ mtlr %r3 /* "helper" addr in the link reg */ mr %r1,%r6 /* swap to the new thread (i.e. to its SP) */ /* we don't need to set "old", we can pass just garbage. Actually, since r3 is not changed, "old" is set to "helper" (don't care) */ blrl /* call "helper" */ /* the "helper" return value is returned (since r3 is not changed) */ /* epilog code: return to the new thread's "qt_blocki" caller */ lmw %r13,GPR_SAVE_13(%r1) /* restore non-volatile reg */ lwz %r0,LR_SAVE+BLOCKI_FSIZE(%r1) /* recover return addr */ lwz %r11,BLOCKI_CR_SAVE(%r1) /* recover CR */ mtlr %r0 /* return address in the link reg */ mtcr %r11 /* restore CR */ addi %r1,%r1,BLOCKI_FSIZE /* free the stack frame */ blr /* return */ #if 0 .if 0 #endif } #if 0 .endif #endif /* Block the current thread saving all non volatile registers and start * a new thread. */ #if 0 .if 0 #endif void *qt_block (void *helper, void *a0, void *a1, void *newthread); asm void *qt_block (void *helper, void *a0, void *a1, void *newthread) { #if 0 .endif #endif # if 0 qt_block: _qt_block: #endif /* prolog code */ stwu %r1,-BLOCK_FSIZE(%r1) /* allocate the stack frame */ mflr %r0 /* return addr in r0 */ stw %r0,LR_SAVE+BLOCK_FSIZE(%r1) /* save return addr in the stack */ /* save non-volatile fp reg */ stfd %f31,FPR_SAVE_31(%r1) stfd %f30,FPR_SAVE_30(%r1) stfd %f29,FPR_SAVE_29(%r1) stfd %f28,FPR_SAVE_28(%r1) stfd %f27,FPR_SAVE_27(%r1) stfd %f26,FPR_SAVE_26(%r1) stfd %f25,FPR_SAVE_25(%r1) stfd %f24,FPR_SAVE_24(%r1) stfd %f23,FPR_SAVE_23(%r1) stfd %f22,FPR_SAVE_22(%r1) stfd %f21,FPR_SAVE_21(%r1) stfd %f20,FPR_SAVE_20(%r1) stfd %f19,FPR_SAVE_19(%r1) stfd %f18,FPR_SAVE_18(%r1) stfd %f17,FPR_SAVE_17(%r1) stfd %f16,FPR_SAVE_16(%r1) stfd %f15,FPR_SAVE_15(%r1) stfd %f14,FPR_SAVE_14(%r1) /* block the thread */ bl qt_blocki /* the thread is going to be resumed */ /* restore non-volatile fp reg */ lfd %f31,FPR_SAVE_31(%r1) lfd %f30,FPR_SAVE_30(%r1) lfd %f29,FPR_SAVE_29(%r1) lfd %f28,FPR_SAVE_28(%r1) lfd %f27,FPR_SAVE_27(%r1) lfd %f26,FPR_SAVE_26(%r1) lfd %f25,FPR_SAVE_25(%r1) lfd %f24,FPR_SAVE_24(%r1) lfd %f23,FPR_SAVE_23(%r1) lfd %f22,FPR_SAVE_22(%r1) lfd %f21,FPR_SAVE_21(%r1) lfd %f20,FPR_SAVE_20(%r1) lfd %f19,FPR_SAVE_19(%r1) lfd %f18,FPR_SAVE_18(%r1) lfd %f17,FPR_SAVE_17(%r1) lfd %f16,FPR_SAVE_16(%r1) lfd %f15,FPR_SAVE_15(%r1) lfd %f14,FPR_SAVE_14(%r1) lwz %r0,LR_SAVE+BLOCK_FSIZE(%r1) /* recover return addr */ mtlr %r0 /* return address in the link reg */ addi %r1,%r1,BLOCK_FSIZE /* free the stack frame */ blr /* return */ #if 0 .if 0 #endif } #if 0 .endif #endif /* Start a single argument thread using parameters preloaded in the stack * during thread initialization (see comments on stack initialization in the * heather file). * * Executes: * * only(u, t, userf); */ #if 0 .if 0 #endif void qt_start(void); asm void qt_start(void) { #if 0 .endif #endif #if 0 qt_start: _qt_start: #endif lwz %r3,PAR_0(%r1) /* "u" in r3 */ lwz %r4,PAR_1(%r1) /* "t" in r4 */ lwz %r5,PAR_2(%r1) /* "userf" in r5 */ lwz %r6,PAR_3(%r1) /* "only" in r6 */ mtlr %r6 /* "only" address in the link reg */ /* call only(u, t, userf) */ blrl /* jump to "only" */ /* error if it returns */ b qt_error /* dead code (some inline asm "wants" the epilog, or they genetare it) */ blr #if 0 .if 0 #endif } #if 0 .endif #endif /* Start a variant argument thread using parameters preloaded in the stack * during thread initialization (see comments on stack initialization in the * heather file). * * Executes: * * startup(t); * userf_return = userf(...); * cleanup(pt, userf_return); * ***** Stack layout on start ***** backchain -> STACK BOTTOM (higher address) +==========================+ backchain - 4 -> | | + LOCAL VARIABLES AREA + .............. + + | | +--------------------------+ | | + ALIGNMEBNT PAD + .............. + (if needed) + | | +--------------------------+ | | arg(n) + + | | + VARIABLE ARGUMENT LIST + .............. + for userf call + SP + PAR(5) -> | | arg(1) + + SP + PAR(4) -> | | arg(0) +--------------------------+ SP + PAR(3) -> | | cleanup par + + SP + PAR(2) -> | | userf par + PARAMETER AREA + SP + PAR(1) -> | | startup par + + SP + PAR(0) -> | | t par +--------------------------+ | | + LINKAGE AREA + SP -> | | +==========================+ STACK TOP (lower address) Stack grows down | V ***** Stack layout before call userf ***** backchain -> STACK BOTTOM (higher address) +==========================+ backchain - 4 -> | | + LOCAL VARIABLES AREA + .............. + + | | +--------------------------+ | | + ALIGNMEBNT PAD + .............. + (if needed) + | | +--------------------------+ | | arg(n) + + | | + VARIABLE ARGUMENT LIST + .............. + for userf call + SP + PAR(1) -> | | arg(1) + + SP + PAR(0) -> | | arg(0) +--------------------------+ | | + LINKAGE AREA + SP -> | | +==========================+ STACK TOP (lower address) Stack grows down | V * To call "userf(...)", the argument list must be adiacent to the linkage * area. Instead of copy the argument list, we move back the linkage area * (actually, we just increase the SP and copy the backchain). "t" and * "cleanup" are saved in a local variable area in order to call * cleanup(pt, userf_return). */ #if 0 .if 0 #endif void qt_vstart(void); asm void qt_vstart(void) { #if 0 .endif #endif #if 0 qt_vstart: _qt_vstart: #endif /* NOTICE: the callee routines could save parameter registers in the caller's * stack parameter area. We put "t" in PAR(0) in such a way, if startup(t) * will save "t", it will be saved on the same location thus not delething * any other parameter. */ /* since we will move back the linckage area (to make it adiacent to the * parameter list), we need to save "t" and "cleanup". We have made room for * this on the bottom of the stack frame. */ /* save parameters in the local variable area */ lwz %r11,0(%r1) /* get the backchain */ lwz %r3,P_T(%r1) lwz %r4,P_CLEANUP(%r1) stw %r3,P_T_SAVE(%r11) /* save "pt" */ stw %r4,P_CLEANUP_SAVE(%r11) /* save "cleanup" */ /* call startup(t) */ lwz %r5,P_STARTUP(%r1) mtlr %r5 blrl /* call "startup" */ /* call userf(...) */ lwz %r11,0(%r1) /* reload backchain (r11 is volatile) */ lwz %r4,P_USERF(%r1) /* load "userf" */ mtlr %r4 /* first eight parameter of the variant list must be copyed in * GPR3-GPR10. There is a four places offset due to "t", "startup", * userf" and "cleanup" */ lwz %r3,PAR_4(%r1) lwz %r4,PAR_5(%r1) lwz %r5,PAR_6(%r1) lwz %r6,PAR_7(%r1) lwz %r7,PAR_8(%r1) lwz %r8,PAR_9(%r1) lwz %r9,PAR_10(%r1) lwz %r10,PAR_11(%r1) /* move the linkage area to be adiacent to the argument list */ stw %r11,VARGS_BKOFF(%r1) /* copy backchain */ addi %r1,%r1,VARGS_BKOFF /* move back the stack */ blrl /* call "userf" */ /* call qt_cleanup(void *pt, void *vuserf_return) */ lwz %r11,0(%r1) /* reload backchain (r11 is volatile) */ mr %r4,%r3 /* push "userf" return as 2nd parameter */ lwz %r3,P_T_SAVE(%r11) /* reload "pt" */ lwz %r5,P_CLEANUP_SAVE(%r11) /* reload "cleanup" */ mtlr %r5 blrl b qt_error /* dead code (some inline asm "wants" the epilog, or they genetare it) */ blr #if 0 .if 0 #endif } #if 0 .endif #endif