/* powerpc_mach.s -- assembly support. */

/*
 * QuickThreads -- Threads-building toolkit.
 * Copyright (c) 1993 by David Keppel
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice and this notice
 * appear in all copies.  This software is provided as a
 * proof-of-concept and for demonstration purposes; there is no
 * representation about the suitability of this software for any
 * purpose.


 * PowerPC-Mach thread switching module.
 * Darwin (MacOS X) assembly
 *
 * NOTICE: Syntax for register names is not the GNU one. Registers are
 * named "rx" and "fx", not "%rx" and "%fx" as usual for the GNU "as" tool.
 * Darwin "as" tool is based on GNU "as" but follows the "official" PowerPC
 * syntax.
 *
 *
 * This software is largely based on the original PowerPC-Linux port
 * developed by Ken Aaker <kenaaker@silverbacksystems.com>
 *
 * Marco Bucci <marco.bucci@inwind.it>
 * December 2002
 *
 */


/*
 *
 * PowerPC Register conventions:
 *
 *	r0		volatile
 *	r1		SP
 *	r2		system reserved
 *	r3-r4		volatile for parameter passing and function return
 *	r5-r10		volatile for parameter passing
 *	r11-r12		volatile
 *	r13-r31		non volatile registers
 *	f0		volatile
 *	f1		volatile for parameter passing and function return
 *	f2-f13		volatile for parameter passing
 *	f14-f31		non volatile
 *
 *	cr2-cr4		non volatile
 *
 *
 * See the header file for more documentation.
 *
 *
 *
 * IMPLEMENTATION NOTES
 *
 *
 * 1) Condition register saving
 * On most machines, the condition code register is caller-save.
 * On the PPC, the condition code register is callee-save, so the
 * thread context switch must preserve it.
 *
 *
 * 2) Floating point registers saving
 * On resuming a thread, floating point registers are restored or not
 * depending only on which block routine suspended the thread (i.e.
 * regardless of whether "qt_block", "qt_blocki" or "qt_abort" is used to
 * resume it).
 * This behaviour is obtained by implementing "qt_block" by means of a nested
 * call to "qt_blocki". As a result, the blocking of a thread always goes
 * and returns through "qt_blocki" and, if a thread was blocked by
 * "qt_block", its execution resumes from the floating point restoring code
 * on exit of "qt_block".
 *
 * Thanks to David Keppel, who explained this "simple" trick to me.
 *
 *
 * 3) C language code debugging
 * This software was developed and debugged using the Metrowerks
 * Code Warrior PPC integrated assembler. It can still be used with the
 * Code Warrior compiler by means of the file "powerpc_mach_asm_debug.c"
 * that includes it.
 * In order to avoid "copy and paste" bugs, and to make maintenance easier,
 * I made only minimal changes, so you can find some strange code such as:
 *
 *	#if 0
 *	.if 0
 *		C code here
 *	.endif
 *	#endif
 *
 * This is just to embed some C code that is needed by the Code Warrior
 * integrated assembler.
 *
 *
 * 4) Assembly constants generation
 * Constants used in the assembly code are generated by running
 * the C code that follows (commented out). It uses the C macros declared in
 * the C header in order to guarantee that the C interface and the assembly
 * code are "aligned". I avoided the use of an assembler preprocessor since
 * they are not so standard and, moreover, using macro expressions makes
 * assembly debugging more difficult.
 *
 * Generator program: it prints the ".set" constant block and the
 * "stfd"/"lfd" sequences for f31..f14 that are pasted further down.
 *


#include <iostream>
#include "powerpc_mach.h"

int main()
{
	using namespace std;

	int i;

	cout << ".set LR_SAVE, " << PPC_LR_SAVE << endl;
	cout << ".set CR_SAVE, " << PPC_CR_SAVE << endl;
	cout << ".set BLOCKI_FSIZE, " << QUICKTHREADS_BLOCKI_FRAME_SIZE << endl;
	cout << ".set BLOCK_FSIZE, " << QUICKTHREADS_BLOCK_FRAME_SIZE << endl;

	cout << endl;
	for(i=0; i<12; i++)
		cout << ".set PAR_" << i << ", " << PPC_PAR(i) << endl;

	cout << endl;
	i = 13;
	cout << ".set GPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCKI_GPR_SAVE(i) << endl;

	cout << endl;
	for(i=31; i>13; i--)
		cout << ".set FPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCK_FPR_SAVE(i) << endl;

	cout << endl;
	cout << ".set VARGS_BKOFF, " << QUICKTHREADS_VARGS_BKOFF << endl;


	cout << endl << endl << endl;

	for(i=31; i>13; i--)
		cout << "\tstfd\tf" << i << ",FPR_SAVE_" << i << "(r1)" << endl;

	cout << endl;
	for(i=31; i>13; i--)
		cout << "\tlfd \tf" << i << ",FPR_SAVE_" << i << "(r1)" << endl;

	cout << endl << endl << endl;


	return 0;
}


 *
 *
 *
 */


/* Assembler-only section (section/alignment, exported symbols and the
 * numeric constants). It is bracketed by "#if 0"/"#endif", so a C
 * preprocessor drops it; see implementation note 3.
 *
 * All constants below are byte offsets/sizes used with r1 (the SP) and are
 * the values printed by the generator program in the comment above.
 */
#if 0

	.text
	.align 4

	.globl qt_block
	.globl _qt_block
	.globl qt_blocki
	.globl _qt_blocki
	.globl qt_abort
	.globl _qt_abort
	.globl qt_start
	.globl _qt_start
	.globl qt_vstart
	.globl _qt_vstart


.set LR_SAVE, 8
.set CR_SAVE, 4
.set BLOCKI_FSIZE, 128
.set BLOCK_FSIZE, 192

.set PAR_0, 24
.set PAR_1, 28
.set PAR_2, 32
.set PAR_3, 36
.set PAR_4, 40
.set PAR_5, 44
.set PAR_6, 48
.set PAR_7, 52
.set PAR_8, 56
.set PAR_9, 60
.set PAR_10, 64
.set PAR_11, 68

.set GPR_SAVE_13, 52

.set FPR_SAVE_31, 184
.set FPR_SAVE_30, 176
.set FPR_SAVE_29, 168
.set FPR_SAVE_28, 160
.set FPR_SAVE_27, 152
.set FPR_SAVE_26, 144
.set FPR_SAVE_25, 136
.set FPR_SAVE_24, 128
.set FPR_SAVE_23, 120
.set FPR_SAVE_22, 112
.set FPR_SAVE_21, 104
.set FPR_SAVE_20, 96
.set FPR_SAVE_19, 88
.set FPR_SAVE_18, 80
.set FPR_SAVE_17, 72
.set FPR_SAVE_16, 64
.set FPR_SAVE_15, 56
.set FPR_SAVE_14, 48


/* various offsets used by "qt_vstart" */
.set P_T, PAR_0
.set P_STARTUP, PAR_1
.set P_USERF, PAR_2
.set P_CLEANUP, PAR_3
	/* the offset used to move back the linkage area to be adjacent to
	 * the variant argument list before calling "userf(...)" */
.set VARGS_BKOFF, 16	/* skip "t", "startup", "userf" and "cleanup" */

	/* location where "t" and "cleanup" are saved (with respect to
	 * the stack frame base, i.e. the backchain value) */
.set P_T_SAVE, -4
.set P_CLEANUP_SAVE, -8

#endif



/* Block the current thread, saving all integer non-volatile registers, and
 * start a new thread.
 *
 * Register use on entry, as consumed by the code below:
 *	r3	"helper" (moved to the link register and called)
 *	r4, r5	"a0", "a1" (not touched, so "helper" receives them as its
 *		2nd and 3rd arguments)
 *	r6	"newthread" (loaded into r1, i.e. it becomes the new SP)
 *
 * Frame layout: a BLOCKI_FSIZE-byte frame is allocated; r13-r31 are stored
 * from GPR_SAVE_13 upward (stmw/lmw starting at r13), while LR and CR go to
 * LR_SAVE/CR_SAVE relative to the SP value on entry (offsets are taken
 * after adding BLOCKI_FSIZE back).
 *
 * "helper" is called with r3 = the old thread's SP. The epilog then runs on
 * the NEW thread's stack, so it restores that thread's registers and
 * returns to that thread's caller; r3 is whatever "helper" left in it.
 */
#if 0
.if 0
#endif
void *qt_blocki (void *helper, void *a0, void *a1, void *newthread);
asm void *qt_blocki (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

#if 0
qt_blocki:
_qt_blocki:
#endif
/* prolog code */
	stwu	r1,-BLOCKI_FSIZE(r1)		/* allocate the stack frame */
	mflr	r0				/* return addr in r0 */
	mfcr	r11				/* CR in r11 */
	stw	r0,LR_SAVE+BLOCKI_FSIZE(r1)	/* save return addr in the stack */
	stw	r11,CR_SAVE+BLOCKI_FSIZE(r1)	/* save CR in the stack */
	stmw	r13,GPR_SAVE_13(r1)		/* save non-volatile reg */

/* call helper(qt_t *old, void *a0, void *a1) */
	mtlr	r3				/* "helper" addr in the link reg */
	mr	r3,r1				/* current thread (i.e. the SP) in arg "old" */
	mr	r1,r6				/* swap to the new thread (i.e. to its SP) */
	blrl					/* jump to "helper" */
/* the "helper" return value is returned (since r3 is not changed) */

/* epilog code: return to the new thread's "qt_blocki" caller */
	lmw	r13,GPR_SAVE_13(r1)		/* restore non-volatile reg */
	lwz	r0,LR_SAVE+BLOCKI_FSIZE(r1)	/* recover return addr */
	lwz	r11,CR_SAVE+BLOCKI_FSIZE(r1)	/* recover CR */
	mtlr	r0				/* return address in the link reg */
	mtcr	r11				/* restore CR */
	addi	r1,r1,BLOCKI_FSIZE		/* free the stack frame */
	blr					/* return */

#if 0
.if 0
#endif
}
#if 0
.endif
#endif



/* Abort the current thread and start a new thread.
 *
 * Same register use as "qt_blocki" (r3 = "helper", r4/r5 = "a0"/"a1",
 * r6 = "newthread"), but nothing of the current thread is saved: no frame
 * is allocated and no register is stored before the stack switch.
 *
 * The epilog is identical to the one of "qt_blocki": it unwinds a
 * BLOCKI_FSIZE frame found on the new thread's stack.
 */
#if 0
.if 0
#endif
void qt_abort (void *helper, void *a0, void *a1, void *newthread);
asm void qt_abort (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

#if 0
qt_abort:
_qt_abort:
#endif
/* prolog code */
/* there is no prolog. It will never come back */

/* call helper(qt_t *old, void *a0, void *a1) */
	mtlr	r3				/* "helper" addr in the link reg */
	mr	r1,r6				/* swap to the new thread (i.e. to its SP) */
/* we don't need to set "old", we can pass just garbage. Actually, since r3
   is not changed, "old" is set to "helper" (don't care) */
	blrl					/* call "helper" */
/* the "helper" return value is returned (since r3 is not changed) */

/* epilog code: return to the new thread's "qt_blocki" caller */
	lmw	r13,GPR_SAVE_13(r1)		/* restore non-volatile reg */
	lwz	r0,LR_SAVE+BLOCKI_FSIZE(r1)	/* recover return addr */
	lwz	r11,CR_SAVE+BLOCKI_FSIZE(r1)	/* recover CR */
	mtlr	r0				/* return address in the link reg */
	mtcr	r11				/* restore CR */
	addi	r1,r1,BLOCKI_FSIZE		/* free the stack frame */
	blr					/* return */

#if 0
.if 0
#endif
}
#if 0
.endif
#endif



/* Block the current thread saving all non volatile registers and start
 * a new thread.
 *
 * This routine only adds the floating point part: it allocates a
 * BLOCK_FSIZE-byte frame, stores f14-f31 at FPR_SAVE_14..FPR_SAVE_31 and
 * then calls "qt_blocki" with r3-r6 untouched, so "qt_blocki" sees the
 * original arguments and takes care of LR, CR and r13-r31.
 * When the thread is resumed, "qt_blocki" returns here and f14-f31 are
 * reloaded before returning to the caller (see implementation note 2).
 */
#if 0
.if 0
#endif
void *qt_block (void *helper, void *a0, void *a1, void *newthread);
asm void *qt_block (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

# if 0
qt_block:
_qt_block:
#endif
/* prolog code */
	stwu	r1,-BLOCK_FSIZE(r1)		/* allocate the stack frame */
	mflr	r0				/* return addr in r0 */
	stw	r0,LR_SAVE+BLOCK_FSIZE(r1)	/* save return addr in the stack */

/* save non-volatile fp reg */
	stfd	f31,FPR_SAVE_31(r1)
	stfd	f30,FPR_SAVE_30(r1)
	stfd	f29,FPR_SAVE_29(r1)
	stfd	f28,FPR_SAVE_28(r1)
	stfd	f27,FPR_SAVE_27(r1)
	stfd	f26,FPR_SAVE_26(r1)
	stfd	f25,FPR_SAVE_25(r1)
	stfd	f24,FPR_SAVE_24(r1)
	stfd	f23,FPR_SAVE_23(r1)
	stfd	f22,FPR_SAVE_22(r1)
	stfd	f21,FPR_SAVE_21(r1)
	stfd	f20,FPR_SAVE_20(r1)
	stfd	f19,FPR_SAVE_19(r1)
	stfd	f18,FPR_SAVE_18(r1)
	stfd	f17,FPR_SAVE_17(r1)
	stfd	f16,FPR_SAVE_16(r1)
	stfd	f15,FPR_SAVE_15(r1)
	stfd	f14,FPR_SAVE_14(r1)
/* block the thread */
	bl	qt_blocki
/* the thread is going to be resumed */
/* restore non-volatile fp reg */
	lfd	f31,FPR_SAVE_31(r1)
	lfd	f30,FPR_SAVE_30(r1)
	lfd	f29,FPR_SAVE_29(r1)
	lfd	f28,FPR_SAVE_28(r1)
	lfd	f27,FPR_SAVE_27(r1)
	lfd	f26,FPR_SAVE_26(r1)
	lfd	f25,FPR_SAVE_25(r1)
	lfd	f24,FPR_SAVE_24(r1)
	lfd	f23,FPR_SAVE_23(r1)
	lfd	f22,FPR_SAVE_22(r1)
	lfd	f21,FPR_SAVE_21(r1)
	lfd	f20,FPR_SAVE_20(r1)
	lfd	f19,FPR_SAVE_19(r1)
	lfd	f18,FPR_SAVE_18(r1)
	lfd	f17,FPR_SAVE_17(r1)
	lfd	f16,FPR_SAVE_16(r1)
	lfd	f15,FPR_SAVE_15(r1)
	lfd	f14,FPR_SAVE_14(r1)

	lwz	r0,LR_SAVE+BLOCK_FSIZE(r1)	/* recover return addr */
	mtlr	r0				/* return address in the link reg */
	addi	r1,r1,BLOCK_FSIZE		/* free the stack frame */
	blr					/* return */

#if 0
.if 0
#endif
}
#if 0
.endif
#endif



/* Start a single argument thread using parameters preloaded in the stack
 * during thread initialization (see comments on stack initialization in the
 * header file).
 *
 * Executes:
 *
 *	only(u, t, userf);
 *
 * The four values are read from the parameter slots PAR_0..PAR_3 of the
 * current stack: "u", "t" and "userf" go to r3-r5 and "only" is called
 * through the link register. If "only" returns, control branches to
 * "_qt_error" (a symbol that is not defined in this section).
 */
#if 0
.if 0
#endif
void qt_start(void);
asm void qt_start(void)
{
#if 0
.endif
#endif

#if 0
qt_start:
_qt_start:
#endif
	lwz	r3,PAR_0(r1)	/* "u" in r3 */
	lwz	r4,PAR_1(r1)	/* "t" in r4 */
	lwz	r5,PAR_2(r1)	/* "userf" in r5 */
	lwz	r6,PAR_3(r1)	/* "only" in r6 */
	mtlr	r6		/* "only" address in the link reg */
/* call only(u, t, userf) */
	blrl			/* jump to "only" */
/* error if it returns */
	b	_qt_error
/* dead code (some inline asm "wants" the epilog, or they generate it) */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif



/* Start a variant argument thread using parameters preloaded in the stack
 * during thread initialization (see comments on stack initialization in the
 * header file).
 *
 * Executes:
 *
 *	startup(t);
 *	userf_return = userf(...);
 *	cleanup(pt, userf_return);
 *


             ***** Stack layout on start *****


       backchain ->      STACK BOTTOM (higher address)
                       +==========================+
   backchain - 4 ->    |                          |
                       +   LOCAL VARIABLES AREA   +
                              ..............
                       +                          +
                       |                          |
                       +--------------------------+
                       |                          |
                       +      ALIGNMENT PAD       +
                              ..............
                       +       (if needed)        +
                       |                          |
                       +--------------------------+
                       |                          |   arg(n)
                       +                          +
                       |                          |
                       +  VARIABLE ARGUMENT LIST  +
                              ..............
                       +      for userf call      +
     SP + PAR(5) ->    |                          |   arg(1)
                       +                          +
     SP + PAR(4) ->    |                          |   arg(0)
                       +--------------------------+
     SP + PAR(3) ->    |                          |   cleanup par
                       +                          +
     SP + PAR(2) ->    |                          |   userf par
                       +      PARAMETER AREA      +
     SP + PAR(1) ->    |                          |   startup par
                       +                          +
     SP + PAR(0) ->    |                          |   t par
                       +--------------------------+
                       |                          |
                       +       LINKAGE AREA       +
              SP ->    |                          |
                       +==========================+
                          STACK TOP (lower address)

                              Stack grows down
                                     |
                                     V



             ***** Stack layout before call userf *****


       backchain ->      STACK BOTTOM (higher address)
                       +==========================+
   backchain - 4 ->    |                          |
                       +   LOCAL VARIABLES AREA   +
                              ..............
                       +                          +
                       |                          |
                       +--------------------------+
                       |                          |
                       +      ALIGNMENT PAD       +
                              ..............
                       +       (if needed)        +
                       |                          |
                       +--------------------------+
                       |                          |   arg(n)
                       +                          +
                       |                          |
                       +  VARIABLE ARGUMENT LIST  +
                              ..............
                       +      for userf call      +
     SP + PAR(1) ->    |                          |   arg(1)
                       +                          +
     SP + PAR(0) ->    |                          |   arg(0)
                       +--------------------------+
                       |                          |
                       +       LINKAGE AREA       +
              SP ->    |                          |
                       +==========================+
                          STACK TOP (lower address)

                              Stack grows down
                                     |
                                     V


 * To call "userf(...)", the argument list must be adjacent to the linkage
 * area. Instead of copying the argument list, we move back the linkage area
 * (actually, we just increase the SP by VARGS_BKOFF and copy the backchain
 * word to the new SP). "t" and "cleanup" are saved in the local variable
 * area (at P_T_SAVE and P_CLEANUP_SAVE below the backchain address) in
 * order to call cleanup(pt, userf_return) afterwards.

*/


#if 0
.if 0
#endif
void qt_vstart(void);
asm void qt_vstart(void)
{
#if 0
.endif
#endif

#if 0
qt_vstart:
_qt_vstart:
#endif
/* NOTICE: the callee routines could save parameter registers in the caller's
 * stack parameter area. We put "t" in PAR(0) so that, if startup(t) saves
 * "t", it is written to the same location and no other parameter is
 * overwritten.
 */

/* since we will move back the linkage area (to make it adjacent to the
 * parameter list), we need to save "t" and "cleanup". We have made room for
 * this on the bottom of the stack frame. */

/* save parameters in the local variable area */
	lwz	r11,0(r1)		/* get the backchain */
	lwz	r3,P_T(r1)
	lwz	r4,P_CLEANUP(r1)
	stw	r3,P_T_SAVE(r11)	/* save "pt" */
	stw	r4,P_CLEANUP_SAVE(r11)	/* save "cleanup" */

/* call startup(t): r3 still holds "t" from the load above */
	lwz	r5,P_STARTUP(r1)
	mtlr	r5
	blrl				/* call "startup" */

/* call userf(...) */
	lwz	r11,0(r1)		/* reload backchain (r11 is volatile) */
	lwz	r4,P_USERF(r1)		/* load "userf" */
	mtlr	r4

	/* the first eight parameters of the variant list must be copied into
	 * GPR3-GPR10. There is a four-slot offset due to "t", "startup",
	 * "userf" and "cleanup" */

	lwz	r3,PAR_4(r1)
	lwz	r4,PAR_5(r1)
	lwz	r5,PAR_6(r1)
	lwz	r6,PAR_7(r1)
	lwz	r7,PAR_8(r1)
	lwz	r8,PAR_9(r1)
	lwz	r9,PAR_10(r1)
	lwz	r10,PAR_11(r1)


	/* move the linkage area to be adjacent to the argument list */
	stw	r11,VARGS_BKOFF(r1)	/* copy backchain */
	addi	r1,r1,VARGS_BKOFF	/* move back the stack */

	blrl				/* call "userf" */

/* call cleanup(void *pt, void *vuserf_return) */
	lwz	r11,0(r1)		/* reload backchain (r11 is volatile) */

	mr	r4,r3			/* pass "userf" return as 2nd parameter */
	lwz	r3,P_T_SAVE(r11)	/* reload "pt" */
	lwz	r5,P_CLEANUP_SAVE(r11)	/* reload "cleanup" */
	mtlr	r5
	blrl
/* error if "cleanup" returns */
	b	_qt_error
/* dead code (some inline asm "wants" the epilog, or they generate it) */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif
