1/* powerpc-sys5.s -- assembly support. */ 2 3/* 4 * QuickThreads -- Threads-building toolkit. 5 * Copyright (c) 1993 by David Keppel 6 * 7 * Permission to use, copy, modify and distribute this software and 8 * its documentation for any purpose and without fee is hereby 9 * granted, provided that the above copyright notice and this notice 10 * appear in all copies. This software is provided as a 11 * proof-of-concept and for demonstration purposes; there is no 12 * representation about the suitability of this software for any 13 * purpose. 14 15 16 * PowerPC-System V thread switching module. 17 * 18 * This software is largely based on the original PowerPC-Linux porting 19 * developed by Ken Aaker <kenaaker@silverbacksystems.com> 20 * 21 * Marco Bucci <marco.bucci@inwind.it> 22 * December 2002 23 * 24 */ 25 26 27/* 28 * 29 * PowerPC Register conventions: 30 * 31 * r0 volatile 32 * r1 SP 33 * r2 system reserved 34 * r3-r4 volatile for parameter passing and function return 35 * r5-r10 volatile for parameter passing 36 * r11-r12 volatile 37 * r13-r31 non volatile registers 38 * f0 volatile 39 * f1 volatile for parameter passing and function return 40 * f2-f13 volatile for parameter passing 41 * f14-f31 non volatile 42 * 43 * cr2-cr4 non volatile 44 * 45 * 46 * See the header file for more documentation. 47 * 48 * 49 * 50 * IMPLEMENTATION NOTES 51 * 52 * 53 * 1) Condition register saving 54 * On most machines, the condition code register is caller-save. 55 * On the PPC, the condition code register is callee-save, so the 56 * thread context switch must preserve it. 57 * 58 * 59 * 2) Floating point registers saving 60 * On resuming a thread, floating point registers are restored or not 61 * depending only on which block routine suspended the thread (i.e. regardless 62 * of whether "qt_block", "qt_blocki" or "qt_abort" is used to resume it). 63 * This behaviour is obtained by implementing "qt_block" by means of a nested 64 * call to "qt_blocki". 
As a result, the blocking of a thread always goes 65 * and returns through "qt_blocki" and, if a thread was blocked by "qt_block", 66 * its execution resumes from the floating point restoring code on exit 67 * of "qt_block". 68 * 69 * Thanks to David Keppel, who explained this "simple" trick to me. 70 * 71 * 72 * 3) C language code debugging 73 * The original version of this software was developed and debugged under 74 * MacOS X using the Metrowerks Code Warrior PPC integrated assembler. 75 * It could still be used with a C inline assembler by means of a suitable 76 * file to include it. 77 * In order to avoid "copy and paste" bugs, and to make maintenance easier, 78 * I made only minimal changes, so you can find some strange code such as: 79 * 80 * #if 0 81 * .if 0 82 * C code here 83 * .endif 84 * #endif 85 * 86 * This is just to embed some C code that is needed by the Code Warrior 87 * integrated assembler. 88 * 89 * 90 * 4) Assembly constants generation 91 * Constants used in the assembly code are generated by running 92 * the C++ code that follows (commented out). It uses the C macros declared in 93 * the C header in order to guarantee that the C interface and the assembly 94 * code are "aligned". I avoided the use of an assembler preprocessor since 95 * they are not so standard and moreover using macro expressions makes the 96 * assembly debugging more difficult. 
97 * 98 * 99 100 101#include <iostream> 102#include "powerpc_sys5.h" 103 104int main() 105{ 106 using namespace std; 107 108 int i; 109 110 cout << ".set LR_SAVE, " << PPC_LR_SAVE << endl; 111 cout << ".set BLOCKI_FSIZE, " << QUICKTHREADS_BLOCKI_FRAME_SIZE << endl; 112 cout << ".set BLOCKI_CR_SAVE, " << QUICKTHREADS_BLOCKI_CR_SAVE << endl; 113 cout << ".set BLOCK_FSIZE, " << QUICKTHREADS_BLOCK_FRAME_SIZE << endl; 114 115 cout << endl; 116 for(i=0; i<12; i++) 117 cout << ".set PAR_" << i << ", " << PPC_PAR(i) << endl; 118 119 cout << endl; 120 i = 13; 121 cout << ".set GPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCKI_GPR_SAVE(i) << endl; 122 123 cout << endl; 124 for(i=31; i>13; i--) 125 cout << ".set FPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCK_FPR_SAVE(i) << endl; 126 127 cout << endl; 128 cout << ".set VARGS_BKOFF, " << QUICKTHREADS_VARGS_BKOFF << endl; 129 130 131 cout << endl << endl << endl; 132 133 for(i=31; i>13; i--) 134 cout << "\tstfd\tf" << i << ",FPR_SAVE_" << i << "(%r1)" << endl; 135 136 cout << endl; 137 for(i=31; i>13; i--) 138 cout << "\tlfd \tf" << i << ",FPR_SAVE_" << i << "(%r1)" << endl; 139 140 cout << endl << endl << endl; 141 142 143 return 0; 144} 145 146 147 148 * 149 * 150 * 151 */ 152 153 154#if 0 155 156 .text 157 .align 4 158 159 .globl qt_block 160 .globl _qt_block 161 .globl qt_blocki 162 .globl _qt_blocki 163 .globl qt_abort 164 .globl _qt_abort 165 .globl qt_start 166 .globl _qt_start 167 .globl qt_vstart 168 .globl _qt_vstart 169 170 /* Stack frame layout constants (byte offsets and sizes). They are meant to be the values printed by the generator program kept in the comment above. */ 171.set LR_SAVE, 4 /* return address slot; always addressed as LR_SAVE+<frame size>(%r1), i.e. 4 bytes above the SP on entry */ 172.set BLOCKI_FSIZE, 96 /* "qt_blocki" frame size: GPR_SAVE_13 plus 19 words for r13-r31 */ 173.set BLOCKI_CR_SAVE, 8 /* CR is saved into the callee's stack frame */ 174.set BLOCK_FSIZE, 160 /* "qt_block" frame size: FPR_SAVE_14 plus 18 doublewords for f14-f31 */ 175 /* PAR_n = 8 + 4*n: offset from the SP of the n-th word of the parameter area */ 176.set PAR_0, 8 177.set PAR_1, 12 178.set PAR_2, 16 179.set PAR_3, 20 180.set PAR_4, 24 181.set PAR_5, 28 182.set PAR_6, 32 183.set PAR_7, 36 184.set PAR_8, 40 185.set PAR_9, 44 186.set PAR_10, 48 187.set PAR_11, 52 188 /* start of the area where "qt_blocki" stores r13-r31 */ 189.set GPR_SAVE_13, 20 190 /* FPR_SAVE_n: offset of the 8-byte slot where "qt_block" stores fn */ 191.set FPR_SAVE_31, 152 192.set FPR_SAVE_30, 144 193.set FPR_SAVE_29, 136 194.set FPR_SAVE_28, 128 195.set 
FPR_SAVE_27, 120 196.set FPR_SAVE_26, 112 197.set FPR_SAVE_25, 104 198.set FPR_SAVE_24, 96 199.set FPR_SAVE_23, 88 200.set FPR_SAVE_22, 80 201.set FPR_SAVE_21, 72 202.set FPR_SAVE_20, 64 203.set FPR_SAVE_19, 56 204.set FPR_SAVE_18, 48 205.set FPR_SAVE_17, 40 206.set FPR_SAVE_16, 32 207.set FPR_SAVE_15, 24 208.set FPR_SAVE_14, 16 209 210 211 212 213/* various offsets used by "qt_vstart" */ 214.set P_T, PAR_0 215.set P_STARTUP, PAR_1 216.set P_USERF, PAR_2 217.set P_CLEANUP, PAR_3 218 /* the offset used to move back the linkage area to be adjacent to 219 * the variant argument list before calling "userf(...)". 220 * Skip "t", "startup", "userf", "cleanup" and first 221 * 8 parameters (since they are passed via registers): 12 words = 48 bytes */ 222.set VARGS_BKOFF, 48 223 224 /* location where "t" and "cleanup" are saved (relative to 225 * the stack frame base, i.e. the backchain value loaded from 0(%r1)) */ 226.set P_T_SAVE, -4 227.set P_CLEANUP_SAVE, -8 228 229#endif 230 231 232 233/* Block the current thread saving all integer non volatile registers and 234 * start a new thread. 235 * On entry r3 = "helper", r4 = "a0", r5 = "a1", r6 = "newthread" (used as the new SP). The routine saves LR, CR and r13-r31 on the current stack, calls helper(old, a0, a1) on the new stack with "old" = the SP of the frame just built, then unwinds the frame found on the new stack. */ 236#if 0 237.if 0 238#endif 239void *qt_blocki (void *helper, void *a0, void *a1, void *newthread); 240asm void *qt_blocki (void *helper, void *a0, void *a1, void *newthread) 241{ 242#if 0 243.endif 244#endif 245 246#if 0 247qt_blocki: 248_qt_blocki: 249#endif 250/* prolog code */ 251 stwu %r1,-BLOCKI_FSIZE(%r1) /* allocate the stack frame (the old SP is stored as backchain) */ 252 mflr %r0 /* return addr in r0 */ 253 mfcr %r11 /* CR in r11 */ 254 stw %r0,LR_SAVE+BLOCKI_FSIZE(%r1) /* save return addr in the stack (at old SP + LR_SAVE) */ 255 stw %r11,BLOCKI_CR_SAVE(%r1) /* save CR in the stack */ 256 stmw %r13,GPR_SAVE_13(%r1) /* save non-volatile regs r13-r31 */ 257 258/* call helper(qt_t *old, void *a0, void *a1); "a0" and "a1" are still in r4 and r5 */ 259 mtlr %r3 /* "helper" addr in the link reg */ 260 mr %r3,%r1 /* current thread (i.e. the SP) in arg "old" */ 261 mr %r1,%r6 /* swap to the new thread (i.e. 
to its SP) */ 262 blrl /* jump to "helper"; it returns to the epilog below */ 263/* the "helper" return value is returned (since r3 is not changed) */ 264 265/* epilog code: return to the new thread's "qt_blocki" caller */ 266 lmw %r13,GPR_SAVE_13(%r1) /* restore non-volatile regs r13-r31 */ 267 lwz %r0,LR_SAVE+BLOCKI_FSIZE(%r1) /* recover return addr */ 268 lwz %r11,BLOCKI_CR_SAVE(%r1) /* recover CR */ 269 mtlr %r0 /* return address in the link reg */ 270 mtcr %r11 /* restore CR */ 271 addi %r1,%r1,BLOCKI_FSIZE /* free the stack frame */ 272 blr /* return */ 273 274#if 0 275.if 0 276#endif 277} 278#if 0 279.endif 280#endif 281 282 283 284/* Abort the current thread and start a new thread. 285 * Same register arguments as "qt_blocki" (r3 = "helper", r4 = "a0", r5 = "a1", r6 = "newthread"), but nothing of the current thread is saved. */ 286#if 0 287.if 0 288#endif 289void qt_abort (void *helper, void *a0, void *a1, void *newthread); 290asm void qt_abort (void *helper, void *a0, void *a1, void *newthread) 291{ 292#if 0 293.endif 294#endif 295 296#if 0 297qt_abort: 298_qt_abort: 299#endif 300/* prolog code */ 301/* there is no prolog. It will never come back */ 302 303/* call helper(qt_t *old, void *a0, void *a1) */ 304 mtlr %r3 /* "helper" addr in the link reg */ 305 mr %r1,%r6 /* swap to the new thread (i.e. to its SP) */ 306/* we don't need to set "old", we can pass just garbage. Actually, since r3 307 is not changed, "old" is set to "helper" (don't care) */ 308 blrl /* call "helper" */ 309/* the "helper" return value is returned (since r3 is not changed) */ 310 311/* epilog code: return to the new thread's "qt_blocki" caller (the frame unwound here is the one found at the new SP, not one built by this routine) */ 312 lmw %r13,GPR_SAVE_13(%r1) /* restore non-volatile regs r13-r31 */ 313 lwz %r0,LR_SAVE+BLOCKI_FSIZE(%r1) /* recover return addr */ 314 lwz %r11,BLOCKI_CR_SAVE(%r1) /* recover CR */ 315 mtlr %r0 /* return address in the link reg */ 316 mtcr %r11 /* restore CR */ 317 addi %r1,%r1,BLOCKI_FSIZE /* free the stack frame */ 318 blr /* return */ 319 320#if 0 321.if 0 322#endif 323} 324#if 0 325.endif 326#endif 327 328 329 330/* Block the current thread saving all non volatile registers and start 331 * a new thread. 
332 */ /* "qt_block" stores f14-f31 in its own BLOCK_FSIZE-byte frame and then calls "qt_blocki"; no instruction before the "bl" writes r3-r6, so "helper", "a0", "a1" and "newthread" are passed through unchanged. The code after the "bl" runs when this thread is resumed, and r3 (the value returned by "qt_blocki") is left untouched until the final "blr". */ 333#if 0 334.if 0 335#endif 336void *qt_block (void *helper, void *a0, void *a1, void *newthread); 337asm void *qt_block (void *helper, void *a0, void *a1, void *newthread) 338{ 339#if 0 340.endif 341#endif 342 343# if 0 344qt_block: 345_qt_block: 346#endif 347/* prolog code */ 348 stwu %r1,-BLOCK_FSIZE(%r1) /* allocate the stack frame (the old SP is stored as backchain) */ 349 mflr %r0 /* return addr in r0 */ 350 stw %r0,LR_SAVE+BLOCK_FSIZE(%r1) /* save return addr in the stack (at old SP + LR_SAVE) */ 351 352/* save non-volatile fp regs (f14-f31) */ 353 stfd %f31,FPR_SAVE_31(%r1) 354 stfd %f30,FPR_SAVE_30(%r1) 355 stfd %f29,FPR_SAVE_29(%r1) 356 stfd %f28,FPR_SAVE_28(%r1) 357 stfd %f27,FPR_SAVE_27(%r1) 358 stfd %f26,FPR_SAVE_26(%r1) 359 stfd %f25,FPR_SAVE_25(%r1) 360 stfd %f24,FPR_SAVE_24(%r1) 361 stfd %f23,FPR_SAVE_23(%r1) 362 stfd %f22,FPR_SAVE_22(%r1) 363 stfd %f21,FPR_SAVE_21(%r1) 364 stfd %f20,FPR_SAVE_20(%r1) 365 stfd %f19,FPR_SAVE_19(%r1) 366 stfd %f18,FPR_SAVE_18(%r1) 367 stfd %f17,FPR_SAVE_17(%r1) 368 stfd %f16,FPR_SAVE_16(%r1) 369 stfd %f15,FPR_SAVE_15(%r1) 370 stfd %f14,FPR_SAVE_14(%r1) 371/* block the thread */ 372 bl qt_blocki /* integer state is saved and the switch is done by "qt_blocki" */ 373/* the thread is going to be resumed */ 374/* restore non-volatile fp regs (f14-f31) */ 375 lfd %f31,FPR_SAVE_31(%r1) 376 lfd %f30,FPR_SAVE_30(%r1) 377 lfd %f29,FPR_SAVE_29(%r1) 378 lfd %f28,FPR_SAVE_28(%r1) 379 lfd %f27,FPR_SAVE_27(%r1) 380 lfd %f26,FPR_SAVE_26(%r1) 381 lfd %f25,FPR_SAVE_25(%r1) 382 lfd %f24,FPR_SAVE_24(%r1) 383 lfd %f23,FPR_SAVE_23(%r1) 384 lfd %f22,FPR_SAVE_22(%r1) 385 lfd %f21,FPR_SAVE_21(%r1) 386 lfd %f20,FPR_SAVE_20(%r1) 387 lfd %f19,FPR_SAVE_19(%r1) 388 lfd %f18,FPR_SAVE_18(%r1) 389 lfd %f17,FPR_SAVE_17(%r1) 390 lfd %f16,FPR_SAVE_16(%r1) 391 lfd %f15,FPR_SAVE_15(%r1) 392 lfd %f14,FPR_SAVE_14(%r1) 393 /* epilog code */ 394 lwz %r0,LR_SAVE+BLOCK_FSIZE(%r1) /* recover return addr */ 395 mtlr %r0 /* return address in the link reg */ 396 addi %r1,%r1,BLOCK_FSIZE /* free the stack frame */ 397 blr /* return */ 398 399#if 0 400.if 0 401#endif 402} 403#if 0 404.endif 405#endif 406 407 408 409/* 
Start a single argument thread using parameters preloaded in the stack 410 * during thread initialization (see comments on stack initialization in the 411 * heather file). 412 * 413 * Executes: 414 * 415 * only(u, t, userf); 416 */ /* On entry r1 points to the preloaded frame: the words at PAR_0..PAR_3 hold "u", "t", "userf" and "only". "only" is not expected to return; if it does, control is transferred to "qt_error". */ 417#if 0 418.if 0 419#endif 420void qt_start(void); 421asm void qt_start(void) 422{ 423#if 0 424.endif 425#endif 426 427#if 0 428qt_start: 429_qt_start: 430#endif 431 lwz %r3,PAR_0(%r1) /* "u" in r3 */ 432 lwz %r4,PAR_1(%r1) /* "t" in r4 */ 433 lwz %r5,PAR_2(%r1) /* "userf" in r5 */ 434 lwz %r6,PAR_3(%r1) /* "only" in r6 */ 435 mtlr %r6 /* "only" address in the link reg */ 436/* call only(u, t, userf) */ 437 blrl /* jump to "only" */ 438/* error if it returns */ 439 b qt_error 440/* dead code (some inline assemblers "want" the epilog, or they generate it) */ 441 blr 442 443#if 0 444.if 0 445#endif 446} 447#if 0 448.endif 449#endif 450 451 452 453/* Start a variant argument thread using parameters preloaded in the stack 454 * during thread initialization (see comments on stack initialization in the 455 * heather file). 456 * 457 * Executes: 458 * 459 * startup(t); 460 * userf_return = userf(...); 461 * cleanup(pt, userf_return); 462 * 463 464 465 ***** Stack layout on start ***** 466 467 468 backchain -> STACK BOTTOM (higher address) 469 +==========================+ 470 backchain - 4 -> | | 471 + LOCAL VARIABLES AREA + 472 .............. 473 + + 474 | | 475 +--------------------------+ 476 | | 477 + ALIGNMEBNT PAD + 478 .............. 479 + (if needed) + 480 | | 481 +--------------------------+ 482 | | arg(n) 483 + + 484 | | 485 + VARIABLE ARGUMENT LIST + 486 .............. 
487 + for userf call + 488 SP + PAR(5) -> | | arg(1) 489 + + 490 SP + PAR(4) -> | | arg(0) 491 +--------------------------+ 492 SP + PAR(3) -> | | cleanup par 493 + + 494 SP + PAR(2) -> | | userf par 495 + PARAMETER AREA + 496 SP + PAR(1) -> | | startup par 497 + + 498 SP + PAR(0) -> | | t par 499 +--------------------------+ 500 | | 501 + LINKAGE AREA + 502 SP -> | | 503 +==========================+ 504 STACK TOP (lower address) 505 506 Stack grows down 507 | 508 V 509 510 511 512 ***** Stack layout before call userf ***** 513 514 515 backchain -> STACK BOTTOM (higher address) 516 +==========================+ 517 backchain - 4 -> | | 518 + LOCAL VARIABLES AREA + 519 .............. 520 + + 521 | | 522 +--------------------------+ 523 | | 524 + ALIGNMEBNT PAD + 525 .............. 526 + (if needed) + 527 | | 528 +--------------------------+ 529 | | arg(n) 530 + + 531 | | 532 + VARIABLE ARGUMENT LIST + 533 .............. 534 + for userf call + 535 SP + PAR(1) -> | | arg(1) 536 + + 537 SP + PAR(0) -> | | arg(0) 538 +--------------------------+ 539 | | 540 + LINKAGE AREA + 541 SP -> | | 542 +==========================+ 543 STACK TOP (lower address) 544 545 Stack grows down 546 | 547 V 548 549 550 * To call "userf(...)", the argument list must be adiacent to the linkage 551 * area. Instead of copy the argument list, we move back the linkage area 552 * (actually, we just increase the SP and copy the backchain). "t" and 553 * "cleanup" are saved in a local variable area in order to call 554 * cleanup(pt, userf_return). 555 556*/ /* Summary of the code below: save "t" and "cleanup" just under the backchain address, call startup(t), load the first eight variant arguments into r3-r10, advance the SP by VARGS_BKOFF, call userf(...), then call cleanup(pt, userf_return). "cleanup" is not expected to return; if it does, control is transferred to "qt_error". */ 557 558 559#if 0 560.if 0 561#endif 562void qt_vstart(void); 563asm void qt_vstart(void) 564{ 565#if 0 566.endif 567#endif 568 569#if 0 570qt_vstart: 571_qt_vstart: 572#endif 573/* NOTICE: the callee routines could save parameter registers in the caller's 574 * stack parameter area. We put "t" in PAR(0) so that, if startup(t) 575 * saves "t", it is saved in the same location, thus not overwriting 576 * any other parameter. 
577 */ 578 579/* since we will move back the linkage area (to make it adjacent to the 580 * parameter list), we need to save "t" and "cleanup". We have made room for 581 * this on the bottom of the stack frame. */ 582 583/* save parameters in the local variable area */ 584 lwz %r11,0(%r1) /* get the backchain */ 585 lwz %r3,P_T(%r1) /* "t"; it stays in r3 as the argument of "startup" */ 586 lwz %r4,P_CLEANUP(%r1) /* "cleanup" */ 587 stw %r3,P_T_SAVE(%r11) /* save "pt" */ 588 stw %r4,P_CLEANUP_SAVE(%r11) /* save "cleanup" */ 589 590/* call startup(t) */ 591 lwz %r5,P_STARTUP(%r1) /* load "startup" */ 592 mtlr %r5 /* "startup" address in the link reg */ 593 blrl /* call "startup" */ 594 595/* call userf(...) */ 596 lwz %r11,0(%r1) /* reload backchain (r11 is volatile) */ 597 lwz %r4,P_USERF(%r1) /* load "userf" */ 598 mtlr %r4 /* "userf" address in the link reg; r4 is free again for an argument */ 599 600 /* first eight parameters of the variant list must be copied in 601 * GPR3-GPR10. There is a four places offset due to "t", "startup", 602 * "userf" and "cleanup" */ 603 604 lwz %r3,PAR_4(%r1) 605 lwz %r4,PAR_5(%r1) 606 lwz %r5,PAR_6(%r1) 607 lwz %r6,PAR_7(%r1) 608 lwz %r7,PAR_8(%r1) 609 lwz %r8,PAR_9(%r1) 610 lwz %r9,PAR_10(%r1) 611 lwz %r10,PAR_11(%r1) 612 613 614 /* move the linkage area to be adjacent to the argument list. This must come after the loads above: VARGS_BKOFF equals PAR_10, so the backchain copy overwrites the PAR_10 slot, and VARGS_BKOFF + LR_SAVE equals PAR_11 */ 615 stw %r11,VARGS_BKOFF(%r1) /* copy backchain */ 616 addi %r1,%r1,VARGS_BKOFF /* move back the stack */ 617 618 blrl /* call "userf" */ 619 620/* call qt_cleanup(void *pt, void *vuserf_return) */ 621 lwz %r11,0(%r1) /* reload backchain (r11 is volatile) */ 622 623 mr %r4,%r3 /* pass the "userf" return value as 2nd parameter */ 624 lwz %r3,P_T_SAVE(%r11) /* reload "pt" */ 625 lwz %r5,P_CLEANUP_SAVE(%r11) /* reload "cleanup" */ 626 mtlr %r5 /* "cleanup" address in the link reg */ 627 blrl /* call "cleanup" */ 628 b qt_error /* error if it returns */ 629/* dead code (some inline assemblers "want" the epilog, or they generate it) */ 630 blr 631 632#if 0 633.if 0 634#endif 635} 636#if 0 637.endif 638#endif 639 640