/*
 * QuickThreads -- Threads-building toolkit.
 * Copyright (c) 1993 by David Keppel
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice and this notice
 * appear in all copies.  This software is provided as a
 * proof-of-concept and for demonstration purposes; there is no
 * representation about the suitability of this software for any
 * purpose.
 *
 *
 * PowerPC-Mach thread switching module.
 *
 * This software is largely based on the original PowerPC-Linux porting
 * developed by Ken Aaker <kenaaker@silverbacksystems.com>
 *
 * Marco Bucci <marco.bucci@inwind.it>
 * December 2002
 *
 */


#ifndef QUICKTHREADS_POWERPC_H
#define QUICKTHREADS_POWERPC_H


/*****************************************************************************
 *
 * DESCRIPTION
 *
 * This is the QuickThreads switching module implementation for PowerPC
 * running under the Mach kernel. It was developed and tested under MacOS X,
 * that is under Darwin (the UNIX-BSD foundation of MacOS X).
 *
 * Notice that the Mach PowerPC ABI (Application Binary Interface) [1] is
 * not the same as the System V ABI [2] used by most of the LINUX PowerPC
 * implementations.
 *
 * IMPLEMENTATION NOTES
 *
 * 1) Porting on System V ABI
 * Excluding the variant argument calling convention, the Mach and System V
 * ABIs are similar enough that it could be possible to use some simple
 * macros to adapt the code for both ABIs. Actually, the only relevant
 * difference is in the linkage area structure and in the position where the
 * Link and the Condition registers are saved. As to the calling conventions,
 * there are differences with floating point argument passing and with
 * variant argument lists. Notice that, on Mach, the caller's stack frame
 * allocates space to hold all arguments ([1] p.51), while on System V, the
 * caller's stack frame allocates space to hold just the arguments that don't
 * fit into registers ([2] p.3.18).
 *
 * 2) Variant argument list implementation
 * Variant argument calling on a RISC machine is not easy to implement since
 * parameters are passed via registers instead of via the stack. In a general
 * variant argument implementation, the caller's stack must map the whole
 * parameter list following the rules related to the use of the GPR and FPR
 * parameter registers and the stack alignment ([1] p.54).
 * This implementation is quite simple and not general. It works under the
 * hypothesis that arguments are 4-byte aligned integers.
 *
 * 3) This header file organisation
 * I preferred not to mix up macros that are needed (i.e. directly used) by
 * QuickThreads with internal "implementation" macros. You will find the
 * QuickThreads macros at the end of this header. Sometimes they just
 * refer to an analogous "internal" macro. At the top, there are the macros
 * that I used to make the implementation cleaner (I hope). I could have
 * included some system header (as to stack layout definitions, prologs and
 * epilogs, etc.), but I preferred to have a self-contained header in order
 * to make everything clearer for maintaining and for possible porting to
 * another ABI.
 *
 *
 * REFERENCES
 *
 * [1] - Mach-O Runtime Architecture
 *       Runtime Concepts and Conventions for Mac OS X Programs
 *       Preliminary July 2002
 *
 * [2] - SYSTEM V APPLICATION BINARY INTERFACE
 *       PowerPC Processor Supplement
 *       September 1995
 *
 * On MacOS X, more documentation is available by installing the "Developer
 * Tools". Useful macros and documentation can be found in the system header
 * files such as asm.h, asm_help.h etc. (see /usr/architecture/ppc/ or
 * /System/Library/Frameworks/Kernel.framework/Headers/architecture/ppc/).

 *****************************************************************************/

/*****************************************************************************
 *
 * PowerPC Mach-O Stack frame (see [1])
 *

                      ................
                +                          +
                |                          | reserved
                +  CALLER'S LINKAGE AREA   +
                |                          | Caller's LR
                +                          +
                |                          | Caller's CR
                +                          +
  backchain ->  |                          | Caller's backchain
                +==========================+
                |                          | FPR31
                +      FPR SAVE AREA       +
                       ..............
                +                          +
                |                          | FPRn
                +--------------------------+
                |                          | GPR31
                +      GPR SAVE AREA       +
                       ..............
                +                          +
                |                          | GPRn
                +--------------------------+
                |                          |
                +      ALIGNMENT PAD       +
                       ..............
                +       (if needed)        +
                |                          |
                +--------------------------+
                |                          |
                +   LOCAL VARIABLES AREA   +
                       ..............
                +                          +
                |                          |
                +--------------------------+
                |                          | PAR(n)
                +                          +
                |                          |
                +      PARAMETER AREA      +
                       ..............
                +      for FUTURE call     +
                |                          | PAR(1)
                +                          +
  SP + 24 ->    |                          | PAR(0)
                +--------------------------+
  SP + 20 ->    |                          | Caller's TOC
                +                          +
  SP + 16 ->    |                          | reserved
                +                          +
  SP + 12 ->    |                          | reserved
                +       LINKAGE AREA       +
  SP + 8 ->     |                          | LR callee-save for FUTURE call
                +                          +
  SP + 4 ->     |                          | CR callee-save for FUTURE call
                +                          +
  SP ->         |                          | backchain
                +==========================+
                 STACK TOP (lower address)

                      Stack grows down
                             |
                             V

 * NOTE:
 *
 * 1) Parameters are allocated in the CALLER's parameter area. This area must
 * be large enough to hold all parameters regardless of whether or not they
 * are passed in registers.
 *
 * The caller parameter area is used:
 * - by the caller, to store parameters to the callee that cannot fit in
 *   registers (no more parameter registers are available);
 * - by the callee, to save parameter registers (for instance because they
 *   are needed for a further call).
 *
 * Obviously, the callee saves parameter registers in the location in which
 * they are mapped on the caller's stack frame. So, be aware that, if
 * something else is stored in that location, it could be deleted after a call.
 *
 * 2) The callee saves LR and CR in the caller's linkage area. All other
 * callee's state is saved in its own stack frame.
 *

 *****************************************************************************/


/*****************************************************************************
 *
 * Stack initialization for a single argument thread
 *


 top + QUICKTHREADS_STKBASE ->                   STACK BOTTOM (higher address)
                                          +======================================+
                                          |                                      |
                                          +                                      +
                                                      ..............
                                          +                                      +
                                          |                                      |
                                          +--------------------------------------+
 top + QUICKTHREADS_ONLY_INDEX * 4 ->     |              only param              | PAR(3)
                                          +                                      +
 top + QUICKTHREADS_USER_INDEX * 4 ->     |             userf param              | PAR(2)
                                          +                                      +
 top + QUICKTHREADS_ARGT_INDEX * 4 ->     |               t param                | PAR(1)
                                          +                                      +
 top + QUICKTHREADS_ARGU_INDEX * 4 ->     |               u param                | PAR(0)
                                          +--------------------------------------+
                                          |                                      |
                                          +                                      +
                                                      ..............
                                          +                                      +
 top + QUICKTHREADS_RETURN_INDEX * 4 ->   |               qt_start               | LR save
                                          +                                      +
                                                      ..............
                                          +                                      +
 top + QUICKTHREADS_BLOCKI_FRAME_SIZE ->  |      top + QUICKTHREADS_STKBASE      | backchain
                                          +======================================+
                                          |                                      |
                                          +                                      +
                                                      ..............
                                          +                                      +
                                          |                                      |
                                          +--------------------------------------+
                                          |                                      |
                                          +                                      +
                                                      ..............
                                          +                                      +
 top ->                                   | top + QUICKTHREADS_BLOCKI_FRAME_SIZE | backchain
                                          +======================================+
                                                 STACK TOP (lower address)

                                                      Stack grows down
                                                             |
                                                             V

 *****************************************************************************
 *
 * Stack initialization for a variant argument thread
 *

 bottom ->                                       STACK BOTTOM (higher address)
                                          +======================================+
                                          |                                      |
                                          +                                      +
                                                      ..............
                                          +                                      +
 top + QUICKTHREADS_VSTKBASE ->           |                arg(0)                | PAR(4)
                                          +--------------------------------------+
 top + QUICKTHREADS_VCLEANUP_INDEX * 4 -> |            cleanup param             | PAR(3)
                                          +                                      +
 top + QUICKTHREADS_VUSERF_INDEX * 4 ->   |             userf param              | PAR(2)
                                          +                                      +
 top + QUICKTHREADS_VSTARTUP_INDEX * 4 -> |            startup param             | PAR(1)
                                          +                                      +
 top + QUICKTHREADS_VARGT_INDEX * 4 ->    |               t param                | PAR(0)
                                          +--------------------------------------+
                                          |                                      |
                                          +                                      +
                                                      ..............
                                          +                                      +
 top + QUICKTHREADS_RETURN_INDEX * 4 ->   |              qt_vstart               | LR save
                                          +                                      +
                                                      ..............
 top + QUICKTHREADS_BLOCKI_FRAME_SIZE ->  |                bottom                | backchain
                                          +======================================+
                                          |                                      |
                                          +                                      +
                                                      ..............
                                          +                                      +
                                          |                                      |
                                          +--------------------------------------+
                                          |                                      |
                                          +                                      +
                                                      ..............
                                          +                                      +
 top ->                                   | top + QUICKTHREADS_BLOCKI_FRAME_SIZE | backchain
                                          +======================================+
                                                 STACK TOP (lower address)

                                                      Stack grows down
                                                             |
                                                             V

* NOTE:
*
* Parameters are passed to "qt_start" or to "qt_vstart" by putting them into
* the stack frames of "qt_start" or "qt_vstart" themselves. This is not a
* conventional parameter passing because parameters should be put into the
* caller's stack, not into the callee's one. Actually we must consider
* that as a preload of the parameter area that "qt_start" or "qt_vstart"
* will use for their own calls.
* Be aware of the fact that, during a call, the caller's parameter area is,
* in a certain sense, volatile. In fact, the callee can save parameter
* registers on the caller's parameter area.
*
 *****************************************************************************/


/*****************************************************************************

 Define PowerPC Mach-O related macros

 *****************************************************************************/


/* Machine word used for stack slots.
 * NOTE(review): every offset in this header is computed with a literal 4
 * bytes per word; presumably PPC_W is 4 bytes wide on the intended target
 * -- confirm before building for any other data model.
 */
typedef unsigned long PPC_W;

/* Stack pointer must always be a multiple of 16 */
#define PPC_STACK_INCR 16
/* Round "length" up to the next multiple of PPC_STACK_INCR. */
#define PPC_ROUND_STACK(length) \
        (((length)+PPC_STACK_INCR-1) & ~(PPC_STACK_INCR-1))


/* Size in bytes of the linkage area (six words, SP .. SP + 20) and the
 * offsets, from SP, of the CR and LR save words inside it.
 */
#define PPC_LINKAGE_AREA 24
#define PPC_CR_SAVE 4
#define PPC_LR_SAVE 8

/* Size in bytes of a parameter area holding "n" word-sized parameters. */
#define PPC_PARAM_AREA(n) (4*(n))

#define PPC_GPR_SAVE_AREA (4*19) /* GPR13-GPR31 must be saved */
#define PPC_FPR_SAVE_AREA (8*18) /* FPR14-FPR31 must be saved */

/* Define parameter offset on the stack.
 * NOTICE: Parameters are numbered 0, 1, ..., n.
 */
#define PPC_PAR(i) (PPC_LINKAGE_AREA+(i)*4)

/*****************************************************************************

 Define stack frames

 *****************************************************************************/


/* Define the "qt_blocki" and "qt_abort" stack frame. We use the same stack
 * frame for both.
 *

 top + S ->
                        +==========================+
 top + S - 4 ->         |                          | GPR31
                        +      GPR SAVE AREA       +
                               ..............
                        +                          +
 top + S - 19 * 4 ->    |                          | GPR13
                        +--------------------------+
                        |                          |
                        +      ALIGNMENT PAD       +
                               ..............
                        +       (if needed)        +
                        |                          |
                        +--------------------------+
                        |                          |
                        +                          +
                        |                          |
                        +      PARAMETER AREA      +
                        |                          |
                        +                          +
 top + 24 ->            |                          |
                        +--------------------------+
                        |                          |
                        +       LINKAGE AREA       +
 top ->                 |                          |
                        +==========================+
 */

#define QUICKTHREADS_BLOCKI_FRAME_SIZE \
        PPC_ROUND_STACK(PPC_LINKAGE_AREA+PPC_PARAM_AREA(4)+PPC_GPR_SAVE_AREA)

/* Offset, from the frame top, of the slot where GPR "i" is saved.
 * Registers are saved from GPR13 to GPR31 at increasing addresses, with
 * GPR31 in the last word of the frame.
 * The argument is parenthesized so that any expression can be passed.
 */
#define QUICKTHREADS_BLOCKI_GPR_SAVE(i) \
        (QUICKTHREADS_BLOCKI_FRAME_SIZE-4+((i)-31)*4)



/* Define the "qt_block" stack frame. Notice that since "qt_block" calls
 * "qt_blocki", GPR registers are saved into the "qt_blocki" stack frame.
 *

 top + S ->
                        +==========================+
 top + S - 8 ->         |                          | FPR31
                        +      FPR SAVE AREA       +
                               ..............
                        +                          +
 top + S - 18 * 8 ->    |                          | FPR14
                        +--------------------------+
                        |                          |
                        +      ALIGNMENT PAD       +
                               ..............
                        +       (if needed)        +
                        |                          |
                        +--------------------------+
                        |                          |
                        +                          +
                        |                          |
                        +      PARAMETER AREA      +
                        |                          |
                        +                          +
 top + 24 ->            |                          |
                        +--------------------------+
                        |                          |
                        +       LINKAGE AREA       +
 top ->                 |                          |
                        +==========================+
 */

#define QUICKTHREADS_BLOCK_FRAME_SIZE \
        PPC_ROUND_STACK(PPC_LINKAGE_AREA+PPC_PARAM_AREA(4)+PPC_FPR_SAVE_AREA)

/* Offset, from the frame top, of the slot where FPR "i" is saved
 * (8 bytes per register, FPR31 in the last doubleword of the frame).
 * The argument is parenthesized so that any expression can be passed.
 */
#define QUICKTHREADS_BLOCK_FPR_SAVE(i) \
        (QUICKTHREADS_BLOCK_FRAME_SIZE-8+((i)-31)*8)


/* Define the "qt_start" frame size. It consists just of the linkage area and
 * the parameter area.
 *

                        +==========================+
                        |                          |
                        +      ALIGNMENT PAD       +
                               ..............
                        +       (if needed)        +
                        |                          |
                        +--------------------------+
                        |                          | only par
                        +                          +
                        |                          | userf par
                        +      PARAMETER AREA      +
                        |                          | t par
                        +                          +
 top + 24 ->            |                          | u par
                        +--------------------------+
                        |                          |
                        +       LINKAGE AREA       +
 top ->                 |                          |
                        +==========================+

 */
#define QUICKTHREADS_START_FRAME_SIZE \
        PPC_ROUND_STACK(PPC_LINKAGE_AREA+PPC_PARAM_AREA(4))



/* Define the "qt_vstart" frame. It consists of the linkage area, the fixed
 * parameter area, the variant argument list and a local variable area used
 * in the "qt_vstart" implementation.
 *

 backchain ->
                        +==========================+
 backchain - 4 ->       |                          |
                        +   LOCAL VARIABLES AREA   +
                               ..............
                        +                          +
                        |                          |
                        +--------------------------+
                        |                          |
                        +      ALIGNMENT PAD       +
                               ..............
                        +       (if needed)        +
                        |                          |
                        +--------------------------+
                        |                          | arg(n)
                        +                          +
                        |                          |
                        +  VARIABLE ARGUMENT LIST  +
                               ..............
                        +      for userf call      +
                        |                          | arg(1)
                        +                          +
 top + 24 + 16 ->       |                          | arg(0)
                        +--------------------------+
                        |                          | cleanup par
                        +                          +
                        |                          | userf par
                        +      PARAMETER AREA      +
                        |                          | startup par
                        +                          +
 top + 24 ->            |                          | t par
                        +--------------------------+
                        |                          |
                        +       LINKAGE AREA       +
 top ->                 |                          |
                        +==========================+

 */
#define QUICKTHREADS_VARGS_LOCAL_AREA (4*4) /* local variable area */

/* The offset the stack will be moved back before calling "userf(...)".
 * The linkage area must be moved to be adjacent to the part of the variant
 * argument list that is in the stack.
 */
#define QUICKTHREADS_VARGS_BKOFF PPC_PARAM_AREA(4)

/* Size of the "qt_vstart" frame for a variant argument list of "varbytes"
 * bytes, rounded up to the stack alignment.
 */
#define QUICKTHREADS_VSTART_FRAME_SIZE(varbytes) \
        PPC_ROUND_STACK(PPC_LINKAGE_AREA+PPC_PARAM_AREA(4)+(varbytes)+ \
                QUICKTHREADS_VARGS_LOCAL_AREA)

/* Offset to the base of the variant argument list */
#define QUICKTHREADS_VSTART_LIST_BASE (PPC_LINKAGE_AREA+PPC_PARAM_AREA(4))


/* Notice that qt_start and qt_vstart have no parameters, actually their
 * parameters are written in their stack frame during thread initialization
 */
extern void qt_start(void);
extern void qt_vstart(void);



/* Offset (in words) of the location where the block routine saves its return
 * address (i.e. LR). SP points to the top of the block routine stack and,
 * following ppc calling conventions, the return address is saved in the
 * previous (caller's) stack frame.
 */
#define QUICKTHREADS_RETURN_INDEX \
        ((QUICKTHREADS_BLOCKI_FRAME_SIZE+PPC_LR_SAVE)/sizeof(PPC_W))

/* static variable used to get the stack bottom in "VARGS" initialization */
/* static void *qt_sp_bottom_save; */
/* NOTE(review): the declaration above is commented out, yet
 * QUICKTHREADS_VARGS_MD0 and QUICKTHREADS_VARGS_MD1 below reference
 * "qt_sp_bottom_save". Any translation unit that expands those macros needs
 * the variable to be declared elsewhere -- confirm.
 */

/* Index (in words, from the stack top) of parameter "i" in the parameter
 * area of the frame that follows the block routine frame.
 */
#define QUICKTHREADS_ARG_INDEX(i) \
        ((QUICKTHREADS_BLOCKI_FRAME_SIZE+PPC_PAR(i))/sizeof(PPC_W))

/*****************************************************************************

 QuickThreads needed definitions

 *****************************************************************************/


#define QUICKTHREADS_GROW_DOWN
#define QUICKTHREADS_STKALIGN PPC_STACK_INCR
typedef PPC_W qt_word_t;


/* This macro is used by "QUICKTHREADS_ARGS" to initialize a single argument
 * thread.
 * - set "qt_start" as the "qt_block" or "qt_blocki" return address;
 * - set the top of the stack backchain;
 * - set the next backchain (not needed, but just to be "clean").
 *
 * "sp" is evaluated several times: pass an expression without side effects.
 */
#define QUICKTHREADS_ARGS_MD(sp) \
        (QUICKTHREADS_SPUT ((sp), QUICKTHREADS_RETURN_INDEX, qt_start), \
         QUICKTHREADS_SPUT ((sp), 0, (sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE), \
         QUICKTHREADS_SPUT ((sp), QUICKTHREADS_BLOCKI_FRAME_SIZE/sizeof(PPC_W), \
                (sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE+QUICKTHREADS_START_FRAME_SIZE))


/* This macro is used by "QUICKTHREADS_VARGS" to initialize a variant argument
 * thread. It returns the pointer to the top of the argument list.
 * We also use it to get the stack bottom via a static variable. This is a bit
 * "dirty", it could be better to do it in "qt_vargs", but we don't want to
 * change anything outside of this file.
 * We need the stack bottom to allocate a local variable area used by
 * "qt_vstart".
 *
 * "sp" is evaluated twice: pass an expression without side effects.
 */
#define QUICKTHREADS_VARGS_MD0(sp, varbytes) \
        ((qt_sp_bottom_save = (sp)), \
         ((qt_t *)(((char *)(sp)) - \
                (QUICKTHREADS_VSTART_FRAME_SIZE(varbytes)-QUICKTHREADS_VSTART_LIST_BASE))))


/* This macro is used by "QUICKTHREADS_VARGS" to initialize a variant argument
 * thread.
 * - set "qt_vstart" as the "qt_block" or "qt_blocki" return address;
 * - set the top of the stack backchain;
 * - set the next backchain (it points to the stack bottom saved by
 *   "QUICKTHREADS_VARGS_MD0").
 *
 * "sp" is evaluated several times: pass an expression without side effects.
 */
#define QUICKTHREADS_VARGS_MD1(sp) \
        (QUICKTHREADS_SPUT ((sp), QUICKTHREADS_RETURN_INDEX, qt_vstart), \
         QUICKTHREADS_SPUT ((sp), 0, (sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE), \
         QUICKTHREADS_SPUT ((sp), (QUICKTHREADS_BLOCKI_FRAME_SIZE)/sizeof(PPC_W), \
                qt_sp_bottom_save))


/* Activate "qt_vargs" as the initialization routine for the variant
 * argument threads
 */
#define QUICKTHREADS_VARGS_DEFAULT

/* Override "qt_vargs" with "qt_vargs_stdarg".
 * On LinuxPPC "qt_vargs" doesn't work, "qt_vargs_stdarg" uses a more
 * standard way to retrieve arguments from the variant list.
 */
#define QUICKTHREADS_VARGS(sp, nbytes, vargs, pt, startup, vuserf, cleanup) \
        ((qt_t *)qt_vargs_stdarg (sp, nbytes, vargs, pt, startup, vuserf, cleanup))


/* This macro is used by "QUICKTHREADS_ADJ(sp)" to get the stack top from the
 * stack bottom during a single argument thread initialization.
 * It is the space we need to allocate for a single argument thread: the stack
 * frame for the block routine ("qt_block" or "qt_blocki") and for "qt_start".
 */
#define QUICKTHREADS_STKBASE \
        (QUICKTHREADS_BLOCKI_FRAME_SIZE+QUICKTHREADS_START_FRAME_SIZE)

/* This macro is used by "QUICKTHREADS_VADJ(sp)" to get the stack top from the
 * base of the variant argument list during a variant argument thread
 * initialization.
 */
#define QUICKTHREADS_VSTKBASE \
        (QUICKTHREADS_BLOCKI_FRAME_SIZE+QUICKTHREADS_VSTART_LIST_BASE)

/* The *index* (positive offset) of where to put each value. */

#define QUICKTHREADS_ARGU_INDEX QUICKTHREADS_ARG_INDEX(0)
#define QUICKTHREADS_ARGT_INDEX QUICKTHREADS_ARG_INDEX(1)
#define QUICKTHREADS_USER_INDEX QUICKTHREADS_ARG_INDEX(2)
#define QUICKTHREADS_ONLY_INDEX QUICKTHREADS_ARG_INDEX(3)


#define QUICKTHREADS_VARGT_INDEX QUICKTHREADS_ARG_INDEX(0)
#define QUICKTHREADS_VSTARTUP_INDEX QUICKTHREADS_ARG_INDEX(1)
#define QUICKTHREADS_VUSERF_INDEX QUICKTHREADS_ARG_INDEX(2)
#define QUICKTHREADS_VCLEANUP_INDEX QUICKTHREADS_ARG_INDEX(3)

#endif /* ndef QUICKTHREADS_POWERPC_H */