/*
 * QuickThreads -- Threads-building toolkit.
 * Copyright (c) 1993 by David Keppel
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice and this notice
 * appear in all copies.  This software is provided as a
 * proof-of-concept and for demonstration purposes; there is no
 * representation about the suitability of this software for any
 * purpose.
 *
 *
 * PowerPC-Sys5 thread switching module.
 *
 * This software is largely based on the original PowerPC-Linux porting
 * developed by Ken Aaker <kenaaker@silverbacksystems.com>
 *
 * Marco Bucci <marco.bucci@inwind.it>
 * December 2002
 *
 */


#ifndef QUICKTHREADS_POWERPC_H
#define QUICKTHREADS_POWERPC_H


/*****************************************************************************
 *
 * DESCRIPTION
 *
 * This is the QuickThreads switching module implementation for PowerPC
 * running under System V ABI (Application Binary Interface) [2]. It was
 * developed by porting the MacOS X version and tested under LinuxPPC.
 *
 * Notice that this is not the same as the PowerPC Mach ABI used by MacOSX
 * [1].
 *
 * IMPLEMENTATION NOTES
 *
 * 1) Porting on System V ABI
 * Excluding the variant argument calling convention, Mach and System V ABI
 * are similar enough that it could be possible to use some simple macros to
 * adapt the code for both the ABIs. Actually, the only relevant difference
 * is in the linkage area structure and in the position where the Link and
 * the Condition registers are saved. As to the calling conventions, there
 * are differences with floating point argument passing and with variant
 * argument lists. Notice that, on Mach, the caller's stack frame allocates
 * space to hold all arguments ([1] p.51), while on System V, the caller's
 * stack frame allocates space to hold just the arguments that don't fit into
 * registers ([2] p.3.18).
 *
 * 2) Variant argument list implementation
 * Variant argument calling on a RISC machine is not easy to implement since
 * parameters are passed via registers instead of via stack. In a general
 * variant argument implementation, the caller's stack must map the whole
 * parameter list following the rules related to the use of the GPR and FPR
 * parameter registers and the stack alignment ([2] p.3-21).
 * This implementation is quite simple and not general. It works under the
 * hypothesis that arguments are 4-byte aligned integers.
 *
 * 3) This header file organisation
 * I preferred not to mix up macros that are needed (i.e. directly used) by
 * QuickThreads with internal "implementation" macros. You will find the
 * QuickThreads macros at the end of this header. Sometimes they just refer
 * to an analogous "internal" macro. At the top, there are the macros that I
 * used to make the implementation cleaner (I hope). I could include some
 * system header (as to stack layout definitions, prologs and epilogs,
 * etc.), but I preferred to have a self-contained header in order to make
 * everything clearer for maintaining and for possible porting to another
 * ABI.
 *
 *
 * REFERENCES
 *
 * [1] - Mach-O Runtime Architecture
 *       Runtime Concepts and Conventions for Mac OS X Programs
 *       Preliminary July 2002
 *
 * [2] - SYSTEM V APPLICATION BINARY INTERFACE
 *       PowerPC Processor Supplement
 *       September 1995
 *
 *****************************************************************************/

/*****************************************************************************
 *
 * PowerPC System V Stack frame (see [2])
 *

                             ................
                      +                          +
                      |                          |
                      +--------------------------+
                      |                          | Caller's LR
                      +  CALLER'S LINKAGE AREA   +
backchain ->          |                          | Caller's backchain
                      +==========================+
                      |                          | FPR31
                      +      FPR SAVE AREA       +
                              ..............
                      +                          +
                      |                          | FPRn
                      +--------------------------+
                      |                          | GPR31
                      +      GPR SAVE AREA       +
                              ..............
                      +                          +
                      |                          | GPRn
                      +--------------------------+
                      |                          |
                      +      ALIGNMENT PAD       +
                              ..............
                      +       (if needed)        +
                      |                          |
                      +--------------------------+
                      |         CR SAVE          |
                      +--------------------------+
                      |                          |
                      +   LOCAL VARIABLES AREA   +
                              ..............
                      +                          +
                      |                          |
                      +--------------------------+
                      |                          | PAR(n-7)
                      +                          +
                      |                          |
                      +      PARAMETER AREA      +
                              ..............
                      +     for FUTURE call      +
                      |                          | PAR(9)
                      +                          +
SP + 8 ->             |                          | PAR(8)
                      +--------------------------+
SP + 4 ->             |                          | LR callee-save for FUTURE call
                      +       LINKAGE AREA       +
SP ->                 |                          | backchain
                      +==========================+
                       STACK TOP (lower address)

                           Stack grows down
                                  |
                                  V


 * NOTE:
 *
 * 1) In this figure parameters are assumed to be 4-byte aligned integers and
 *    are numbered 0, 1, 2,... n.
 *
 * 2) Parameters are allocated in the CALLER's parameter area. This area must
 *    be large enough to hold all parameters that cannot fit in registers (no
 *    more parameter registers are available);
 *
 * 3) The callee saves LR in the caller's linkage area. CR, like all the other
 *    callee state, is saved in the callee's own stack frame.
 *

 *****************************************************************************/


/*****************************************************************************
 *
 * Stack initialization for a single argument thread
 *


top + QUICKTHREADS_STKBASE ->                 STACK BOTTOM (higher address)
                                         +======================================+
                                         |                                      |
                                         +                                      +
                                                      ..............
                                         +                                      +
                                         |                                      |
                                         +--------------------------------------+
top + QUICKTHREADS_ONLY_INDEX * 4 ->     |              only param              | PAR(3)
                                         +                                      +
top + QUICKTHREADS_USER_INDEX * 4 ->     |             userf param              | PAR(2)
                                         +                                      +
top + QUICKTHREADS_ARGT_INDEX * 4 ->     |               t param                | PAR(1)
                                         +                                      +
top + QUICKTHREADS_ARGU_INDEX * 4 ->     |               u param                | PAR(0)
                                         +--------------------------------------+
top + QUICKTHREADS_RETURN_INDEX * 4 ->   |               qt_start               | LR save
                                         +                                      +
top + QUICKTHREADS_BLOCKI_FRAME_SIZE ->  |      top + QUICKTHREADS_STKBASE      | backchain
                                         +======================================+
                                         |                                      |
                                         +                                      +
                                                      ..............
                                         +                                      +
                                         |                                      |
                                         +--------------------------------------+
                                         |                                      |
                                         +                                      +
top ->                                   | top + QUICKTHREADS_BLOCKI_FRAME_SIZE | backchain
                                         +======================================+
                                               STACK TOP (lower address)

                                                   Stack grows down
                                                          |
                                                          V

 *****************************************************************************
 *
 * Stack initialization for a variant argument thread
 *

bottom ->                                     STACK BOTTOM (higher address)
                                         +======================================+
                                         |                                      |
                                         +                                      +
                                                      ..............
                                         +                                      +
top + QUICKTHREADS_VSTKBASE ->           |                arg(0)                | PAR(4)
                                         +--------------------------------------+
top + QUICKTHREADS_VCLEANUP_INDEX * 4 -> |            cleanup param             | PAR(3)
                                         +                                      +
top + QUICKTHREADS_VUSERF_INDEX * 4 ->   |             userf param              | PAR(2)
                                         +                                      +
top + QUICKTHREADS_VSTARTUP_INDEX * 4 -> |            startup param             | PAR(1)
                                         +                                      +
top + QUICKTHREADS_VARGT_INDEX * 4 ->    |               t param                | PAR(0)
                                         +--------------------------------------+
top + QUICKTHREADS_RETURN_INDEX * 4 ->   |              qt_vstart               | LR save
                                         +                                      +
top + QUICKTHREADS_BLOCKI_FRAME_SIZE ->  |                bottom                | backchain
                                         +======================================+
                                         |                                      |
                                         +                                      +
                                                      ..............
                                         +                                      +
                                         |                                      |
                                         +--------------------------------------+
                                         |                                      |
                                         +                                      +
top ->                                   | top + QUICKTHREADS_BLOCKI_FRAME_SIZE | backchain
                                         +======================================+
                                               STACK TOP (lower address)

                                                   Stack grows down
                                                          |
                                                          V

 * NOTE:
 *
 * Parameters are passed to "qt_start" or to "qt_vstart" by putting them into
 * the stack frames of "qt_start" or "qt_vstart" themselves. This is not a
 * conventional parameter passing because parameters should be put into the
 * caller's stack, not into the callee's one. Actually we must consider
 * that as a preload of the parameter area that "qt_start" or "qt_vstart"
 * will use for their own calls.
 * Be aware of the fact that, during a call, the caller's parameter area is,
 * in a certain sense, volatile. In fact, the callee can save parameter
 * registers in the caller's parameter area.
 *
 *****************************************************************************/


/*****************************************************************************

   Define PowerPC System V related macros

 *****************************************************************************/


/* Machine word used for stack slots. The byte offsets in this header are
 * written for 4-byte words (see PPC_PAR and PPC_PARAM_AREA); the *_INDEX
 * macros below convert byte offsets to word indices with sizeof(PPC_W).
 */
typedef unsigned long PPC_W;

/* Stack pointer must always be a multiple of 16 */
#define PPC_STACK_INCR 16
/* Round "length" up to the next multiple of PPC_STACK_INCR. */
#define PPC_ROUND_STACK(length) \
    (((length)+PPC_STACK_INCR-1) & ~(PPC_STACK_INCR-1))


/* Size in bytes of the linkage area (backchain word + LR save word). */
#define PPC_LINKAGE_AREA 8
/* Byte offset of the LR save word inside the linkage area. */
#define PPC_LR_SAVE 4

/* Size in bytes of a parameter area holding "n" 4-byte parameters. */
#define PPC_PARAM_AREA(n) (4*(n))

#define PPC_GPR_SAVE_AREA (4*19) /* GPR13-GPR31 must be saved */
#define PPC_FPR_SAVE_AREA (8*18) /* FPR14-FPR31 must be saved */

/* Define parameter offset on the stack.
 * NOTICE: Parameters are numbered 0, 1, ..., n.
 */
#define PPC_PAR(i) (PPC_LINKAGE_AREA+(i)*4)

/*****************************************************************************

   Define stack frames

 *****************************************************************************/


/* Define the "qt_blocki" and "qt_abort" stack frame. We use the same stack
 * frame for both. In the figure, S is QUICKTHREADS_BLOCKI_FRAME_SIZE.
 *

top + S ->
                      +==========================+
top + S - 4 ->        |                          | GPR31
                      +      GPR SAVE AREA       +
                              ..............
                      +                          +
top + S - 19 * 4 ->   |                          | GPR13
                      +--------------------------+
                      |                          |
                      +      ALIGNMENT PAD       +
                              ..............
                      +       (if needed)        +
                      |                          |
                      +--------------------------+
top + 8 ->            |         CR SAVE          |
                      +--------------------------+
                      |                          |
                      +       LINKAGE AREA       +
top ->                |                          |
                      +==========================+
 */

/* Linkage area + one word for the CR save + GPR save area, rounded up to the
 * stack alignment.
 */
#define QUICKTHREADS_BLOCKI_FRAME_SIZE \
    PPC_ROUND_STACK(PPC_LINKAGE_AREA+4+PPC_GPR_SAVE_AREA)

/* Byte offset of the CR save word in the "qt_blocki" frame. */
#define QUICKTHREADS_BLOCKI_CR_SAVE 8

/* Offset of the save slot of GPR "i" (13 <= i <= 31). GPR31 occupies the
 * last word of the frame; lower-numbered registers are stored at lower
 * addresses.
 * The argument is parenthesized so that any expression can be passed.
 */
#define QUICKTHREADS_BLOCKI_GPR_SAVE(i) \
    (QUICKTHREADS_BLOCKI_FRAME_SIZE-4+((i)-31)*4)



/* Define the "qt_block" stack frame. Notice that since "qt_block" calls
 * "qt_blocki", GPR registers are saved into "qt_blocki" stack frame.
 * In the figure, S is QUICKTHREADS_BLOCK_FRAME_SIZE.
 *

top + S ->
                      +==========================+
top + S - 8 ->        |                          | FPR31
                      +      FPR SAVE AREA       +
                              ..............
                      +                          +
top + S - 18 * 8 ->   |                          | FPR14
                      +--------------------------+
                      |                          |
                      +      ALIGNMENT PAD       +
                              ..............
                      +       (if needed)        +
                      |                          |
                      +--------------------------+
                      |                          |
                      +       LINKAGE AREA       +
top ->                |                          |
                      +==========================+
 */

#define QUICKTHREADS_BLOCK_FRAME_SIZE \
    PPC_ROUND_STACK(PPC_LINKAGE_AREA+PPC_FPR_SAVE_AREA)

/* Offset of the save slot of FPR "i" (14 <= i <= 31). FPR31 occupies the
 * last double word of the frame; lower-numbered registers are stored at
 * lower addresses.
 * The argument is parenthesized so that any expression can be passed.
 */
#define QUICKTHREADS_BLOCK_FPR_SAVE(i) \
    (QUICKTHREADS_BLOCK_FRAME_SIZE-8+((i)-31)*8)


/* Define the "qt_start" frame size. It consists just of the linkage area and
 * the parameter area.
 *

                      +==========================+
                      |                          |
                      +      ALIGNMENT PAD       +
                              ..............
                      +       (if needed)        +
                      |                          |
                      +--------------------------+
                      |                          | only par
                      +                          +
                      |                          | userf par
                      +      PARAMETER AREA      +
                      |                          | t par
                      +                          +
top + 8 ->            |                          | u par
                      +--------------------------+
                      |                          |
                      +       LINKAGE AREA       +
top ->                |                          |
                      +==========================+

 */
#define QUICKTHREADS_START_FRAME_SIZE \
    PPC_ROUND_STACK(PPC_LINKAGE_AREA+PPC_PARAM_AREA(4))



/* Define the "qt_vstart" frame. It consists of the linkage area, the fixed
 * parameter area, the variant argument list and a local variable area used
 * in the "qt_vstart" implementation.
 *

backchain ->
                      +==========================+
backchain - 4 ->      |                          |
                      +   LOCAL VARIABLES AREA   +
                              ..............
                      +                          +
                      |                          |
                      +--------------------------+
                      |                          |
                      +      ALIGNMENT PAD       +
                              ..............
                      +       (if needed)        +
                      |                          |
                      +--------------------------+
                      |                          | arg(n)
                      +                          +
                      |                          |
                      +  VARIABLE ARGUMENT LIST  +
                              ..............
                      +      for userf call      +
                      |                          | arg(1)
                      +                          +
top + 8 + 16 ->       |                          | arg(0)
                      +--------------------------+
                      |                          | cleanup par
                      +                          +
                      |                          | userf par
                      +      PARAMETER AREA      +
                      |                          | startup par
                      +                          +
top + 8 ->            |                          | t par
                      +--------------------------+
                      |                          |
                      +       LINKAGE AREA       +
top ->                |                          |
                      +==========================+

 */
#define QUICKTHREADS_VARGS_LOCAL_AREA (4*4) /* local variable area */

/* The offset the stack will be moved back before calling "userf(...)".
 * The linkage area must be moved to be adjacent to the part of the variant
 * argument list that is in the stack. Notice that, since the first 8
 * parameters are passed via registers, the offset is equal to the size of
 * 4+8 parameters. */
#define QUICKTHREADS_VARGS_BKOFF PPC_PARAM_AREA(4+8)

/* Size of the "qt_vstart" frame for a variant argument list of "varbytes"
 * bytes, rounded up to the stack alignment.
 */
#define QUICKTHREADS_VSTART_FRAME_SIZE(varbytes) \
    PPC_ROUND_STACK(PPC_LINKAGE_AREA+QUICKTHREADS_VARGS_BKOFF+(varbytes)+ \
                    QUICKTHREADS_VARGS_LOCAL_AREA)

/* Offset to the base of the variant argument list */
#define QUICKTHREADS_VSTART_LIST_BASE (PPC_LINKAGE_AREA+PPC_PARAM_AREA(4))


/* Notice that qt_start and qt_vstart have no parameters, actually their
 * parameters are written in their stack frame during thread initialization
 */
extern void qt_start(void);
extern void qt_vstart(void);



/* Offset (in words) of the location where the block routine saves its return
 * address (i.e. LR). SP points the top of the block routine stack and,
 * following ppc calling conventions, the return address is saved in the
 * previous (caller's) stack frame.
 */
#define QUICKTHREADS_RETURN_INDEX \
    ((QUICKTHREADS_BLOCKI_FRAME_SIZE+PPC_LR_SAVE)/sizeof(PPC_W))

/* Static variable used to get the stack bottom in "VARGS" initialization.
 * It is written by QUICKTHREADS_VARGS_MD0 and read by QUICKTHREADS_VARGS_MD1.
 * Being "static" in a header, every translation unit that includes this
 * file gets its own copy, so both macros must be expanded in the same
 * translation unit.
 * NOTE(review): no synchronization is visible here; confirm that variant
 * argument threads are never initialized concurrently.
 */
static void *qt_sp_bottom_save;

/* Offset (in words) from the stack top of parameter "i", which lives in the
 * parameter area of the frame just above the block routine frame.
 */
#define QUICKTHREADS_ARG_INDEX(i) \
    ((QUICKTHREADS_BLOCKI_FRAME_SIZE+PPC_PAR(i))/sizeof(PPC_W))

/*****************************************************************************

   QuickThreads needed definitions

 *****************************************************************************/


#define QUICKTHREADS_GROW_DOWN
#define QUICKTHREADS_STKALIGN PPC_STACK_INCR
typedef PPC_W qt_word_t;


/* This macro is used by "QUICKTHREADS_ARGS" to initialize a single argument
 * thread.
 * - set "qt_start" as the "qt_block" or "qt_blocki" return address;
 * - set the top of the stack backchain;
 * - set the next backchain (not needed, but just to be "clean").
 *
 * "sp" is evaluated several times and is used in byte-offset arithmetic
 * ("sp" + frame size), so it is expected to be a side-effect free expression.
 * QUICKTHREADS_SPUT is not defined in this header.
 */
#define QUICKTHREADS_ARGS_MD(sp) \
    (QUICKTHREADS_SPUT ((sp), QUICKTHREADS_RETURN_INDEX, qt_start), \
     QUICKTHREADS_SPUT ((sp), 0, (sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE), \
     QUICKTHREADS_SPUT ((sp), QUICKTHREADS_BLOCKI_FRAME_SIZE/sizeof(PPC_W), \
         (sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE+QUICKTHREADS_START_FRAME_SIZE))


/* This macro is used by "QUICKTHREADS_VARGS" to initialize a variant argument
 * thread. It returns the pointer to the top of the argument list.
 * We also use it to get the stack bottom via a static variable. This is a bit
 * "dirty", it could be better to do it in "qt_vargs", but we don't want to
 * change anything outside of this file.
 * We need the stack bottom to allocate a local variable area used by
 * "qt_vstart".
 *
 * "sp" is evaluated twice, so it is expected to be free of side effects.
 */
#define QUICKTHREADS_VARGS_MD0(sp, varbytes) \
    ((qt_sp_bottom_save = (sp)), \
     ((qt_t *)(((char *)(sp)) - \
       (QUICKTHREADS_VSTART_FRAME_SIZE(varbytes)-QUICKTHREADS_VSTART_LIST_BASE))))


/* This macro is used by "QUICKTHREADS_VARGS" to initialize a variant argument
 * thread.
 * - set "qt_vstart" as the "qt_block" or "qt_blocki" return address;
 * - set the top of the stack backchain;
 * - set the next backchain (it points to the stack bottom saved by
 *   "QUICKTHREADS_VARGS_MD0").
 *
 * "sp" is evaluated several times, so it is expected to be free of side
 * effects.
 */
#define QUICKTHREADS_VARGS_MD1(sp) \
    (QUICKTHREADS_SPUT ((sp), QUICKTHREADS_RETURN_INDEX, qt_vstart), \
     QUICKTHREADS_SPUT ((sp), 0, (sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE), \
     QUICKTHREADS_SPUT ((sp), (QUICKTHREADS_BLOCKI_FRAME_SIZE)/sizeof(PPC_W), \
         qt_sp_bottom_save))


/* Activate "qt_vargs" as the initialization routine for the variant
 * argument threads
 */
#define QUICKTHREADS_VARGS_DEFAULT

/* Override "qt_vargs" with "qt_vargs_stdarg".
 * On LinuxPPC "qt_vargs" doesn't work, "qt_vargs_stdarg" uses a more
 * standard way to retrieve arguments from the variant list.
 * "qt_vargs_stdarg" is not declared in this header.
 */
#define QUICKTHREADS_VARGS(sp, nbytes, vargs, pt, startup, vuserf, cleanup) \
    ((qt_t *)qt_vargs_stdarg (sp, nbytes, vargs, pt, startup, vuserf, cleanup))


/* This macro is used by "QUICKTHREADS_ADJ(sp)" to get the stack top from the
 * stack bottom during a single argument thread initialization.
 * It is the space we need to allocate for a single argument thread: the stack
 * frame for the block routine ("qt_block" or "qt_blocki") and for "qt_start".
 */
#define QUICKTHREADS_STKBASE \
    (QUICKTHREADS_BLOCKI_FRAME_SIZE+QUICKTHREADS_START_FRAME_SIZE)

/* This macro is used by "QUICKTHREADS_VADJ(sp)" to get the stack top from the
 * base of the variant argument list during a variant argument thread
 * initialization.
 */
#define QUICKTHREADS_VSTKBASE \
    (QUICKTHREADS_BLOCKI_FRAME_SIZE+QUICKTHREADS_VSTART_LIST_BASE)

/* The *index* (positive offset) of where to put each value. */

/* Single argument thread: PAR(0)..PAR(3) of the "qt_start" frame. */
#define QUICKTHREADS_ARGU_INDEX QUICKTHREADS_ARG_INDEX(0)
#define QUICKTHREADS_ARGT_INDEX QUICKTHREADS_ARG_INDEX(1)
#define QUICKTHREADS_USER_INDEX QUICKTHREADS_ARG_INDEX(2)
#define QUICKTHREADS_ONLY_INDEX QUICKTHREADS_ARG_INDEX(3)


/* Variant argument thread: PAR(0)..PAR(3) of the "qt_vstart" frame. */
#define QUICKTHREADS_VARGT_INDEX QUICKTHREADS_ARG_INDEX(0)
#define QUICKTHREADS_VSTARTUP_INDEX QUICKTHREADS_ARG_INDEX(1)
#define QUICKTHREADS_VUSERF_INDEX QUICKTHREADS_ARG_INDEX(2)
#define QUICKTHREADS_VCLEANUP_INDEX QUICKTHREADS_ARG_INDEX(3)

#endif /* ndef QUICKTHREADS_POWERPC_H */