1/*
2   + * QuickThreads -- Threads-building toolkit.
3   + * Copyright (c) 1993 by David Keppel
4   + *
5   + * Permission to use, copy, modify and distribute this software and
6   + * its documentation for any purpose and without fee is hereby
7   + * granted, provided that the above copyright notice and this notice
8   + * appear in all copies.  This software is provided as a
9   + * proof-of-concept and for demonstration purposes; there is no
10   + * representation about the suitability of this software for any
11   + * purpose.
12   + *
13
14   + * PowerPC-Mach thread switching module.
15   + *
16   + * This software is largely based on the original PowerPC-Linux porting
17   + * developed by Ken Aaker <kenaaker@silverbacksystems.com>
18   + *
19   + * Marco Bucci <marco.bucci@inwind.it>
20   + * December 2002
21   + *
22   + */
23
24
25#ifndef QUICKTHREADS_POWERPC_H
26#define QUICKTHREADS_POWERPC_H
27
28
29/*****************************************************************************
30 *
31 * DESCRIPTION
32 *
 * This is the QuickThreads switching module implementation for PowerPC
 * running under the Mach kernel. It was developed and tested under MacOS X,
 * that is, under Darwin (the UNIX-BSD foundation of MacOS X).
 *
 * Notice that the Mach PowerPC ABI (Application Binary Interface) [1] is
 * not the same as the System V ABI [2] used by most of the Linux PowerPC
 * implementations.
40 *
41 * IMPLEMENTATION NOTES
42 *
 * 1) Porting to the System V ABI
 * Excluding the variant argument calling convention, the Mach and System V
 * ABIs are similar enough that it should be possible to use a few simple
 * macros to adapt the code to both ABIs. Actually, the only relevant
 * difference is in the linkage area structure and in the position where the
 * Link and the Condition registers are saved. As to the calling conventions,
 * there are differences in floating point argument passing and in variant
 * argument lists. Notice that, on Mach, the caller's stack frame allocates
 * space to hold all arguments ([1] p.51), while on System V, the caller's
 * stack frame allocates space to hold just the arguments that don't fit into
 * registers ([2] p.3.18).
54 *
55 * 2) Variant argument list implementation
56 * Variant argument calling on a RISC machine is not easy to implement since
57 * parameters are passed via registers instead of via stack. In a general
58 * variant argument implementation, the caller's stack must map the whole
59 * parameter list following the rules related to the use of the GPR and FPR
60 * parameter registers and the stack alignment ([1] p.54).
61 * This implementation is quite simple and not general. It works under the
62 * hypothesis that arguments are 4-bytes aligned integers.
63 *
 * 3) This header file organisation
 * I preferred not to mix macros that are needed (i.e. directly used) by
 * QuickThreads with internal "implementation" macros. You will find the
 * QuickThreads macros at the end of this header. Sometimes they just refer
 * to an analogous "internal" macro. At the top, there are the macros that I
 * used to make the implementation cleaner (I hope). I could have included
 * some system headers (for stack layout definitions, prologs and epilogs,
 * etc.), but I preferred to have a self-contained header in order to make
 * everything clearer for maintenance and for possible porting to another ABI.
73 *
74 *
75 * REFERENCES
76 *
77 * [1] - Mach-O Runtime Architecture
78 *       Runtime Concepts and Conventions for Mac OS X Programs
79 *       Preliminary July 2002
80 *
81 * [2] - SYSTEM V APPLICATION BINARY INTERFACE
82 *       PowerPC Processor Supplement
83 *       September 1995
84 *
85 * On MacOS X, more documentation is available by installing the "Developer
86 * Tools". Useful macros and documentation can be found in the system headers
87 * files such as asm.h, asm_help.h etc. (see /usr/architecture/ppc/ or
88 * /System/Library/Frameworks/Kernel.framework/Headers/architecture/ppc/).
89
90 *****************************************************************************/
91
92/*****************************************************************************
93 *
94 *  PowerPC Mach-O Stack frame (see [1])
95 *
96
97                      ................
98                +                          +
99                |                          | reserved
100                +  CALLER'S LINKAGE AREA   +
101                |                          | Caller's LR
102                +                          +
103                |                          | Caller's CR
104                +                          +
105 backchain ->   |                          | Caller's backchain
106                +==========================+
107                |                          | FPR31
108                +      FPR SAVE AREA       +
109                       ..............
110                +                          +
111                |                          | FPRn
112                +--------------------------+
113                |                          | GPR31
114                +      GPR SAVE AREA       +
115                       ..............
116                +                          +
117                |                          | GPRn
118                +--------------------------+
119                |                          |
                +      ALIGNMENT PAD       +
121                       ..............
122                +       (if needed)        +
123                |                          |
124                +--------------------------+
125                |                          |
126                +   LOCAL VARIABLES AREA   +
127                       ..............
128                +                          +
129                |                          |
130                +--------------------------+
131                |                          | PAR(n)
132                +                          +
133                |                          |
134                +      PARAMETER AREA      +
135                       ..............
136                +      for FUTURE call     +
137                |                          | PAR(1)
138                +                          +
139 SP + 24 ->     |                          | PAR(0)
140                +--------------------------+
141 SP + 20 ->     |                          | Caller's TOC
142                +                          +
143 SP + 16 ->     |                          | reserved
144                +                          +
145 SP + 12 ->     |                          | reserved
146                +       LINKAGE AREA       +
147 SP + 8 ->      |                          | LR callee-save for FUTURE call
148                +                          +
149 SP + 4 ->      |                          | CR callee-save for FUTURE call
150                +                          +
151 SP ->          |                          | backchain
152                +==========================+
153                STACK TOP (lower address)
154
155                     Stack grows down
156                             |
157                             V
158 * NOTE:
159 *
 * 1) Parameters are allocated in the CALLER's parameter area. This area must
 * be large enough to hold all parameters regardless of whether or not they
 * are passed in registers.
 *
 * The caller parameter area is used:
 * - by the caller, to store parameters to the callee that cannot fit in
 *  registers (no more parameter registers are available);
 * - by the callee, to save parameter registers (for instance because they are
 * needed for a further call).
 *
 * Obviously, the callee saves parameter registers in the location in which
 * they are mapped on the caller's stack frame. So, be aware that, if
 * something else is stored in that location, it could be overwritten during
 * a call.
 *
 * 2) The callee saves LR and CR in the caller's linkage area. All the rest
 * of the callee's state is saved in its own stack frame.
176 *
177
178 *****************************************************************************/
179
180
181/*****************************************************************************
182 *
183 * Stack initialization for a single argument thread
184 *
185
186
187 top + QUICKTHREADS_STKBASE ->           STACK BOTTOM (higher address)
188                               +==========================+
189                               |                          |
190                               +                          +
191                                     ..............
192                               +                          +
193                               |                          |
194                               +--------------------------+
195 top + QUICKTHREADS_ONLY_INDEX * 4 ->    | only param               | PAR(3)
196                               +                          +
197 top + QUICKTHREADS_USER_INDEX * 4 ->    | userf param              | PAR(2)
198                               +                          +
199 top + QUICKTHREADS_ARGT_INDEX * 4 ->    | t param                  | PAR(1)
200                               +                          +
201 top + QUICKTHREADS_ARGU_INDEX * 4 ->    | u param                  | PAR(0)
202                               +--------------------------+
203                               |                          |
204                               +                          +
205                                     ..............
206                               +                          +
207 top + QUICKTHREADS_RETURN_INDEX * 4 ->  | qt_start                 | LR save
208                               +                          +
209                                     ..............
210                               +                          +
211 top + QUICKTHREADS_BLOCKI_FRAME_SIZE -> | top + QUICKTHREADS_STKBASE         | backchain
212                               +==========================+
213                               |                          |
214                               +                          +
215                                     ..............
216                               +                          +
217                               |                          |
218                               +--------------------------+
219                               |                          |
220                               +                          +
221                                     ..............
222                               +                          +
223 top ->                        |top + QUICKTHREADS_BLOCKI_FRAME_SIZE| backchain
224                               +==========================+
225                               STACK TOP (lower address)
226
227                                    Stack grows down
228                                           |
229                                           V
230
231 *****************************************************************************
232 *
233 * Stack initialization for a variant argument thread
234 *
235
236 bottom ->                     STACK BOTTOM (higher address)
237                               +==========================+
238                               |                          |
239                               +                          +
240                                     ..............
241                               +                          +
242 top + QUICKTHREADS_VSTKBASE ->          | arg(0)                   | PAR(4)
243                               +--------------------------+
 top + QUICKTHREADS_VCLEANUP_INDEX * 4 ->| cleanup param            | PAR(3)
                               +                          +
 top + QUICKTHREADS_VUSERF_INDEX * 4 ->  | userf param              | PAR(2)
                               +                          +
 top + QUICKTHREADS_VSTARTUP_INDEX * 4 ->| startup param            | PAR(1)
                               +                          +
 top + QUICKTHREADS_VARGT_INDEX * 4 ->   | t param                  | PAR(0)
251                               +--------------------------+
252                               |                          |
253                               +                          +
254                                     ..............
255                               +                          +
 top + QUICKTHREADS_RETURN_INDEX * 4 ->  | qt_vstart                | LR save
257                               +                          +
258                                     ..............
 top + QUICKTHREADS_BLOCKI_FRAME_SIZE -> | bottom                   | backchain
260                               +==========================+
261                               |                          |
262                               +                          +
263                                     ..............
264                               +                          +
265                               |                          |
266                               +--------------------------+
267                               |                          |
268                               +                          +
269                                     ..............
270                               +                          +
271 top ->                        |top + QUICKTHREADS_BLOCKI_FRAME_SIZE| backchain
272                               +==========================+
273                               STACK TOP (lower address)
274
275                                    Stack grows down
276                                          |
277                                          V
278
279* NOTE:
280*
* Parameters are passed to "qt_start" or to "qt_vstart" by putting them into
* the stack frames of "qt_start" or "qt_vstart" themselves. This is not a
* conventional parameter passing because parameters should be put into the
* caller's stack, not into the callee's one. Actually we must consider
* that as a preload of the parameter area that "qt_start" or "qt_vstart"
* will use for their own calls.
*  Be aware of the fact that, during a call, the caller's parameter area is,
* in a certain sense, volatile. In fact, the callee can save parameter
* registers on the caller's parameter area.
290*
291 *****************************************************************************/
292
293
294/*****************************************************************************
295
296   Define PowerPC Mach-O related macros
297
298 *****************************************************************************/
299
300
301
/* One machine word as it is stored in a stack slot. */
typedef unsigned long PPC_W;

/* The stack pointer must always be a multiple of PPC_STACK_INCR bytes. */
#define PPC_STACK_INCR	16

/* Round "length" up to the next multiple of PPC_STACK_INCR. */
#define PPC_ROUND_STACK(length) \
	(((length) + PPC_STACK_INCR - 1) & ~(PPC_STACK_INCR - 1))


/* Size of the linkage area and offsets, inside it, of the CR and LR
 * save words.
 */
#define PPC_LINKAGE_AREA	24
#define PPC_CR_SAVE		4
#define PPC_LR_SAVE		8

/* Size in bytes of a parameter area holding "n" 4-byte parameters. */
#define PPC_PARAM_AREA(n)	((n) * 4)

/* Sizes in bytes of the callee-saved register areas. */
#define PPC_GPR_SAVE_AREA	(19 * 4)	/* GPR13-GPR31 must be saved */
#define PPC_FPR_SAVE_AREA	(18 * 8)	/* FPR14-FPR31 must be saved */

/* Offset of parameter "i" from the stack pointer: the parameter area starts
 * right after the linkage area.
 * NOTICE: Parameters are numbered 0, 1, ..., n.
 */
#define PPC_PAR(i)	(PPC_LINKAGE_AREA + PPC_PARAM_AREA(i))
323
324/*****************************************************************************
325
326   Define stack frames
327
328 *****************************************************************************/
329
330
331/* Define the "qt_blocki" and "qt_abort" stack frame. We use the same stack
332 * frame for both.
333 *
334
335 top + S ->
336                        +==========================+
337 top + S - 4 ->         |                          | GPR31
338                        +      GPR SAVE AREA       +
339                               ..............
340                        +                          +
341 top + S - 19 * 4 ->    |                          | GPR13
342                        +--------------------------+
343                        |                          |
                        +      ALIGNMENT PAD       +
345                               ..............
346                        +       (if needed)        +
347                        |                          |
348                        +--------------------------+
349                        |                          |
350                        +                          +
351                        |                          |
352                        +      PARAMETER AREA      +
353                        |                          |
354                        +                          +
355 top + 24 ->            |                          |
356                        +--------------------------+
357                        |                          |
358                        +       LINKAGE AREA       +
359 top ->                 |                          |
360                        +==========================+
361 */
362
/* Size of the stack frame shared by "qt_blocki" and "qt_abort": linkage
 * area, a four-parameter area and the GPR save area, rounded up to the
 * stack alignment.
 */
#define QUICKTHREADS_BLOCKI_FRAME_SIZE \
	PPC_ROUND_STACK(PPC_LINKAGE_AREA+PPC_PARAM_AREA(4)+PPC_GPR_SAVE_AREA)

/* Offset, from the top of the frame, of the word where GPR"i" is saved.
 * GPR31 takes the last word of the frame; registers are stored from GPR13
 * to GPR31 at increasing addresses.
 * "i" is parenthesized so that any expression (for instance a conditional
 * one) can be passed as the register number.
 */
#define QUICKTHREADS_BLOCKI_GPR_SAVE(i) \
	(QUICKTHREADS_BLOCKI_FRAME_SIZE-4+((i)-31)*4)
370
371
372
/* Define the "qt_block" stack frame. Notice that since "qt_block" calls
 * "qt_blocki", GPR registers are saved into the "qt_blocki" stack frame.
375 *
376
377 top + S ->
378                        +==========================+
379 top + S - 8 ->         |                          | FPR31
380                        +      FPR SAVE AREA       +
381                               ..............
382                        +                          +
383 top + S - 18 * 8 ->    |                          | FPR14
384                        +--------------------------+
385                        |                          |
                        +      ALIGNMENT PAD       +
387                               ..............
388                        +       (if needed)        +
389                        |                          |
390                        +--------------------------+
391                        |                          |
392                        +                          +
393                        |                          |
394                        +      PARAMETER AREA      +
395                        |                          |
396                        +                          +
397 top + 24 ->            |                          |
398                        +--------------------------+
399                        |                          |
400                        +       LINKAGE AREA       +
401 top ->                 |                          |
402                        +==========================+
403 */
404
/* Size of the "qt_block" stack frame: linkage area, a four-parameter area
 * and the FPR save area, rounded up to the stack alignment.
 */
#define QUICKTHREADS_BLOCK_FRAME_SIZE \
	PPC_ROUND_STACK(PPC_LINKAGE_AREA+PPC_PARAM_AREA(4)+PPC_FPR_SAVE_AREA)

/* Offset, from the top of the frame, of the doubleword where FPR"i" is
 * saved. FPR31 takes the last 8 bytes of the frame and lower-numbered
 * registers sit at lower addresses, down to FPR14.
 * "i" is parenthesized so that any expression (for instance a conditional
 * one) can be passed as the register number.
 */
#define QUICKTHREADS_BLOCK_FPR_SAVE(i) \
	(QUICKTHREADS_BLOCK_FRAME_SIZE-8+((i)-31)*8)
411
412
413/* Define the "qt_start" frame size. It consists just of the linkage area and
414 * the parameter area.
415 *
416
417                        +==========================+
418                        |                          |
                        +      ALIGNMENT PAD       +
420                               ..............
421                        +       (if needed)        +
422                        |                          |
423                        +--------------------------+
424                        |                          | only par
425                        +                          +
426                        |                          | userf par
427                        +      PARAMETER AREA      +
428                        |                          | t par
429                        +                          +
430 top + 24 ->            |                          | u par
431                        +--------------------------+
432                        |                          |
433                        +       LINKAGE AREA       +
434 top ->                 |                          |
435                        +==========================+
436
437 */
#define QUICKTHREADS_START_FRAME_SIZE \
	PPC_ROUND_STACK(PPC_LINKAGE_AREA + PPC_PARAM_AREA(4))
439
440
441
/* Define the "qt_vstart" frame. It consists of the linkage area, the fixed
 * parameter area, the variant argument list and a local variable area used in
 * the "qt_vstart" implementation.
445 *
446
447 backchain ->
448                        +==========================+
449 backchain - 4 ->       |                          |
450                        +   LOCAL VARIABLES AREA   +
451                               ..............
452                        +                          +
453                        |                          |
454                        +--------------------------+
455                        |                          |
                        +      ALIGNMENT PAD       +
457                               ..............
458                        +       (if needed)        +
459                        |                          |
460                        +--------------------------+
461                        |                          | arg(n)
462                        +                          +
463                        |                          |
464                        +  VARIABLE ARGUMENT LIST  +
465                               ..............
466                        +      for userf call      +
467                        |                          | arg(1)
468                        +                          +
469 top + 24 + 16 ->       |                          | arg(0)
470                        +--------------------------+
471                        |                          | cleanup par
472                        +                          +
473                        |                          | userf par
474                        +      PARAMETER AREA      +
475                        |                          | startup par
476                        +                          +
477 top + 24 ->            |                          | t par
478                        +--------------------------+
479                        |                          |
480                        +       LINKAGE AREA       +
481 top ->                 |                          |
482                        +==========================+
483
484 */
/* Size in bytes of the local variable area of the "qt_vstart" frame. */
#define QUICKTHREADS_VARGS_LOCAL_AREA	(4 * 4)

/* The offset by which the stack is moved back before calling "userf(...)".
 * The linkage area must be moved so that it is adjacent to the part of the
 * variant argument list that is on the stack.
 */
#define QUICKTHREADS_VARGS_BKOFF	PPC_PARAM_AREA(4)

/* Offset to the base of the variant argument list: it follows the linkage
 * area and the four fixed parameters.
 */
#define QUICKTHREADS_VSTART_LIST_BASE	(PPC_LINKAGE_AREA + PPC_PARAM_AREA(4))

/* Size of the "qt_vstart" frame for a variant argument list of "varbytes"
 * bytes: linkage area, fixed parameters, the list itself and the local
 * variable area, rounded up to the stack alignment.
 */
#define QUICKTHREADS_VSTART_FRAME_SIZE(varbytes) \
	PPC_ROUND_STACK(QUICKTHREADS_VSTART_LIST_BASE + (varbytes) + \
			QUICKTHREADS_VARGS_LOCAL_AREA)
499
500
/* "qt_start" and "qt_vstart" are declared without parameters: the values
 * they work on are written into their stack frames during thread
 * initialization.
 */
void qt_start(void);
void qt_vstart(void);
506
507
508
/* Index (in words, from the stack top) of the slot where the block routine
 * saves its return address (i.e. LR). SP points to the top of the block
 * routine frame and, following the ppc calling conventions, the return
 * address is saved in the previous (caller's) stack frame; hence the
 * block routine frame size is added to the LR save offset.
 */
#define QUICKTHREADS_RETURN_INDEX \
	((QUICKTHREADS_BLOCKI_FRAME_SIZE + PPC_LR_SAVE) / sizeof(PPC_W))

/* static variable used to get the stack bottom in "VARGS" initialization */
/* NOTE(review): the declaration below is commented out, yet
 * QUICKTHREADS_VARGS_MD0 and QUICKTHREADS_VARGS_MD1 still name
 * "qt_sp_bottom_save" -- confirm where it is expected to be declared.
 */
/* static void *qt_sp_bottom_save; */

/* Index (in words, from the stack top) of parameter "i" in the frame that
 * sits just above the block routine frame.
 */
#define QUICKTHREADS_ARG_INDEX(i) \
	((QUICKTHREADS_BLOCKI_FRAME_SIZE + PPC_PAR(i)) / sizeof(PPC_W))
520
521/*****************************************************************************
522
523	QuickThreads needed definitions
524
525 *****************************************************************************/
526
527
528#define QUICKTHREADS_GROW_DOWN
529#define QUICKTHREADS_STKALIGN	PPC_STACK_INCR
530typedef PPC_W qt_word_t;
531
532
/* This macro is used by "QUICKTHREADS_ARGS" to initialize a single argument
 * thread whose stack top is "sp":
 * - set "qt_start" as the "qt_block" or "qt_blocki" return address;
 * - set the top of the stack backchain;
 * - set the next backchain (not needed, but just to be "clean").
 *
 * "sp" is expanded several times, so it must be free of side effects. Every
 * use is parenthesized so that any pointer expression (e.g. a conditional
 * expression) yields the intended "sp + offset" arithmetic.
 */
#define QUICKTHREADS_ARGS_MD(sp) \
	(QUICKTHREADS_SPUT ((sp), QUICKTHREADS_RETURN_INDEX, qt_start), \
	QUICKTHREADS_SPUT ((sp), 0, (sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE), \
	QUICKTHREADS_SPUT ((sp), QUICKTHREADS_BLOCKI_FRAME_SIZE/sizeof(PPC_W), \
		(sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE+QUICKTHREADS_START_FRAME_SIZE))
543
544
/* First machine-dependent step of "QUICKTHREADS_VARGS" for a variant
 * argument thread. The value of the macro is the pointer to the top of the
 * argument list: "sp" lowered by the part of the "qt_vstart" frame that lies
 * above the base of the list.
 * As a side effect "sp" is recorded in "qt_sp_bottom_save". This is a bit
 * "dirty" (it could be better to do it in "qt_vargs"), but it avoids changes
 * outside of this file. The stack bottom is needed to allocate the local
 * variable area used by "qt_vstart".
 */
#define QUICKTHREADS_VARGS_MD0(sp, varbytes) \
	(qt_sp_bottom_save = (sp), \
	 (qt_t *) ((char *) (sp) - \
		(QUICKTHREADS_VSTART_FRAME_SIZE (varbytes) - \
		 QUICKTHREADS_VSTART_LIST_BASE)))
557
558
/* This macro is used by "QUICKTHREADS_VARGS" to initialize a variant argument
 * thread whose stack top is "sp":
 * - set "qt_vstart" as the "qt_block" or "qt_blocki" return address;
 * - set the top of the stack backchain;
 * - set the next backchain (it points to the stack bottom recorded in
 *   "qt_sp_bottom_save").
 *
 * "sp" is expanded several times, so it must be free of side effects. Every
 * use is parenthesized so that any pointer expression (e.g. a conditional
 * expression) yields the intended "sp + offset" arithmetic.
 */
#define QUICKTHREADS_VARGS_MD1(sp) \
	(QUICKTHREADS_SPUT ((sp), QUICKTHREADS_RETURN_INDEX, qt_vstart), \
	QUICKTHREADS_SPUT ((sp), 0, (sp)+QUICKTHREADS_BLOCKI_FRAME_SIZE), \
	QUICKTHREADS_SPUT ((sp), (QUICKTHREADS_BLOCKI_FRAME_SIZE)/sizeof(PPC_W), \
		qt_sp_bottom_save))
569
570
/* Activate "qt_vargs" as the initialization routine for the variant
 * argument threads.
 */
#define QUICKTHREADS_VARGS_DEFAULT

/* Override "qt_vargs" with "qt_vargs_stdarg": according to the original
 * note, on LinuxPPC "qt_vargs" doesn't work, while "qt_vargs_stdarg" uses a
 * more standard way to retrieve arguments from the variant list.
 * All arguments are forwarded unchanged and the result is converted to
 * "qt_t *".
 */
#define QUICKTHREADS_VARGS(sp, nbytes, vargs, pt, startup, vuserf, cleanup) \
	((qt_t *) qt_vargs_stdarg ((sp), (nbytes), (vargs), (pt), \
				   (startup), (vuserf), (cleanup)))
582
583
/* Used by "QUICKTHREADS_ADJ(sp)" to get the stack top from the stack bottom
 * during a single argument thread initialization. It is the space to
 * allocate for such a thread: the stack frame of the block routine
 * ("qt_block" or "qt_blocki") plus the stack frame of "qt_start".
 */
#define QUICKTHREADS_STKBASE \
	(QUICKTHREADS_BLOCKI_FRAME_SIZE + QUICKTHREADS_START_FRAME_SIZE)

/* Used by "QUICKTHREADS_VADJ(sp)" to get the stack top from the base of the
 * variant argument list during a variant argument thread initialization:
 * the block routine frame plus the part of the "qt_vstart" frame that lies
 * below the list.
 */
#define QUICKTHREADS_VSTKBASE \
	(QUICKTHREADS_BLOCKI_FRAME_SIZE + QUICKTHREADS_VSTART_LIST_BASE)
596
/* The *index* (positive offset, in words) of where to put each value. */

/* Single argument thread: parameters 0..3 of the "qt_start" frame. */
#define QUICKTHREADS_ARGU_INDEX		(QUICKTHREADS_ARG_INDEX(0))
#define QUICKTHREADS_ARGT_INDEX		(QUICKTHREADS_ARG_INDEX(1))
#define QUICKTHREADS_USER_INDEX		(QUICKTHREADS_ARG_INDEX(2))
#define QUICKTHREADS_ONLY_INDEX		(QUICKTHREADS_ARG_INDEX(3))

/* Variant argument thread: parameters 0..3 of the "qt_vstart" frame. */
#define QUICKTHREADS_VARGT_INDEX	(QUICKTHREADS_ARG_INDEX(0))
#define QUICKTHREADS_VSTARTUP_INDEX	(QUICKTHREADS_ARG_INDEX(1))
#define QUICKTHREADS_VUSERF_INDEX	(QUICKTHREADS_ARG_INDEX(2))
#define QUICKTHREADS_VCLEANUP_INDEX	(QUICKTHREADS_ARG_INDEX(3))
609
610#endif /* ndef QUICKTHREADS_POWERPC_H */
611
612