1/* powerpc_mach.s -- assembly support. */
2
3/*
4 * QuickThreads -- Threads-building toolkit.
5 * Copyright (c) 1993 by David Keppel
6 *
7 * Permission to use, copy, modify and distribute this software and
8 * its documentation for any purpose and without fee is hereby
9 * granted, provided that the above copyright notice and this notice
10 * appear in all copies.  This software is provided as a
11 * proof-of-concept and for demonstration purposes; there is no
12 * representation about the suitability of this software for any
13 * purpose.
14
15
16 * PowerPC-Mach thread switching module.
17 * Darwin (MacOS X) assembly
18 *
 * NOTICE: Syntax for register names is not the GNU one. Registers are
 * named "rx" and "fx", not "%rx" and "%fx" as usual for the GNU "as" tool.
 * The Darwin "as" tool is based on GNU "as" but follows the "official"
 * PowerPC syntax.
23 *
24 *
25 * This software is largely based on the original PowerPC-Linux porting
26 * developed by Ken Aaker <kenaaker@silverbacksystems.com>
27 *
28 * Marco Bucci <marco.bucci@inwind.it>
29 * December 2002
30 *
31 */
32
33
34/*
35 *
 * PowerPC Register conventions:
37 *
38 *  r0			volatile
39 *  r1			SP
40 *  r2			system reserved
41 *  r3-r4		volatile for parameter passing and function return
42 *  r5-r10		volatile for parameter passing
43 *  r11-r12		volatile
 *  r13-r31		non volatile registers
45 *  f0			volatile
46 *  f1			volatile for parameter passing and function return
47 *  f2-f13		volatile for parameter passing
48 *  f14-f31		non volatile
49 *
50 *  cr2-cr4		non volatile
51 *
52 *
 * See the header file for more documentation.
54 *
55 *
56 *
57 * IMPLEMENTATION NOTES
58 *
59 *
60 * 1) Condition register saving
61 * On most machines, the condition code register is caller-save.
62 * On the PPC, the condition code register is callee-save, so the
63 * thread context switch must preserve it.
64 *
65 *
 * 2) Floating point registers saving
 * On resuming a thread, floating point registers are restored or not
 * depending only on which block routine suspended the thread (i.e. regardless
 * of whether "qt_block", "qt_blocki" or "qt_abort" is used to resume it).
 * This behaviour is obtained by implementing "qt_block" by means of a nested
 * call to "qt_blocki". As a result, the blocking of a thread always goes
 * and returns through "qt_blocki" and, if a thread was blocked by "qt_block",
 * its execution resumes from the floating point restoring code on exit
 * of "qt_block".
 *
 * Thanks to David Keppel, who explained this "simple" trick to me.
77 *
78 *
 * 3) C language code debugging
 * This software was developed and debugged using the Metrowerks
 * Code Warrior PPC integrated assembler. It can still be used with the
 * Code Warrior compiler by means of the file "powerpc_mach_asm_debug.c"
 * that includes it.
 * In order to avoid "copy and paste" bugs, and to make maintenance easier,
 * I made only minimal changes, so you can find some strange code such as:
86 *
87 *   #if 0
88 *   .if 0
89 *      C code here
90 *   .endif
91 *   #endif
92 *
93 * This is just to embed some C code that is needed by the Code Warrior
94 * integrated assembler.
95 *
96 *
 * 4) Assembly constants generation
 * Constants used in the assembly code are generated by running
 * the program that follows (commented out). It uses the C macros declared in
 * the C header in order to guarantee that the C interface and the assembly
 * code are "aligned". I avoided the use of an assembler preprocessor since
 * they are not so standard and moreover using macro expressions makes the
 * assembly debugging more difficult.
104 *
105 *
106
107
108#include <iostream>
109#include "powerpc_mach.h"
110
111int main()
112{
113	using namespace std;
114
115	int i;
116
117	cout << ".set LR_SAVE, " << PPC_LR_SAVE << endl;
118	cout << ".set CR_SAVE, " << PPC_CR_SAVE << endl;
119	cout << ".set BLOCKI_FSIZE, " << QUICKTHREADS_BLOCKI_FRAME_SIZE << endl;
120	cout << ".set BLOCK_FSIZE, " << QUICKTHREADS_BLOCK_FRAME_SIZE << endl;
121
122	cout << endl;
123	for(i=0; i<12; i++)
124	 	cout << ".set PAR_" << i << ", " << PPC_PAR(i) << endl;
125
126	cout << endl;
127	i = 13;
128	cout << ".set GPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCKI_GPR_SAVE(i) << endl;
129
130	cout << endl;
131	for(i=31; i>13; i--)
132	 	cout << ".set FPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCK_FPR_SAVE(i) << endl;
133
134	cout << endl;
135	cout << ".set VARGS_BKOFF, " << QUICKTHREADS_VARGS_BKOFF << endl;
136
137
138	cout << endl << endl << endl;
139
140	for(i=31; i>13; i--)
141	 	cout << "\tstfd\tf" << i << ",FPR_SAVE_" << i << "(r1)" << endl;
142
143	cout << endl;
144	for(i=31; i>13; i--)
145	 	cout << "\tlfd \tf" << i << ",FPR_SAVE_" << i << "(r1)" << endl;
146
147	cout << endl << endl << endl;
148
149
150	return 0;
151}
152
153
154 *
155 *
156 *
157 */
158
159
#if 0
/* Everything between this "#if 0" and the matching "#endif" is assembler-only
 * material: section setup, exported symbols and the numeric offsets used by
 * the routines below. The "#if 0" wrapper keeps it away from the Code Warrior
 * C build described in the implementation notes at the top of the file. */

	.text
	.align 4

/* Exported entry points. Every routine is published under two names:
 * the plain one and the one with a leading underscore. */
	.globl qt_blocki
	.globl _qt_blocki
	.globl qt_block
	.globl _qt_block
	.globl qt_abort
	.globl _qt_abort
	.globl qt_start
	.globl _qt_start
	.globl qt_vstart
	.globl _qt_vstart


/* Link register / condition register save slots. The routines address them
 * as "LR_SAVE+<frame size>(r1)" and "CR_SAVE+<frame size>(r1)", i.e. relative
 * to the stack pointer value that was current before the frame was pushed. */
.set CR_SAVE, 4
.set LR_SAVE, 8

/* Frame sizes of "qt_blocki" and "qt_block" */
.set BLOCKI_FSIZE, 128
.set BLOCK_FSIZE, 192

/* Offsets (from the SP) of the parameter words PAR(0)..PAR(11) */
.set PAR_0, 24
.set PAR_1, 28
.set PAR_2, 32
.set PAR_3, 36
.set PAR_4, 40
.set PAR_5, 44
.set PAR_6, 48
.set PAR_7, 52
.set PAR_8, 56
.set PAR_9, 60
.set PAR_10, 64
.set PAR_11, 68

/* Start of the integer register save area used by "stmw r13,..." and
 * "lmw r13,..." in the "qt_blocki" frame */
.set GPR_SAVE_13, 52

/* Save slots for the floating point registers f14..f31 in the "qt_block"
 * frame (one 8-byte slot per register) */
.set FPR_SAVE_14, 48
.set FPR_SAVE_15, 56
.set FPR_SAVE_16, 64
.set FPR_SAVE_17, 72
.set FPR_SAVE_18, 80
.set FPR_SAVE_19, 88
.set FPR_SAVE_20, 96
.set FPR_SAVE_21, 104
.set FPR_SAVE_22, 112
.set FPR_SAVE_23, 120
.set FPR_SAVE_24, 128
.set FPR_SAVE_25, 136
.set FPR_SAVE_26, 144
.set FPR_SAVE_27, 152
.set FPR_SAVE_28, 160
.set FPR_SAVE_29, 168
.set FPR_SAVE_30, 176
.set FPR_SAVE_31, 184


/* Symbolic names for the parameter slots read by "qt_vstart" */
.set P_T, PAR_0
.set P_STARTUP, PAR_1
.set P_USERF, PAR_2
.set P_CLEANUP, PAR_3

/* Distance by which "qt_vstart" moves the linkage area so that it becomes
 * adjacent to the variant argument list before calling "userf(...)": it
 * skips the four words "t", "startup", "userf" and "cleanup". */
.set VARGS_BKOFF, 16

/* Slots where "qt_vstart" parks "t" and "cleanup"; both offsets are taken
 * from the stack frame base (the backchain value). */
.set P_T_SAVE, -4
.set P_CLEANUP_SAVE, -8

#endif
232
233
234
/* qt_blocki -- suspend the running thread, preserving only its integer
 * non-volatile state, and switch to another thread.
 *
 *   r3 = helper      routine invoked as helper(old, a0, a1) on the new stack
 *   r4 = a0          handed to "helper" untouched
 *   r5 = a1          handed to "helper" untouched
 *   r6 = newthread   stack pointer of the thread to switch to
 *
 * Whatever "helper" leaves in r3 is what the resumed thread sees as the
 * return value of its own blocking call.
 */
#if 0
.if 0
#endif
void *qt_blocki (void *helper, void *a0, void *a1, void *newthread);
asm void *qt_blocki (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

#if 0
qt_blocki:
_qt_blocki:
#endif
/* ---- prolog: push a frame and park the outgoing thread's state ---- */
	stwu	r1,-BLOCKI_FSIZE(r1)			/* new frame, backchain stored */
	mflr	r0
	stw	r0,LR_SAVE+BLOCKI_FSIZE(r1)		/* return address -> stack */
	mfcr	r11
	stw	r11,CR_SAVE+BLOCKI_FSIZE(r1)		/* condition register -> stack */
	stmw	r13,GPR_SAVE_13(r1)			/* r13 and up -> save area */

/* ---- switch: run helper(qt_t *old, void *a0, void *a1) on the new stack ---- */
	mtlr	r3					/* LR = "helper" */
	mr	r3,r1					/* old = SP of the thread being suspended */
	mr	r1,r6					/* from here on we run on "newthread" */
	blrl						/* helper(old, a0, a1) */
/* r3 is left alone below, so the "helper" result becomes our result */

/* ---- epilog: unwind the frame found on the new stack and return to
 *      whoever blocked that thread ---- */
	lwz	r0,LR_SAVE+BLOCKI_FSIZE(r1)		/* fetch saved return address */
	lwz	r11,CR_SAVE+BLOCKI_FSIZE(r1)		/* fetch saved condition register */
	lmw	r13,GPR_SAVE_13(r1)			/* reload r13 and up */
	mtcr	r11
	mtlr	r0
	addi	r1,r1,BLOCKI_FSIZE			/* pop the frame */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif
283
284
285
/* qt_abort -- drop the running thread (nothing of it is saved) and switch
 * to another thread.
 *
 *   r3 = helper      routine invoked on the new stack
 *   r4 = a0          handed to "helper" untouched
 *   r5 = a1          handed to "helper" untouched
 *   r6 = newthread   stack pointer of the thread to switch to
 */
#if 0
.if 0
#endif
void qt_abort (void *helper, void *a0, void *a1, void *newthread);
asm void qt_abort (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

#if 0
qt_abort:
_qt_abort:
#endif
/* ---- no prolog: control never comes back to the aborted thread ---- */

/* ---- switch: run helper(qt_t *old, void *a0, void *a1) on the new stack ---- */
	mr	r1,r6					/* adopt the SP of "newthread" */
	mtlr	r3					/* LR = "helper" */
/* "old" is meaningless for an aborted thread: r3 is simply left as it is,
 * so "helper" receives its own address there (a don't-care value) */
	blrl						/* helper(<garbage>, a0, a1) */
/* r3 is left alone below, so the "helper" result is passed on */

/* ---- epilog: unwind the "qt_blocki" frame found on the new stack and
 *      return to whoever blocked that thread ---- */
	lwz	r0,LR_SAVE+BLOCKI_FSIZE(r1)		/* fetch saved return address */
	lwz	r11,CR_SAVE+BLOCKI_FSIZE(r1)		/* fetch saved condition register */
	lmw	r13,GPR_SAVE_13(r1)			/* reload r13 and up */
	mtcr	r11
	mtlr	r0
	addi	r1,r1,BLOCKI_FSIZE			/* pop the frame */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif
329
330
331
/* qt_block -- suspend the running thread, preserving both the integer and
 * the floating point non-volatile state, and switch to another thread.
 *
 * The floating point registers f14..f31 are stored in this routine's own
 * frame; the rest of the work is delegated to "qt_blocki". The arguments
 * (r3 = helper, r4 = a0, r5 = a1, r6 = newthread) are passed through to
 * "qt_blocki" unchanged, and so is its result in r3.
 */
#if 0
.if 0
#endif
void *qt_block (void *helper, void *a0, void *a1, void *newthread);
asm void *qt_block (void *helper, void *a0, void *a1, void *newthread)
{
#if 0
.endif
#endif

#if 0
qt_block:
_qt_block:
#endif
/* ---- prolog ---- */
	stwu	r1,-BLOCK_FSIZE(r1)			/* new frame, backchain stored */
	mflr	r0
	stw	r0,LR_SAVE+BLOCK_FSIZE(r1)		/* return address -> stack */

/* ---- park f14..f31 in the frame ---- */
	stfd	f14,FPR_SAVE_14(r1)
	stfd	f15,FPR_SAVE_15(r1)
	stfd	f16,FPR_SAVE_16(r1)
	stfd	f17,FPR_SAVE_17(r1)
	stfd	f18,FPR_SAVE_18(r1)
	stfd	f19,FPR_SAVE_19(r1)
	stfd	f20,FPR_SAVE_20(r1)
	stfd	f21,FPR_SAVE_21(r1)
	stfd	f22,FPR_SAVE_22(r1)
	stfd	f23,FPR_SAVE_23(r1)
	stfd	f24,FPR_SAVE_24(r1)
	stfd	f25,FPR_SAVE_25(r1)
	stfd	f26,FPR_SAVE_26(r1)
	stfd	f27,FPR_SAVE_27(r1)
	stfd	f28,FPR_SAVE_28(r1)
	stfd	f29,FPR_SAVE_29(r1)
	stfd	f30,FPR_SAVE_30(r1)
	stfd	f31,FPR_SAVE_31(r1)

/* ---- do the actual switch; execution continues after this call only
 *      when the thread is resumed ---- */
	bl	qt_blocki

/* ---- resumed: bring f14..f31 back ---- */
	lfd	f14,FPR_SAVE_14(r1)
	lfd	f15,FPR_SAVE_15(r1)
	lfd	f16,FPR_SAVE_16(r1)
	lfd	f17,FPR_SAVE_17(r1)
	lfd	f18,FPR_SAVE_18(r1)
	lfd	f19,FPR_SAVE_19(r1)
	lfd	f20,FPR_SAVE_20(r1)
	lfd	f21,FPR_SAVE_21(r1)
	lfd	f22,FPR_SAVE_22(r1)
	lfd	f23,FPR_SAVE_23(r1)
	lfd	f24,FPR_SAVE_24(r1)
	lfd	f25,FPR_SAVE_25(r1)
	lfd	f26,FPR_SAVE_26(r1)
	lfd	f27,FPR_SAVE_27(r1)
	lfd	f28,FPR_SAVE_28(r1)
	lfd	f29,FPR_SAVE_29(r1)
	lfd	f30,FPR_SAVE_30(r1)
	lfd	f31,FPR_SAVE_31(r1)

/* ---- epilog ---- */
	lwz	r0,LR_SAVE+BLOCK_FSIZE(r1)		/* fetch saved return address */
	mtlr	r0
	addi	r1,r1,BLOCK_FSIZE			/* pop the frame */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif
408
409
410
/* qt_start -- entry point of a single argument thread.
 *
 * The four words PAR(0)..PAR(3) of the stack were preloaded during thread
 * initialization (see the comments on stack initialization in the header
 * file) with "u", "t", "userf" and "only". This routine performs
 *
 *    only(u, t, userf);
 *
 * and treats a return from "only" as an error.
 */
#if 0
.if 0
#endif
void qt_start(void);
asm void qt_start(void)
{
#if 0
.endif
#endif

#if 0
qt_start:
_qt_start:
#endif
/* fetch the routine to call and place it in the link register */
	lwz	r6,PAR_3(r1)				/* r6 = "only" */
	mtlr	r6

/* fetch its three arguments */
	lwz	r3,PAR_0(r1)				/* r3 = "u" */
	lwz	r4,PAR_1(r1)				/* r4 = "t" */
	lwz	r5,PAR_2(r1)				/* r5 = "userf" */

/* only(u, t, userf) */
	blrl

/* "only" is not supposed to come back: report it */
	b	_qt_error

/* never executed; kept because some inline assemblers want an epilog here
 * (or generate one themselves) */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif
452
453
454
/* Start a variant argument thread using parameters preloaded in the stack
 * during thread initialization (see comments on stack initialization in the
 * header file).
458 *
459 * Executes:
460 *
461 *    startup(t);
462 *    userf_return = userf(...);
463 *    cleanup(pt, userf_return);
464 *
465
466
467 ***** Stack layout on start *****
468
469
470 backchain ->           STACK BOTTOM (higher address)
471                        +==========================+
472 backchain - 4 ->       |                          |
473                        +   LOCAL VARIABLES AREA   +
474                               ..............
475                        +                          +
476                        |                          |
477                        +--------------------------+
478                        |                          |
                        +      ALIGNMENT PAD       +
480                               ..............
481                        +       (if needed)        +
482                        |                          |
483                        +--------------------------+
484                        |                          | arg(n)
485                        +                          +
486                        |                          |
487                        +  VARIABLE ARGUMENT LIST  +
488                               ..............
489                        +      for userf call      +
490 SP + PAR(5) ->         |                          | arg(1)
491                        +                          +
492 SP + PAR(4) ->         |                          | arg(0)
493                        +--------------------------+
494 SP + PAR(3) ->         |                          | cleanup par
495                        +                          +
496 SP + PAR(2) ->         |                          | userf par
497                        +      PARAMETER AREA      +
498 SP + PAR(1) ->         |                          | startup par
499                        +                          +
500 SP + PAR(0) ->         |                          | t par
501                        +--------------------------+
502                        |                          |
503                        +       LINKAGE AREA       +
504 SP ->                  |                          |
505                        +==========================+
506                         STACK TOP (lower address)
507
508                             Stack grows down
509                                     |
510                                     V
511
512
513
514 ***** Stack layout before call userf *****
515
516
517 backchain ->           STACK BOTTOM (higher address)
518                        +==========================+
519 backchain - 4 ->       |                          |
520                        +   LOCAL VARIABLES AREA   +
521                               ..............
522                        +                          +
523                        |                          |
524                        +--------------------------+
525                        |                          |
                        +      ALIGNMENT PAD       +
527                               ..............
528                        +       (if needed)        +
529                        |                          |
530                        +--------------------------+
531                        |                          | arg(n)
532                        +                          +
533                        |                          |
534                        +  VARIABLE ARGUMENT LIST  +
535                               ..............
536                        +      for userf call      +
537 SP + PAR(1) ->         |                          | arg(1)
538                        +                          +
539 SP + PAR(0) ->         |                          | arg(0)
540                        +--------------------------+
541                        |                          |
542                        +       LINKAGE AREA       +
543 SP ->                  |                          |
544                        +==========================+
545                         STACK TOP (lower address)
546
547                             Stack grows down
548                                     |
549                                     V
550
551
 * To call "userf(...)", the argument list must be adjacent to the linkage
 * area. Instead of copying the argument list, we move back the linkage area
 * (actually, we just increase the SP and copy the backchain). "t" and
 * "cleanup" are saved in a local variable area in order to call
 * cleanup(pt, userf_return).
557
558*/
559
560
#if 0
.if 0
#endif
void qt_vstart(void);
asm void qt_vstart(void)
{
#if 0
.endif
#endif

#if 0
qt_vstart:
_qt_vstart:
#endif
/* Entry point of a variant argument thread: calls startup(t), then
 * userf(...) with the preloaded argument list, then
 * cleanup(pt, userf_return). A return from "cleanup" is treated as an error.
 *
 * NOTICE: a callee may spill its parameter registers into the parameter
 * area of its caller's frame. "t" lives in PAR(0), so if startup(t) spills
 * "t" it lands on the very same slot and no other parameter is destroyed.
 *
 * The linkage area is going to be moved (to sit right below the argument
 * list), which is why "t" and "cleanup" are first copied into the room
 * reserved for them at the bottom of the stack frame.
 */

/* ---- copy "t" and "cleanup" to the local variable area ---- */
	lwz	r3,P_T(r1)				/* r3 = "t" (also 1st arg of "startup") */
	lwz	r11,0(r1)				/* r11 = backchain (frame base) */
	stw	r3,P_T_SAVE(r11)			/* park "pt" */
	lwz	r4,P_CLEANUP(r1)			/* r4 = "cleanup" */
	stw	r4,P_CLEANUP_SAVE(r11)			/* park "cleanup" */

/* ---- startup(t) ---- */
	lwz	r5,P_STARTUP(r1)
	mtlr	r5
	blrl

/* ---- userf(...) ---- */
	lwz	r4,P_USERF(r1)				/* LR = "userf" */
	mtlr	r4

	/* r11 is volatile and may have been changed by "startup": fetch the
	 * backchain again and plant a copy where the relocated linkage area
	 * will start */
	lwz	r11,0(r1)
	stw	r11,VARGS_BKOFF(r1)

	/* the first eight words of the variant list travel in GPR3-GPR10;
	 * they sit four slots further up because "t", "startup", "userf"
	 * and "cleanup" come first */
	lwz	r3,PAR_4(r1)
	lwz	r4,PAR_5(r1)
	lwz	r5,PAR_6(r1)
	lwz	r6,PAR_7(r1)
	lwz	r7,PAR_8(r1)
	lwz	r8,PAR_9(r1)
	lwz	r9,PAR_10(r1)
	lwz	r10,PAR_11(r1)

	/* slide the stack pointer so the linkage area touches the list */
	addi	r1,r1,VARGS_BKOFF

	blrl						/* userf(...) */

/* ---- cleanup(void *pt, void *vuserf_return) ---- */
	mr	r4,r3					/* 2nd arg = value returned by "userf" */
	lwz	r11,0(r1)				/* backchain again (r11 is volatile) */
	lwz	r3,P_T_SAVE(r11)			/* 1st arg = parked "pt" */
	lwz	r5,P_CLEANUP_SAVE(r11)			/* parked "cleanup" */
	mtlr	r5
	blrl

/* "cleanup" is not supposed to come back: report it */
	b	_qt_error

/* never executed; kept because some inline assemblers want an epilog here
 * (or generate one themselves) */
	blr

#if 0
.if 0
#endif
}
#if 0
.endif
#endif
641
642