1/* powerpc-sys5.s -- assembly support. */
2
3/*
4 * QuickThreads -- Threads-building toolkit.
5 * Copyright (c) 1993 by David Keppel
6 *
7 * Permission to use, copy, modify and distribute this software and
8 * its documentation for any purpose and without fee is hereby
9 * granted, provided that the above copyright notice and this notice
10 * appear in all copies.  This software is provided as a
11 * proof-of-concept and for demonstration purposes; there is no
12 * representation about the suitability of this software for any
13 * purpose.
14
15
16 * PowerPC-System V thread switching module.
17 *
18 * This software is largely based on the original PowerPC-Linux porting
19 * developed by Ken Aaker <kenaaker@silverbacksystems.com>
20 *
21 * Marco Bucci <marco.bucci@inwind.it>
22 * December 2002
23 *
24 */
25
26
27/*
28 *
29 * PowerPC Register conventions:
30 *
31 *  r0			volatile
32 *  r1			SP
33 *  r2			system reserved
34 *  r3-r4		volatile for parameter passing and function return
35 *  r5-r10		volatile for parameter passing
36 *  r11-r12		volatile
37 *  r13-r31		non volatile registers
38 *  f0			volatile
39 *  f1			volatile for parameter passing and function return
40 *  f2-f13		volatile for parameter passing
41 *  f14-f31		non volatile
42 *
43 *  cr2-cr4		non volatile
44 *
45 *
46 * See the header file for more documentation.
47 *
48 *
49 *
50 * IMPLEMENTATION NOTES
51 *
52 *
53 * 1) Condition register saving
54 * On most machines, the condition code register is caller-save.
55 * On the PPC, the condition code register is callee-save, so the
56 * thread context switch must preserve it.
57 *
58 *
59 * 2) Floating point registers saving
60 * On resuming a thread, floating point registers are restored or not
61 * depending only on which block routine suspended the thread (i.e. regardless
62 * of whether "qt_block", "qt_blocki" or "qt_abort" is used to resume it).
63 * This behaviour is obtained by implementing "qt_block" by means of a nested
64 * call to "qt_blocki". As a result, the blocking of a thread always goes
65 * and returns through "qt_blocki" and, if a thread was blocked by "qt_block",
66 * its execution resumes from the floating point restoring code on exit
67 * of "qt_block".
68 *
69 * Thanks to David Keppel, who explained this "simple" trick to me.
70 *
71 *
72 * 3) C language code debugging
73 * The original version of this software was developed and debugged under
74 * MacOS X using the Metrowerks Code Warrior PPC integrated assembler.
75 * It can still be used with a C inline assembler by means of a suitable
76 * file that includes it.
77 * In order to avoid "copy and paste" bugs, and to make maintenance easier,
78 * I made only minimal changes, so you can find some strange code such as:
79 *
80 *   #if 0
81 *   .if 0
82 *      C code here
83 *   .endif
84 *   #endif
85 *
86 * This is just to embed some C code that is needed by the Code Warrior
87 * integrated assembler.
88 *
89 *
90 * 4) Assembly constants generation
91 * Constants used in the assembly code are generated by running
92 * the C++ code that follows (commented out). It uses the C macros declared in
93 * the C header in order to guarantee that the C interface and the assembly
94 * code are "aligned". I avoided the use of an assembler preprocessor since
95 * they are not so standard and, moreover, using macro expressions makes
96 * assembly debugging more difficult.
97 *
98 *
99
100
101#include <iostream>
102#include "powerpc_sys5.h"
103
104int main()
105{
106	using namespace std;
107
108	int i;
109
110	cout << ".set LR_SAVE, " << PPC_LR_SAVE << endl;
111	cout << ".set BLOCKI_FSIZE, " << QUICKTHREADS_BLOCKI_FRAME_SIZE << endl;
112	cout << ".set BLOCKI_CR_SAVE, " << QUICKTHREADS_BLOCKI_CR_SAVE << endl;
113	cout << ".set BLOCK_FSIZE, " << QUICKTHREADS_BLOCK_FRAME_SIZE << endl;
114
115	cout << endl;
116	for(i=0; i<12; i++)
117	 	cout << ".set PAR_" << i << ", " << PPC_PAR(i) << endl;
118
119	cout << endl;
120	i = 13;
121	cout << ".set GPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCKI_GPR_SAVE(i) << endl;
122
123	cout << endl;
124	for(i=31; i>13; i--)
125	 	cout << ".set FPR_SAVE_" << i << ", " << QUICKTHREADS_BLOCK_FPR_SAVE(i) << endl;
126
127	cout << endl;
128	cout << ".set VARGS_BKOFF, " << QUICKTHREADS_VARGS_BKOFF << endl;
129
130
131	cout << endl << endl << endl;
132
133	for(i=31; i>13; i--)
134	 	cout << "\tstfd\tf" << i << ",FPR_SAVE_" << i << "(%r1)" << endl;
135
136	cout << endl;
137	for(i=31; i>13; i--)
138	 	cout << "\tlfd \tf" << i << ",FPR_SAVE_" << i << "(%r1)" << endl;
139
140	cout << endl << endl << endl;
141
142
143	return 0;
144}
145
146
147
148 *
149 *
150 *
151 */
152
153
154#if 0
155
/* NOTE(review): these directives sit between "if 0" / "endif" marker
 * lines; presumably a C preprocessor (inline-assembler build) skips them
 * while a stand-alone assembler ignores the two marker lines and reads the
 * directives -- confirm for the toolchain in use. */
156	.text
157	.align 4
158
/* Entry points exported by this file; each one is published both with and
 * without a leading underscore. */
159	.globl qt_block
160	.globl _qt_block
161	.globl qt_blocki
162	.globl _qt_blocki
163	.globl qt_abort
164	.globl _qt_abort
165	.globl qt_start
166	.globl _qt_start
167	.globl qt_vstart
168	.globl _qt_vstart
169
170
/* Frame layout constants (byte offsets).
 *
 * LR_SAVE is always used as LR_SAVE+<frame size>(%r1) after the frame has
 * been allocated, i.e. it is an offset from the stack pointer the routine
 * was entered with.
 *
 * "qt_blocki" frame (BLOCKI_FSIZE bytes): CR at BLOCKI_CR_SAVE, then the
 * registers stored by "stmw %r13" (r13-r31, 19 words) from GPR_SAVE_13
 * upwards: 20 + 19*4 = 96.
 *
 * "qt_block" frame (BLOCK_FSIZE bytes): f14-f31 in the 8-byte slots
 * FPR_SAVE_14 ... FPR_SAVE_31: 152 + 8 = 160. */
171.set LR_SAVE, 4
172.set BLOCKI_FSIZE, 96
173.set BLOCKI_CR_SAVE, 8  /* CR is saved into the callee's stack frame */
174.set BLOCK_FSIZE, 160
175
/* PAR_n: offset of the n-th parameter word from the stack pointer
 * (8 + 4*n). */
176.set PAR_0, 8
177.set PAR_1, 12
178.set PAR_2, 16
179.set PAR_3, 20
180.set PAR_4, 24
181.set PAR_5, 28
182.set PAR_6, 32
183.set PAR_7, 36
184.set PAR_8, 40
185.set PAR_9, 44
186.set PAR_10, 48
187.set PAR_11, 52
188
/* first slot of the integer register save area in the "qt_blocki" frame */
189.set GPR_SAVE_13, 20
190
/* save slots of the floating point registers in the "qt_block" frame */
191.set FPR_SAVE_31, 152
192.set FPR_SAVE_30, 144
193.set FPR_SAVE_29, 136
194.set FPR_SAVE_28, 128
195.set FPR_SAVE_27, 120
196.set FPR_SAVE_26, 112
197.set FPR_SAVE_25, 104
198.set FPR_SAVE_24, 96
199.set FPR_SAVE_23, 88
200.set FPR_SAVE_22, 80
201.set FPR_SAVE_21, 72
202.set FPR_SAVE_20, 64
203.set FPR_SAVE_19, 56
204.set FPR_SAVE_18, 48
205.set FPR_SAVE_17, 40
206.set FPR_SAVE_16, 32
207.set FPR_SAVE_15, 24
208.set FPR_SAVE_14, 16
209
210
211
212
213/* various offsets used by "qt_vstart" */
214.set P_T, PAR_0
215.set P_STARTUP, PAR_1
216.set P_USERF, PAR_2
217.set P_CLEANUP, PAR_3
218		/* the offset used to move back the linkage area to be adjacent to
219		 * the variant argument list before calling "userf(...)".
220		 * Skip "t", "startup", "userf", "cleanup" and first
221         * 8 parameters (since they are passed via registers):
         * (4 + 8) words = 48 bytes */
222.set VARGS_BKOFF, 48
223
224		/* location where "t" and "cleanup" are saved (with respect to
225		 * the stack frame base, i.e. the backchain value that "qt_vstart"
		 * loads from 0(%r1)) */
226.set P_T_SAVE, -4
227.set P_CLEANUP_SAVE, -8
228
229#endif
230
231
232
233/* Block the current thread saving all integer non volatile registers and
234 * start a new thread.
 *
 * C prototype:
 *    void *qt_blocki (void *helper, void *a0, void *a1, void *newthread);
 *
 * Registers on entry:
 *    r3 = address of "helper"
 *    r4 = "a0", r5 = "a1" (left untouched, so "helper" receives them)
 *    r6 = "newthread": the value loaded into the stack pointer
 *
 * Sequence:
 *    1. allocate a BLOCKI_FSIZE frame; store CR and r13-r31 in it and the
 *       return address at LR_SAVE above it;
 *    2. put the old stack pointer in r3 (argument "old" of "helper"),
 *       switch r1 to "newthread" and call "helper";
 *    3. run the epilog on the new stack: it reloads r13-r31, LR and CR
 *       from the frame found at the new stack pointer and returns there.
 *
 * r3 is not modified after "helper" returns, so the value "helper" leaves
 * in it is the value returned to the resumed thread.
 *
 * The C declaration and the braces enclosed by the "if 0" lines are the
 * embedded C code described in implementation note 3 of the file header.
235 */
236#if 0
237.if 0
238#endif
239void *qt_blocki (void *helper, void *a0, void *a1, void *newthread);
240asm void *qt_blocki (void *helper, void *a0, void *a1, void *newthread)
241{
242#if 0
243.endif
244#endif
245
246#if 0
247qt_blocki:
248_qt_blocki:
249#endif
250/* prolog code */
251	stwu	%r1,-BLOCKI_FSIZE(%r1)		/* allocate the stack frame */
252	mflr	%r0							/* return addr in r0 */
253	mfcr	%r11							/* CR in r11 */
254	stw		%r0,LR_SAVE+BLOCKI_FSIZE(%r1)	/* save return addr above the new frame */
255	stw		%r11,BLOCKI_CR_SAVE(%r1)	/* save CR in the stack */
256	stmw	%r13,GPR_SAVE_13(%r1)	 		/* save non-volatile reg (r13-r31) */
257
258/* call helper(qt_t *old, void *a0, void *a1) */
259	mtlr	%r3				/* "helper" addr in the link reg */
/* r3 must be loaded from r1 before r1 is overwritten by the next "mr" */
260	mr		%r3,%r1			/* current thread (i.e. the SP) in arg "old" */
261	mr		%r1,%r6         	/* swap to the new thread (i.e. to its SP) */
262	blrl								/* call "helper"; it returns to the epilog */
263/* the "helper" return value is returned (since r3 is not changed) */
264
265/* epilog code: return to the new thread's "qt_blocki" caller */
/* from here on r1 is the new thread's stack pointer, so every offset below
 * addresses the frame found on the new stack */
266	lmw     %r13,GPR_SAVE_13(%r1)			/* restore non-volatile reg */
267	lwz		%r0,LR_SAVE+BLOCKI_FSIZE(%r1)	/* recover return addr */
268	lwz		%r11,BLOCKI_CR_SAVE(%r1)	/* recover CR */
269	mtlr	%r0							/* return address in the link reg */
270	mtcr	%r11							/* restore CR */
271	addi    %r1,%r1,BLOCKI_FSIZE			/* free the stack frame */
272	blr									/* return */
273
274#if 0
275.if 0
276#endif
277}
278#if 0
279.endif
280#endif
281
282
283
284/* Abort the current thread and start a new thread.
 *
 * C prototype:
 *    void qt_abort (void *helper, void *a0, void *a1, void *newthread);
 *
 * Registers on entry:
 *    r3 = address of "helper"
 *    r4 = "a0", r5 = "a1" (left untouched, so "helper" receives them)
 *    r6 = "newthread": the value loaded into the stack pointer
 *
 * Nothing of the aborting thread is saved: no frame is allocated and no
 * register is stored. The stack pointer is simply replaced by "newthread",
 * "helper" is called, and the epilog then reads the frame found on the new
 * stack using the "qt_blocki" layout (BLOCKI_FSIZE, BLOCKI_CR_SAVE,
 * GPR_SAVE_13).
 *
 * The C declaration and the braces enclosed by the "if 0" lines are the
 * embedded C code described in implementation note 3 of the file header.
285 */
286#if 0
287.if 0
288#endif
289void qt_abort (void *helper, void *a0, void *a1, void *newthread);
290asm void qt_abort (void *helper, void *a0, void *a1, void *newthread)
291{
292#if 0
293.endif
294#endif
295
296#if 0
297qt_abort:
298_qt_abort:
299#endif
300/* prolog code */
301/* there is no prolog. It will never come back */
302
303/* call helper(qt_t *old, void *a0, void *a1) */
304	mtlr	%r3					/* "helper" addr in the link reg */
305	mr		%r1,%r6         		/* swap to the new thread (i.e. to its SP) */
306/* we don't need to set "old", we can pass just garbage. Actually, since r3
307 is not changed, "old" is set to "helper" (don't care) */
308	blrl								/* call "helper" */
309/* the "helper" return value is returned (since r3 is not changed) */
310
311/* epilog code: return to the new thread's "qt_blocki" caller */
/* same instruction sequence as the "qt_blocki" epilog, executed on the new
 * thread's stack */
312	lmw     %r13,GPR_SAVE_13(%r1)			/* restore non-volatile reg */
313	lwz		%r0,LR_SAVE+BLOCKI_FSIZE(%r1)	/* recover return addr */
314	lwz		%r11,BLOCKI_CR_SAVE(%r1)	/* recover CR */
315	mtlr	%r0							/* return address in the link reg */
316	mtcr	%r11							/* restore CR */
317	addi    %r1,%r1,BLOCKI_FSIZE			/* free the stack frame */
318	blr									/* return */
319
320#if 0
321.if 0
322#endif
323}
324#if 0
325.endif
326#endif
327
328
329
330/* Block the current thread saving all non volatile registers and start
331 * a new thread.
 *
 * C prototype:
 *    void *qt_block (void *helper, void *a0, void *a1, void *newthread);
 *
 * This routine is a floating point wrapper around "qt_blocki":
 *    1. allocate a BLOCK_FSIZE frame, save the return address and f14-f31;
 *    2. call "qt_blocki", which saves the integer state and performs the
 *       actual switch. r3-r6 are not touched by the prolog, so "qt_blocki"
 *       receives the same four arguments this routine was called with;
 *    3. when the thread is resumed, execution continues after the "bl":
 *       f14-f31 are reloaded, the frame is released and control returns to
 *       the original caller.
 *
 * r3 is not modified after "qt_blocki" returns, so its return value is
 * passed through to the caller.
 *
 * The C declaration and the braces enclosed by the "if 0" lines are the
 * embedded C code described in implementation note 3 of the file header.
332 */
333#if 0
334.if 0
335#endif
336void *qt_block (void *helper, void *a0, void *a1, void *newthread);
337asm void *qt_block (void *helper, void *a0, void *a1, void *newthread)
338{
339#if 0
340.endif
341#endif
342
343# if 0
344qt_block:
345_qt_block:
346#endif
347/* prolog code */
348	stwu	%r1,-BLOCK_FSIZE(%r1)			/* allocate the stack frame */
349	mflr	%r0							/* return addr in r0 */
350	stw		%r0,LR_SAVE+BLOCK_FSIZE(%r1)	/* save return addr above the new frame */
351
352/* save non-volatile fp reg */
353    stfd    %f31,FPR_SAVE_31(%r1)
354    stfd    %f30,FPR_SAVE_30(%r1)
355    stfd    %f29,FPR_SAVE_29(%r1)
356    stfd    %f28,FPR_SAVE_28(%r1)
357    stfd    %f27,FPR_SAVE_27(%r1)
358    stfd    %f26,FPR_SAVE_26(%r1)
359    stfd    %f25,FPR_SAVE_25(%r1)
360    stfd    %f24,FPR_SAVE_24(%r1)
361    stfd    %f23,FPR_SAVE_23(%r1)
362    stfd    %f22,FPR_SAVE_22(%r1)
363    stfd    %f21,FPR_SAVE_21(%r1)
364    stfd    %f20,FPR_SAVE_20(%r1)
365    stfd    %f19,FPR_SAVE_19(%r1)
366    stfd    %f18,FPR_SAVE_18(%r1)
367    stfd    %f17,FPR_SAVE_17(%r1)
368    stfd    %f16,FPR_SAVE_16(%r1)
369    stfd    %f15,FPR_SAVE_15(%r1)
370    stfd    %f14,FPR_SAVE_14(%r1)
371/* block the thread */
/* r3-r6 still hold "helper", "a0", "a1" and "newthread" */
372	bl		qt_blocki
373/* the thread is going to be resumed */
374/* restore non-volatile fp reg */
375    lfd     %f31,FPR_SAVE_31(%r1)
376    lfd     %f30,FPR_SAVE_30(%r1)
377    lfd     %f29,FPR_SAVE_29(%r1)
378    lfd     %f28,FPR_SAVE_28(%r1)
379    lfd     %f27,FPR_SAVE_27(%r1)
380    lfd     %f26,FPR_SAVE_26(%r1)
381    lfd     %f25,FPR_SAVE_25(%r1)
382    lfd     %f24,FPR_SAVE_24(%r1)
383    lfd     %f23,FPR_SAVE_23(%r1)
384    lfd     %f22,FPR_SAVE_22(%r1)
385    lfd     %f21,FPR_SAVE_21(%r1)
386    lfd     %f20,FPR_SAVE_20(%r1)
387    lfd     %f19,FPR_SAVE_19(%r1)
388    lfd     %f18,FPR_SAVE_18(%r1)
389    lfd     %f17,FPR_SAVE_17(%r1)
390    lfd     %f16,FPR_SAVE_16(%r1)
391    lfd     %f15,FPR_SAVE_15(%r1)
392    lfd     %f14,FPR_SAVE_14(%r1)
393
/* epilog code */
394	lwz		%r0,LR_SAVE+BLOCK_FSIZE(%r1)	/* recover return addr */
395	mtlr	%r0							/* return address in the link reg */
396	addi    %r1,%r1,BLOCK_FSIZE			/* free the stack frame */
397	blr									/* return */
398
399#if 0
400.if 0
401#endif
402}
403#if 0
404.endif
405#endif
406
407
408
409/* Start a single argument thread using parameters preloaded in the stack
410 * during thread initialization (see comments on stack initialization in the
411 * header file).
412 *
413 * Executes:
414 *
415 *    only(u, t, userf);
 *
 * The four values are read from the parameter slots PAR_0 ... PAR_3 of the
 * current stack frame: "u", "t" and "userf" go to r3-r5 as the arguments of
 * the call, "only" goes to the link register as the call target.
 *
 * "only" is not expected to return; if it does, control is transferred to
 * "qt_error" (a symbol that is not defined in this file).
 *
 * The C declaration and the braces enclosed by the "if 0" lines are the
 * embedded C code described in implementation note 3 of the file header.
416 */
417#if 0
418.if 0
419#endif
420void qt_start(void);
421asm void qt_start(void)
422{
423#if 0
424.endif
425#endif
426
427#if 0
428qt_start:
429_qt_start:
430#endif
431        lwz     %r3,PAR_0(%r1)	     	/* "u" in r3 */
432        lwz     %r4,PAR_1(%r1)	     	/* "t" in r4 */
433        lwz     %r5,PAR_2(%r1)	     	/* "userf" in r5 */
434        lwz     %r6,PAR_3(%r1)	     	/* "only" in r6 */
435        mtlr    %r6						/* "only" address in the link reg */
436/*  call only(u, t, userf) */
437        blrl                    		/* jump to "only" */
438/* error if it returns */
439        b       qt_error
440/* dead code (some inline asm "wants" the epilog, or they generate it) */
441        blr
442
443#if 0
444.if 0
445#endif
446}
447#if 0
448.endif
449#endif
450
451
452
453/* Start a variant argument thread using parameters preloaded in the stack
454 * during thread initialization (see comments on stack initialization in the
455 * header file).
456 *
457 * Executes:
458 *
459 *    startup(t);
460 *    userf_return = userf(...);
461 *    cleanup(pt, userf_return);
462 *
463
464
465 ***** Stack layout on start *****
466
467
468 backchain ->           STACK BOTTOM (higher address)
469                        +==========================+
470 backchain - 4 ->       |                          |
471                        +   LOCAL VARIABLES AREA   +
472                               ..............
473                        +                          +
474                        |                          |
475                        +--------------------------+
476                        |                          |
477                        +      ALIGNMENT PAD       +
478                               ..............
479                        +       (if needed)        +
480                        |                          |
481                        +--------------------------+
482                        |                          | arg(n)
483                        +                          +
484                        |                          |
485                        +  VARIABLE ARGUMENT LIST  +
486                               ..............
487                        +      for userf call      +
488 SP + PAR(5) ->         |                          | arg(1)
489                        +                          +
490 SP + PAR(4) ->         |                          | arg(0)
491                        +--------------------------+
492 SP + PAR(3) ->         |                          | cleanup par
493                        +                          +
494 SP + PAR(2) ->         |                          | userf par
495                        +      PARAMETER AREA      +
496 SP + PAR(1) ->         |                          | startup par
497                        +                          +
498 SP + PAR(0) ->         |                          | t par
499                        +--------------------------+
500                        |                          |
501                        +       LINKAGE AREA       +
502 SP ->                  |                          |
503                        +==========================+
504                         STACK TOP (lower address)
505
506                             Stack grows down
507                                     |
508                                     V
509
510
511
512 ***** Stack layout before call userf *****
513
514
515 backchain ->           STACK BOTTOM (higher address)
516                        +==========================+
517 backchain - 4 ->       |                          |
518                        +   LOCAL VARIABLES AREA   +
519                               ..............
520                        +                          +
521                        |                          |
522                        +--------------------------+
523                        |                          |
524                        +      ALIGNMENT PAD       +
525                               ..............
526                        +       (if needed)        +
527                        |                          |
528                        +--------------------------+
529                        |                          | arg(n)
530                        +                          +
531                        |                          |
532                        +  VARIABLE ARGUMENT LIST  +
533                               ..............
534                        +      for userf call      +
535 SP + PAR(1) ->         |                          | arg(9)
536                        +                          +
537 SP + PAR(0) ->         |                          | arg(8)
538                        +--------------------------+
539                        |                          |
540                        +       LINKAGE AREA       +
541 SP ->                  |                          |
542                        +==========================+
543                         STACK TOP (lower address)
544
545                             Stack grows down
546                                     |
547                                     V
548
549
550 * To call "userf(...)", the argument list must be adjacent to the linkage
551 * area. Instead of copying the argument list, we move back the linkage area
552 * (actually, we just increase the SP and copy the backchain). "t" and
553 * "cleanup" are saved in a local variable area in order to call
554 * cleanup(pt, userf_return).
555
556*/
557
558
/* qt_vstart: entry point of a variant argument thread.
 *
 * Steps performed below:
 *    1. copy "t" and "cleanup" from the parameter area to the two words
 *       just below the address held in the backchain word (P_T_SAVE,
 *       P_CLEANUP_SAVE);
 *    2. call startup(t);
 *    3. load the first eight variant arguments (PAR_4 ... PAR_11) into
 *       r3-r10, advance the stack pointer by VARGS_BKOFF and call "userf";
 *    4. call cleanup(pt, userf_return);
 *    5. "cleanup" is not expected to return; if it does, control is
 *       transferred to "qt_error" (a symbol not defined in this file).
 *
 * The C declaration and the braces enclosed by the "if 0" lines are the
 * embedded C code described in implementation note 3 of the file header.
 */
559#if 0
560.if 0
561#endif
562void qt_vstart(void);
563asm void qt_vstart(void)
564{
565#if 0
566.endif
567#endif
568
569#if 0
570qt_vstart:
571_qt_vstart:
572#endif
573/* NOTICE: the callee routines could save parameter registers in the caller's
574 * stack parameter area. We put "t" in PAR(0) in such a way that, if
575 * startup(t) saves "t", it is saved in the same location, thus not
576 * overwriting any other parameter.
577 */
578
579/* since we will move back the linkage area (to make it adjacent to the
580 * parameter list), we need to save "t" and "cleanup". We have made room for
581 * this on the bottom of the stack frame. */
582
583/* save parameters in the local variable area */
584	lwz		%r11,0(%r1)				/* get the backchain */
585 	lwz     %r3,P_T(%r1)
586 	lwz     %r4,P_CLEANUP(%r1)
587	stw		%r3,P_T_SAVE(%r11)		/* save "pt" */
588	stw		%r4,P_CLEANUP_SAVE(%r11)	/* save "cleanup" */
589
590/* call startup(t) */
/* r3 still holds "t" from the load above */
591    lwz     %r5,P_STARTUP(%r1)
592	mtlr    %r5
593    blrl                    		/* call "startup" */
594
595/* call userf(...) */
596	lwz		%r11,0(%r1)				/* reload backchain (r11 is volatile) */
597	lwz		%r4,P_USERF(%r1)			/* load "userf"  */
598    mtlr    %r4
599
600	/* first eight parameters of the variant list must be copied in
601	 * GPR3-GPR10. There is a four places offset due to "t", "startup",
602	 * "userf" and "cleanup" */
603
604	lwz		%r3,PAR_4(%r1)
605	lwz		%r4,PAR_5(%r1)
606	lwz		%r5,PAR_6(%r1)
607	lwz		%r6,PAR_7(%r1)
608	lwz		%r7,PAR_8(%r1)
609	lwz		%r8,PAR_9(%r1)
610	lwz		%r9,PAR_10(%r1)
611	lwz		%r10,PAR_11(%r1)
612
613
614	/* move the linkage area to be adjacent to the argument list */
	/* VARGS_BKOFF equals PAR_10, whose content has already been copied to
	 * r9 above, so overwriting that word with the backchain loses nothing.
	 * After the "addi", the word that was at PAR_0 + VARGS_BKOFF (the first
	 * argument not passed in a register) is found at PAR_0 of the new stack
	 * pointer. */
615	stw		%r11,VARGS_BKOFF(%r1)		/* copy backchain */
616	addi	%r1,%r1,VARGS_BKOFF			/* move back the stack */
617
618	blrl							/* call "userf" */
619
620/* call qt_cleanup(void *pt, void *vuserf_return) */
621	lwz		%r11,0(%r1)				/* reload backchain (r11 is volatile) */
622
623	mr		%r4,%r3					/* push "userf" return as 2nd parameter */
624	lwz		%r3,P_T_SAVE(%r11)		/* reload "pt" */
625	lwz		%r5,P_CLEANUP_SAVE(%r11)	/* reload "cleanup" */
626	mtlr	%r5
627	blrl
/* error if "cleanup" returns */
628	b       qt_error
629/* dead code (some inline asm "wants" the epilog, or they generate it) */
630	blr
631
632#if 0
633.if 0
634#endif
635}
636#if 0
637.endif
638#endif
639
640