1/*
2 * QuickThreads -- Threads-building toolkit.
3 * Copyright (c) 1993 by David Keppel
4 *
5 * Permission to use, copy, modify and distribute this software and
6 * its documentation for any purpose and without fee is hereby
7 * granted, provided that the above copyright notice and this notice
8 * appear in all copies.  This software is provided as a
9 * proof-of-concept and for demonstration purposes; there is no
10 * representation about the suitability of this software for any
11 * purpose.
12 */
13
#
# File-level directives.  Each exported routine is declared global under
# two names: the plain name, which labels an entry in the data section,
# and the `$TXT' name, which labels the code in the text section.
#
14	.file	"ksr1.s"
15	.def	.debug;	.endef
16
17	.align 128
18	.globl qt_blocki
19	.globl qt_blocki$TXT
20	.globl qt_block
21	.globl qt_block$TXT
22	.globl qt_start$TXT
23        .globl qt_start
24	.globl qt_abort$TXT
25	.globl qt_abort
26	.globl qt_vstart
27	.globl qt_vstart$TXT
28
29#
30# KSR convention: on procedure calls, load both the procedure address
31# and a pointer to a constant block.  The address of function `f' is
32# `f$TXT', and the constant block address is `f'.  The constant block
33# has several reserved values:
34#
35#	8 bytes fpu register save mask
36#	4 bytes ipu register save mask
37#	4 bytes ceu register save mask
38#   f:  f$TXT
39#	... whatever you want ... (not quite...read on)
40#
41# Note, by the way, that a pointer to a function is passed as a
42# pointer to the constant area, and the constant area has the text
43# address.
44#
45
46#
47# Procedures that do not return structures prefix their code with
48#
49# proc$TXT:
50#   finop; cxnop
51#   finop; cxnop
52#   <proc code>
53#
54# Calls to those procedures branch to a 16-byte offset (4 instrs) into
55# the procedure to skip those instructions.
56#
57# Procedures that return structures use a different code prefix:
58#
59# proc$TXT:
60#   finop; beq.qt %rc, %rc, 24		# return value entry
61#   finop; cxnop
62#   finop; movi8 0, %rc			# no return value entry
63#   <proc code>
64#
65# Calls that want the returned structure branch directly to the
66# procedure address.  Callers that don't want (or aren't expecting) a
67# return value branch 16 bytes into the procedure, which will zero
68# %rc, telling the called procedure not to return a structure.
69#
70
71#
72# On entry:
73#   %i2 -- control block of helper function to run
74#          (dereference to get helper)
75#   %i3 -- a1
76#   %i4 -- a2
77#   %i5 -- sp of new to run
78#
79
#
# qt_blocki / qt_abort (both names label the same constant block and the
# same code).
#
# Saves the caller's CEU and IPU registers in a 512-byte frame on the
# current stack, switches %sp to the stack passed in %i5, and calls the
# helper whose constant block is passed in %i2, handing it the old %sp as
# its first argument.  When the helper returns, control continues through
# the address stored at 8(%sp) of the *new* stack, 16 bytes in.
#
# Constant block, as laid out below:
#   four .half values ahead of the label (presumably the fpu/ipu/ceu
#   register save masks -- confirm the width of .half)
#   entry 0:  qt_blocki$TXT
#   entry 1:  qt_restore$TXT  (read by the code through 8(%c10))
#
# Frame layout written by the save code (offsets from the lowered %sp):
#   504        caller's %fp
#   496        caller's %cp
#   456..384   %c15-%c24
#   368..336   %c26-%c30
#   328..304   %i12-%i15
#   256..144   %i16-%i30
#     8        qt_restore$TXT
#     0        %c14 (return address)
#
80        .data
81	.half 0x0, 0x0, 0x7ffff000, 0x7fff8000
82qt_blocki:
83qt_abort:
84	.word qt_blocki$TXT
85	.word qt_restore$TXT
86
87	.text
88qt_abort$TXT:
89qt_blocki$TXT:
90	finop			; cxnop			# entry prefix
91	finop			; cxnop			# entry prefix
92	add8.ntr 75,%i31,%i31	; movi8 512,%c5		# ICR; stk adjust
93	finop			; ssub8.ntr 0,%sp,%c5,%sp
94	finop			; st8 %fp,504(%sp)	# Save caller's fp
95	finop			; st8 %cp,496(%sp)	# Save caller's cp
#
# NOTE(review): the ld8 below makes %c5 (which held the frame size 512)
# the destination for qt_restore$TXT, and three instructions later %c5 is
# used as the addend when computing %fp.  Whether %fp ends up as sp+512
# or sp+qt_restore$TXT depends on the load-delay semantics -- confirm.
#
96	finop			; ld8 8(%c10),%c5	# ld qt_restore$TXT
97	finop			; st8 %c14,0(%sp)	# Save special ret addr
98	finop			; mov8_8 %c10, %cp	# Our cp
99	finop			; sadd8.ntr 0,%sp,%c5,%fp # Our frame ptr
100	finop			; st8 %c5,8(%sp)	# st qt_restore$TXT
101#
102# CEU registers %c15-%c24, %c26-%c30 (%c14 was stored at 0(%sp) above)
103#
104	finop			; st8  %c15,456(%sp)
105	finop			; st8  %c16,448(%sp)
106	finop			; st8  %c17,440(%sp)
107	finop			; st8  %c18,432(%sp)
108	finop			; st8  %c19,424(%sp)
109	finop			; st8  %c20,416(%sp)
110	finop			; st8  %c21,408(%sp)
111	finop			; st8  %c22,400(%sp)
112	finop			; st8  %c23,392(%sp)
113	finop			; st8  %c24,384(%sp)
114#
115# %c25 is the Enclosing Frame Pointer (EFP) -- since C doesn't
116# use nested procedures, we ignore it (leaving a gap at 376, though)
117#
118	finop			; st8 %c26,368(%sp)
119	finop			; st8 %c27,360(%sp)
120	finop			; st8 %c28,352(%sp)
121	finop			; st8 %c29,344(%sp)
122	finop			; st8 %c30,336(%sp)
123#
124# IPU registers %i12-%i30
125#
126	finop			; st8 %i12,328(%sp)
127	finop			; st8 %i13,320(%sp)
128	finop			; st8 %i14,312(%sp)
129	finop			; st8 %i15,304(%sp)
130# (gap to get alignment for st64)
131# -- Doesn't work on version 1.1.3 of the OS
132#	finop			; st64 %i16,256(%sp)
133
134	finop			; st8 %i16,256(%sp)
135	finop			; st8 %i17,248(%sp)
136	finop			; st8 %i18,240(%sp)
137	finop			; st8 %i19,232(%sp)
138	finop			; st8 %i20,224(%sp)
139	finop			; st8 %i21,216(%sp)
140	finop			; st8 %i22,208(%sp)
141	finop			; st8 %i23,200(%sp)
142	finop			; st8 %i24,192(%sp)
143	finop			; st8 %i25,184(%sp)
144	finop			; st8 %i26,176(%sp)
145	finop			; st8 %i27,168(%sp)
146	finop			; st8 %i28,160(%sp)
147	finop			; st8 %i29,152(%sp)
148	finop			; st8 %i30,144(%sp)
149#
150# FPU already saved, or saving not necessary
151#
152
153#
154# Switch to the stack passed in as fourth argument to the block
155# routine (%i5) and call the helper routine passed in as the first
156# argument (%i2).  Note that the address of the helper's constant
157# block is passed in, so we must dereference it to get the helper's text
158# address.
159#
160	finop			; movb8_8 %i2,%c10	# helper's ConstBlock
161	finop			; cxnop			# Delay slot, fill w/
162	finop			; cxnop			# .. 2 st8 from above
163	finop			; ld8 0(%c10),%c4	# load addr of helper
164	finop			; movb8_8 %sp, %i2	# 1st arg to helper
165							# is this stack; other
166							# args remain in regs
167	finop			; movb8_8 %i5,%sp	# switch stacks
168	finop			; jsr %c14,16(%c4)	# call helper
169	movi8 3, %i0		; movi8 0,%c8		# nargs brain dmg
170	finop			; cxnop
171	finop			; cxnop
172#
173# Here is where behavior differs for threads being restored and threads
174# being started.  Blocked threads have a pointer to qt_restore$TXT on
175# the top of their stacks; manufactured stacks have a pointer to qt_start$TXT
176# on the top of their stacks.  With this setup, starting threads
177# skip the (unnecessary) restore operations.
178#
179# We jump to an offset of 16 to either (1) skip past the two noop pairs
180# at the start of qt_start$TXT, or (2) skip past the two noop pairs
181# after qt_restore$TXT.
182#
# The pointer is read from 8(%sp) of the stack we just switched to.
183	finop			; ld8 8(%sp),%c4
184	finop			; cxnop
185	finop			; cxnop
186	finop			; jmp 16(%c4)
187qt_restore$TXT:
188	finop			; cxnop
189	finop			; cxnop
190#
191# Point of Restore:
192#
193# The helper function will return here.  Any result it has placed in
194# a return register (most likely %i0) will not get overwritten below
195# and will consequently be the return value of the blocking routine.
196#
197
198#
199# CEU registers %c15-%c24, %c26-%c30 (%c14 we restore later)
200#
201	finop			; ld8  456(%sp),%c15
202	finop			; ld8  448(%sp),%c16
203	finop			; ld8  440(%sp),%c17
204	finop			; ld8  432(%sp),%c18
205	finop			; ld8  424(%sp),%c19
206	finop			; ld8  416(%sp),%c20
207	finop			; ld8  408(%sp),%c21
208	finop			; ld8  400(%sp),%c22
209	finop			; ld8  392(%sp),%c23
210	finop			; ld8  384(%sp),%c24
211#
212# %c25 is the Enclosing Frame Pointer (EFP) -- since C doesn't
213# use nested procedures, we ignore it (leaving a gap, though)
214#
215	finop			; ld8 368(%sp),%c26
216	finop			; ld8 360(%sp),%c27
217	finop			; ld8 352(%sp),%c28
218	finop			; ld8 344(%sp),%c29
219	finop			; ld8 336(%sp),%c30
220#
221# IPU registers %i12-%i30
222#
223	finop			; ld8 328(%sp),%i12
224	finop			; ld8 320(%sp),%i13
225	finop			; ld8 312(%sp),%i14
226	finop			; ld8 304(%sp),%i15
227# (gap to get alignment for ld64)
228# -- Doesn't work on version 1.1.3 of the OS
229#	finop			; ld64 256(%sp),%i16
230
231	finop			; ld8 256(%sp),%i16
232	finop			; ld8 248(%sp),%i17
233	finop			; ld8 240(%sp),%i18
234	finop			; ld8 232(%sp),%i19
235	finop			; ld8 224(%sp),%i20
236	finop			; ld8 216(%sp),%i21
237	finop			; ld8 208(%sp),%i22
238	finop			; ld8 200(%sp),%i23
239	finop			; ld8 192(%sp),%i24
240	finop			; ld8 184(%sp),%i25
241	finop			; ld8 176(%sp),%i26
242	finop			; ld8 168(%sp),%i27
243	finop			; ld8 160(%sp),%i28
244	finop			; ld8 152(%sp),%i29
245	finop			; ld8 144(%sp),%i30
246
247#
248# FPU registers don't need to be loaded, or will be loaded by an
249# enclosing scope (e.g., if this is called by qt_block).
250#
251
252#
253# Load the special registers.  We don't load the stack ptr because
254# the new stack is passed in as an argument, we don't load the EFP
255# because we don't use it, and we load the return address specially
256# off the top of the stack.
257#
258	finop			; ld8 0(%sp),%c14	# return addr
259	finop			; ld8 496(%sp),%cp
260	finop			; ld8 504(%sp),%fp
261
#
# Return 32 bytes past the saved %c14 (presumably the address of the
# caller's jsr pair, so that the jsr and the three pairs after it are
# skipped -- confirm).  The two pairs after the jmp pop the 512-byte
# frame; presumably they execute in the jmp's delay slots.
#
262	finop			; jmp 32(%c14)		# jump back to thread
263	finop			; movi8 512,%c5		# stack adjust
264	finop			; sadd8.ntr 0,%sp,%c5,%sp
265
#
# qt_block -- blocking routine that also preserves FPU registers.
#
# Builds its own 512-byte frame, stores the FPU registers starting at
# %f16 with six st64 operations, calls qt_blocki$TXT with the incoming
# arguments left in their registers, and on return reloads the FPU
# registers and returns to its caller.
#
# Constant block entries, as laid out below:
#   entry 0:  qt_block$TXT
#   entry 1:  qt_error
#   entry 2:  qt_error$TXT
#   entry 3:  qt_blocki     (fetched by the code through 24(%cp))
#
# Frame layout written by the code (offsets from the lowered %sp):
#   504       caller's %fp
#   496       caller's %cp
#   488       %c14 (return address)
#   384..64   FPU registers, one st64 per 64-byte slot
#
266        .data
267	.half 0x0, 0x0, 0x7ffff000, 0x7fff8000
268qt_block:
269	.word	qt_block$TXT
270	.word	qt_error
271	.word	qt_error$TXT
272	.word	qt_blocki
273#
274# Handle saving and restoring the FPU regs, relying on qt_blocki
275# to save and restore the remaining registers.
276#
277        .text
278qt_block$TXT:
279	finop			; cxnop			# entry prefix
280	finop			; cxnop			# entry prefix
281
282	add8.ntr 29,%i31,%i31	; movi8 512,%c5		# ICR; stk adjust
283	finop			; ssub8.ntr 0,%sp,%c5,%sp
284	finop			; st8 %fp,504(%sp)	# Save caller's fp
285	finop			; st8 %cp,496(%sp)	# Save caller's cp
286	finop			; st8 %c14,488(%sp)	# store ret addr
287	finop			; sadd8.ntr 0,%sp,%c5,%fp # Our frame ptr
288	finop			; mov8_8 %c10, %cp	# Our cp
289
290#
291# Store 8 registers at once...destination must be a multiple of 64
292#
293	finop			; st64 %f16,384(%sp)
294	finop			; st64 %f24,320(%sp)
295	finop			; st64 %f32,256(%sp)
296	finop			; st64 %f40,192(%sp)
297	finop			; st64 %f48,128(%sp)
298	finop			; st64 %f56,64(%sp)
299
300#
301# Call the integer blocking routine, passing the arguments passed to us
302#
303	finop			; ld8 24(%cp), %c10	# qt_blocki's constant block
304	finop			; cxnop
305	finop			; jsr %c14, qt_blocki$TXT
306	finop			; cxnop
307	finop			; cxnop
308	movi8 4,%i0		; movi8 0,%c8		# nargs brain dmg
309
310#
311# Load 8 registers at once...source must be a multiple of 64
312#
313	finop			; ld64 64(%sp),%f56
314	finop			; ld64 128(%sp),%f48
315	finop			; ld64 192(%sp),%f40
316	finop			; ld64 256(%sp),%f32
317	finop			; ld64 320(%sp),%f24
318	finop			; ld64 384(%sp),%f16
319
#
# Reload the return address and the caller's %cp/%fp from the slots
# written on entry, then return.  The two pairs after the jmp pop the
# 512-byte frame; presumably they execute in the jmp's delay slots.
#
320	finop			; ld8 488(%sp),%c14
321	finop			; ld8 496(%sp),%cp
322	finop			; ld8 504(%sp),%fp
323	finop			; jmp 32(%c14)		# jump back to thread
324	finop			; movi8 512,%c5		# stack adjust
325	finop			; sadd8.ntr 0,%sp,%c5,%sp
326
327
#
# qt_start -- first code run by a newly created thread.
#
# Reads the thread's initial data from its stack, calls `only' with three
# arguments, and jumps to qt_error$TXT if `only' ever returns.
#
# Initial-frame slots read by the code (offsets from %sp on entry):
#   40   constant block of `only'
#   32   `userf' (passed in %i4)
#   24   `t'     (passed in %i3)
#   16   `u'     (passed in %i2)
#
328        .data
329	.half 0x0, 0x0, 0x7ffff000, 0x7fff8000
330qt_start:
331	.word qt_start$TXT
332#
333# A new thread is set up to "appear" as if it were executing code at
334# the beginning of qt_start and then it called a blocking routine
335# (qt_blocki).  So when a new thread starts to run, it gets unblocked
336# by the code above and "returns" to `qt_start$TXT' in the
337# restore step of the switch.  Blocked threads jump to 16(qt_restore$TXT),
338# and starting threads jump to 16(qt_start$TXT).
339#
340        .text
341qt_start$TXT:
342	finop			; cxnop			#
343	finop			; cxnop			#
344	finop			; ld8 40(%sp),%c10	# `only' constant block
345	finop			; ld8 32(%sp),%i4	# `userf' arg.
346	finop			; ld8 24(%sp),%i3	# `t' arg.
347	finop			; ld8 0(%c10),%c4	# `only' text location
348	finop			; ld8 16(%sp),%i2	# `u' arg.
349	finop			; cxnop
350	finop			; jsr %c14,16(%c4)	# call `only'
351#
352# Pop the frame used to store the thread's initial data
353# (128 bytes; the adjustment sits directly after the jsr, presumably in
353# its delay slot -- confirm)
353#
354	finop			; sadd8.ntr 0,%sp,128,%sp
355	finop			; cxnop
356	movi8 2,%i0		; movi8 0,%c8		# nargs brain dmg
357#
358# If we ever return, it's an error.
359#
360	finop			; jmp qt_error$TXT
361	finop			; cxnop
362	finop			; cxnop
363	movi8 0,%i0		; movi8 0,%c8		# nargs brain dmg
364
365
366#
367# This stuff is broken
368#
#
# qt_vstart -- start-up path that first calls `startup' with one argument
# and then calls `only'; jumps to qt_error$TXT if `only' returns.
#
# NOTE(review): the items below look suspect and may be part of why this
# routine is marked broken -- confirm before relying on any of them:
#   * The final call is `jsr %c4,16(%c4)', linking through %c4, while the
#     call to `startup' a few lines earlier links through %c14.
#   * `ld8 8(%sp),%i2' is repeated four times back to back after the call
#     to `startup'.
#   * The slot comments disagree: 8(%sp) is described as `t' before the
#     first call and as `u' before the second; 16(%sp) is described as
#     the `startup' constant block and later as `t'.
#   * %sp is lowered by 512 on entry and is never raised again in this
#     routine (the only adjustment back is commented out).
#
369        .data
370	.half 0x0, 0x0, 0x7ffff000, 0x7fff8000
371qt_vstart:
372	.word qt_vstart$TXT
373
374	.text
375qt_vstart$TXT:
376	finop			; cxnop			# entry prefix
377	finop			; cxnop			# entry prefix
378	finop			; cxnop
379	finop			; cxnop
380	add8.ntr 11,%i31,%i31	; movi8 512,%c5
381	finop			; ssub8.ntr 0,%sp,%c5,%sp	# fix stack
382	finop			; ld8 8(%sp),%i2	# load `t' as arg to
383	finop			; cxnop			# `startup'
384	finop			; cxnop
385	finop			; ld8 16(%sp),%c10	# `startup' const block
386	finop			; cxnop
387	finop			; cxnop
388	finop			; ld8 0(%c10),%c4	# `startup' text loc.
389	finop			; cxnop
390	finop			; cxnop
391	finop			; jsr %c14,16(%c4)	# call `startup'
392	finop			; cxnop
393	finop			; cxnop
394	movi8 1, %i0		; movi8 0,%c8		# nargs brain dmg
395#
396#	finop			; sadd 0,%sp,128,%sp	# alter stack
397#
398	finop			; ld8 8(%sp),%i2	# load `t' as arg to
399	finop			; ld8 8(%sp),%i2	# load `t' as arg to
400	finop			; ld8 8(%sp),%i2	# load `t' as arg to
401	finop			; ld8 8(%sp),%i2	# load `t' as arg to
402
403	finop			; ld8 32(%sp),%c10	# `only' constant block
404	finop			; ld8 8(%sp),%i2	# `u' arg.
405	finop			; ld8 16(%sp),%i3	# `t' arg.
406	finop			; ld8 0(%c10),%c4	# `only' text location
407	finop			; ld8 24(%sp),%i4	# `userf' arg.
408	finop			; cxnop
409	finop			; jsr %c4,16(%c4)	# call `only'
410	finop			; cxnop
411	finop			; cxnop
412#
413# If the callee ever calls `nargs', the following instruction (pair)
414# will be executed.  However, we don't know when we compile this code
415# how many args are being passed.  So we give our best guess: 0.
416#
417	movi8 0,%i0		; movi8 0,%c8		# nargs brain dmg
418#
419# If we ever return, it's an error.
420#
421	finop			; jmp qt_error$TXT
422	finop			; cxnop
423	finop			; cxnop
424	movi8 0,%i0		; movi8 0,%c8		# nargs brain dmg
425