/*
 * QuickThreads -- Threads-building toolkit.
 * Copyright (c) 1993 by David Keppel
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice and this notice
 * appear in all copies.  This software is provided as a
 * proof-of-concept and for demonstration purposes; there is no
 * representation about the suitability of this software for any
 * purpose.
 */

#
# Layout of this file: every code line holds one instruction pair,
# written `left ; right'.  `finop' and `cxnop' are the no-op fillers
# for the left and right slot respectively.  Comments start with `#'.
#

        .file   "ksr1.s"
        .def    .debug; .endef

        .align  128
        .globl  qt_blocki
        .globl  qt_blocki$TXT
        .globl  qt_block
        .globl  qt_block$TXT
        .globl  qt_start$TXT
        .globl  qt_start
        .globl  qt_abort$TXT
        .globl  qt_abort
        .globl  qt_vstart
        .globl  qt_vstart$TXT

#
# KSR convention: on procedure calls, load both the procedure address
# and a pointer to a constant block.  The address of function `f' is
# `f$TXT', and the constant block address is `f'.  The constant block
# has several reserved values:
#
#       8 bytes fpu register save mask
#       4 bytes ipu register save mask
#       4 bytes ceu register save mask
#       f: f$TXT
#       ... whatever you want ... (not quite...read on)
#
# Note, by the way, that a pointer to a function is passed as a
# pointer to the constant area, and the constant area has the text
# address.
#

#
# Procedures that do not return structures prefix their code with
#
#       proc$TXT:
#               finop; cxnop
#               finop; cxnop
#               <proc code>
#
# Calls to those procedures branch to a 16 byte offset (4 instrs) in
# to the procedure to skip those instructions.
#
# Procedures that return structures use a different code prefix:
#
#       proc$TXT:
#               finop; beq.qt %rc, %rc, 24      # return value entry
#               finop; cxnop
#               finop; movi8 0, %rc             # no return value entry
#               <proc code>
#
# Calls that want the returned structure branch directly to the
# procedure address.  Callers that don't want (or aren't expecting) a
# return value branch 16 bytes in to the procedure, which will zero
# %rc, telling the called procedure not to return a structure.
#

#
# qt_blocki / qt_abort -- save the integer state of the running
# thread, switch stacks and run a helper on the new stack.
#
# On entry:
#       %i2 -- control block of helper function to run
#               (dereference to get helper)
#       %i3 -- a1
#       %i4 -- a2
#       %i5 -- sp of new to run
#       %c10 -- this routine's constant block (its second word,
#               qt_restore$TXT, is read through it below)
#       %c14 -- return linkage; saved at 0(%sp) and used by the
#               restore path as `jmp 32(%c14)'
#
# Frame built here (512 bytes, offsets from the new %sp):
#       504             caller's %fp
#       496             caller's %cp
#       456 .. 384      %c15 .. %c24
#       376             unused (slot of %c25, the EFP)
#       368 .. 336      %c26 .. %c30
#       328 .. 304      %i12 .. %i15
#       256 .. 144      %i16 .. %i30
#       8               qt_restore$TXT
#       0               %c14
#

        .data
        .half   0x0, 0x0, 0x7ffff000, 0x7fff8000
qt_blocki:
qt_abort:
        .word   qt_blocki$TXT
        .word   qt_restore$TXT

        .text
qt_abort$TXT:
qt_blocki$TXT:
        finop                   ; cxnop                       # entry prefix
        finop                   ; cxnop                       # entry prefix
        add8.ntr 75,%i31,%i31   ; movi8 512,%c5               # ICR; stk adjust
        finop                   ; ssub8.ntr 0,%sp,%c5,%sp
        finop                   ; st8 %fp,504(%sp)            # Save caller's fp
        finop                   ; st8 %cp,496(%sp)            # Save caller's cp
        finop                   ; ld8 8(%c10),%c5             # ld qt_restore$TXT
        finop                   ; st8 %c14,0(%sp)             # Save special ret addr
        finop                   ; mov8_8 %c10, %cp            # Our cp
#
# NOTE(review): %c5 was the destination of the ld8 three pairs above,
# so whether this add still sees 512 depends on the load latency --
# confirm that %fp ends up as intended.
#
        finop                   ; sadd8.ntr 0,%sp,%c5,%fp     # Our frame ptr
        finop                   ; st8 %c5,8(%sp)              # st qt_restore$TXT
#
# CEU registers %c15-%c24, %c26-%c30 (%c14 we restore later)
#
        finop                   ; st8 %c15,456(%sp)
        finop                   ; st8 %c16,448(%sp)
        finop                   ; st8 %c17,440(%sp)
        finop                   ; st8 %c18,432(%sp)
        finop                   ; st8 %c19,424(%sp)
        finop                   ; st8 %c20,416(%sp)
        finop                   ; st8 %c21,408(%sp)
        finop                   ; st8 %c22,400(%sp)
        finop                   ; st8 %c23,392(%sp)
        finop                   ; st8 %c24,384(%sp)
#
# %c25 is the Enclosing Frame Pointer (EFP) -- since C doesn't
# use nested procedures, we ignore it (leaving a gap, though)
#
        finop                   ; st8 %c26,368(%sp)
        finop                   ; st8 %c27,360(%sp)
        finop                   ; st8 %c28,352(%sp)
        finop                   ; st8 %c29,344(%sp)
        finop                   ; st8 %c30,336(%sp)
#
# IPU registers %i12-%i30
#
        finop                   ; st8 %i12,328(%sp)
        finop                   ; st8 %i13,320(%sp)
        finop                   ; st8 %i14,312(%sp)
        finop                   ; st8 %i15,304(%sp)
# (gap to get alignment for st64)
# -- Doesn't work on version 1.1.3 of the OS
#       finop                   ; st64 %i16,256(%sp)

        finop                   ; st8 %i16,256(%sp)
        finop                   ; st8 %i17,248(%sp)
        finop                   ; st8 %i18,240(%sp)
        finop                   ; st8 %i19,232(%sp)
        finop                   ; st8 %i20,224(%sp)
        finop                   ; st8 %i21,216(%sp)
        finop                   ; st8 %i22,208(%sp)
        finop                   ; st8 %i23,200(%sp)
        finop                   ; st8 %i24,192(%sp)
        finop                   ; st8 %i25,184(%sp)
        finop                   ; st8 %i26,176(%sp)
        finop                   ; st8 %i27,168(%sp)
        finop                   ; st8 %i28,160(%sp)
        finop                   ; st8 %i29,152(%sp)
        finop                   ; st8 %i30,144(%sp)
#
# FPU already saved, or saving not necessary
#

#
# Switch to the stack passed in as fourth argument to the block
# routine (%i5) and call the helper routine passed in as the first
# argument (%i2).  Note that the address of the helper's constant
# block is passed in, so we must dereference it to get the helper's text
# address.
#
        finop                   ; movb8_8 %i2,%c10            # helper's ConstBlock
        finop                   ; cxnop                       # Delay slot, fill w/
        finop                   ; cxnop                       # .. 2 st8 from above
        finop                   ; ld8 0(%c10),%c4             # load addr of helper
        finop                   ; movb8_8 %sp, %i2            # 1st arg to helper
                                                              # is this stack; other
                                                              # args remain in regs
        finop                   ; movb8_8 %i5,%sp             # switch stacks
        finop                   ; jsr %c14,16(%c4)            # call helper
#
# NOTE(review): at this call site the nargs pair directly follows the
# jsr; every other call in this file places it after two other pairs.
# Confirm which position is the one `nargs' inspects.
#
        movi8 3, %i0            ; movi8 0,%c8                 # nargs brain dmg
        finop                   ; cxnop
        finop                   ; cxnop
#
# Here is where behavior differs for threads being restored and threads
# being started.  Blocked threads have a pointer to qt_restore$TXT on
# the top of their stacks; manufactured stacks have a pointer to qt_start$TXT
# on the top of their stacks.  With this setup, starting threads
# skip the (unnecessary) restore operations.
#
# We jump to an offset of 16 to either (1) skip past the two noop pairs
# at the start of qt_start$TXT, or (2) skip past the two noop pairs
# after qt_restore$TXT.
#
        finop                   ; ld8 8(%sp),%c4
        finop                   ; cxnop
        finop                   ; cxnop
        finop                   ; jmp 16(%c4)
qt_restore$TXT:
        finop                   ; cxnop
        finop                   ; cxnop
#
# Point of Restore:
#
# The helper function will return here.  Any result it has placed in
# a return register (most likely %i0) will not get overwritten below
# and will consequently be the return value of the blocking routine.
#

#
# CEU registers %c15-%c24, %c26-%c30 (%c14 we restore later)
#
        finop                   ; ld8 456(%sp),%c15
        finop                   ; ld8 448(%sp),%c16
        finop                   ; ld8 440(%sp),%c17
        finop                   ; ld8 432(%sp),%c18
        finop                   ; ld8 424(%sp),%c19
        finop                   ; ld8 416(%sp),%c20
        finop                   ; ld8 408(%sp),%c21
        finop                   ; ld8 400(%sp),%c22
        finop                   ; ld8 392(%sp),%c23
        finop                   ; ld8 384(%sp),%c24
#
# %c25 is the Enclosing Frame Pointer (EFP) -- since C doesn't
# use nested procedures, we ignore it (leaving a gap, though)
#
        finop                   ; ld8 368(%sp),%c26
        finop                   ; ld8 360(%sp),%c27
        finop                   ; ld8 352(%sp),%c28
        finop                   ; ld8 344(%sp),%c29
        finop                   ; ld8 336(%sp),%c30
#
# IPU registers %i12-%i30
#
        finop                   ; ld8 328(%sp),%i12
        finop                   ; ld8 320(%sp),%i13
        finop                   ; ld8 312(%sp),%i14
        finop                   ; ld8 304(%sp),%i15
# (gap to get alignment for ld64)
# -- Doesn't work on version 1.1.3 of the OS
#       finop                   ; ld64 256(%sp),%i16

        finop                   ; ld8 256(%sp),%i16
        finop                   ; ld8 248(%sp),%i17
        finop                   ; ld8 240(%sp),%i18
        finop                   ; ld8 232(%sp),%i19
        finop                   ; ld8 224(%sp),%i20
        finop                   ; ld8 216(%sp),%i21
        finop                   ; ld8 208(%sp),%i22
        finop                   ; ld8 200(%sp),%i23
        finop                   ; ld8 192(%sp),%i24
        finop                   ; ld8 184(%sp),%i25
        finop                   ; ld8 176(%sp),%i26
        finop                   ; ld8 168(%sp),%i27
        finop                   ; ld8 160(%sp),%i28
        finop                   ; ld8 152(%sp),%i29
        finop                   ; ld8 144(%sp),%i30

#
# FPU registers don't need to be loaded, or will be loaded by an
# enclosing scope (e.g., if this is called by qt_block).
#

#
# Load the special registers.  We don't load the stack ptr because
# the new stack is passed in as an argument, we don't load the EFP
# because we don't use it, and we load the return address specially
# off the top of the stack.
#
        finop                   ; ld8 0(%sp),%c14             # return addr
        finop                   ; ld8 496(%sp),%cp
        finop                   ; ld8 504(%sp),%fp

#
# The two pairs after the jmp pop the 512-byte frame.
#
        finop                   ; jmp 32(%c14)                # jump back to thread
        finop                   ; movi8 512,%c5               # stack adjust
        finop                   ; sadd8.ntr 0,%sp,%c5,%sp

#
# qt_block -- like qt_blocki, but also preserves %f16-%f63 around the
# switch.  The arguments are left in their registers and handed on to
# qt_blocki unchanged.
#
# Constant block words (after the three save masks):
#       0   qt_block$TXT
#       8   qt_error
#       16  qt_error$TXT
#       24  qt_blocki           (loaded into %c10 before the call)
#
# Frame built here (512 bytes, offsets from the new %sp):
#       504             caller's %fp
#       496             caller's %cp
#       488             %c14
#       384 .. 64       %f16 .. %f63, eight registers per st64
#
        .data
        .half   0x0, 0x0, 0x7ffff000, 0x7fff8000
qt_block:
        .word   qt_block$TXT
        .word   qt_error
        .word   qt_error$TXT
        .word   qt_blocki
#
# Handle saving and restoring the FPU regs, relying on qt_blocki
# to save and restore the remaining registers.
#
        .text
qt_block$TXT:
        finop                   ; cxnop                       # entry prefix
        finop                   ; cxnop                       # entry prefix

        add8.ntr 29,%i31,%i31   ; movi8 512,%c5               # ICR; stk adjust
        finop                   ; ssub8.ntr 0,%sp,%c5,%sp
        finop                   ; st8 %fp,504(%sp)            # Save caller's fp
        finop                   ; st8 %cp,496(%sp)            # Save caller's cp
        finop                   ; st8 %c14,488(%sp)           # store ret addr
        finop                   ; sadd8.ntr 0,%sp,%c5,%fp     # Our frame ptr
        finop                   ; mov8_8 %c10, %cp            # Our cp

#
# Store 8 registers at once...destination must be a multiple of 64
#
        finop                   ; st64 %f16,384(%sp)
        finop                   ; st64 %f24,320(%sp)
        finop                   ; st64 %f32,256(%sp)
        finop                   ; st64 %f40,192(%sp)
        finop                   ; st64 %f48,128(%sp)
        finop                   ; st64 %f56,64(%sp)

#
# Call the integer blocking routine, passing the arguments passed to us
#
        finop                   ; ld8 24(%cp), %c10           # qt_blocki const block
        finop                   ; cxnop
        finop                   ; jsr %c14, qt_blocki$TXT
        finop                   ; cxnop
        finop                   ; cxnop
        movi8 4,%i0             ; movi8 0,%c8                 # nargs brain dmg

#
# Load 8 registers at once...source must be a multiple of 64
#
        finop                   ; ld64 64(%sp),%f56
        finop                   ; ld64 128(%sp),%f48
        finop                   ; ld64 192(%sp),%f40
        finop                   ; ld64 256(%sp),%f32
        finop                   ; ld64 320(%sp),%f24
        finop                   ; ld64 384(%sp),%f16

        finop                   ; ld8 488(%sp),%c14
        finop                   ; ld8 496(%sp),%cp
        finop                   ; ld8 504(%sp),%fp
        finop                   ; jmp 32(%c14)                # jump back to thread
        finop                   ; movi8 512,%c5               # stack adjust
        finop                   ; sadd8.ntr 0,%sp,%c5,%sp


#
# qt_start -- first code run by a freshly manufactured thread.
#
# Reads from the new thread's stack:
#       40(%sp)         `only' constant block
#       32(%sp)         `userf'
#       24(%sp)         `t'
#       16(%sp)         `u'
#
        .data
        .half   0x0, 0x0, 0x7ffff000, 0x7fff8000
qt_start:
        .word   qt_start$TXT
#
# A new thread is set up to "appear" as if it were executing code at
# the beginning of qt_start and then it called a blocking routine
# (qt_blocki).  So when a new thread starts to run, it gets unblocked
# by the code above and "returns" to `qt_start$TXT' in the
# restore step of the switch.  Blocked threads jump to 16(qt_restore$TXT),
# and starting threads jump to 16(qt_start$TXT).
#
        .text
qt_start$TXT:
        finop                   ; cxnop                       #
        finop                   ; cxnop                       #
        finop                   ; ld8 40(%sp),%c10            # `only' constant block
        finop                   ; ld8 32(%sp),%i4             # `userf' arg.
        finop                   ; ld8 24(%sp),%i3             # `t' arg.
        finop                   ; ld8 0(%c10),%c4             # `only' text location
        finop                   ; ld8 16(%sp),%i2             # `u' arg.
        finop                   ; cxnop
        finop                   ; jsr %c14,16(%c4)            # call `only'
#
# Pop the frame used to store the thread's initial data
#
        finop                   ; sadd8.ntr 0,%sp,128,%sp
        finop                   ; cxnop
        movi8 2,%i0             ; movi8 0,%c8                 # nargs brain dmg
#
# If we ever return, it's an error.
#
        finop                   ; jmp qt_error$TXT
        finop                   ; cxnop
        finop                   ; cxnop
        movi8 0,%i0             ; movi8 0,%c8                 # nargs brain dmg


#
# This stuff is broken
#
# qt_vstart -- start-up path that first calls `startup' with `t' and
# then calls `only'.
#
        .data
        .half   0x0, 0x0, 0x7ffff000, 0x7fff8000
qt_vstart:
        .word   qt_vstart$TXT

        .text
qt_vstart$TXT:
        finop                   ; cxnop                       # entry prefix
        finop                   ; cxnop                       # entry prefix
        finop                   ; cxnop
        finop                   ; cxnop
        add8.ntr 11,%i31,%i31   ; movi8 512,%c5
        finop                   ; ssub8.ntr 0,%sp,%c5,%sp     # fix stack
        finop                   ; ld8 8(%sp),%i2              # load `t' as arg to
        finop                   ; cxnop                       # `startup'
        finop                   ; cxnop
        finop                   ; ld8 16(%sp),%c10            # `startup' const block
        finop                   ; cxnop
        finop                   ; cxnop
        finop                   ; ld8 0(%c10),%c4             # `startup' text loc.
        finop                   ; cxnop
        finop                   ; cxnop
        finop                   ; jsr %c14,16(%c4)            # call `startup'
        finop                   ; cxnop
        finop                   ; cxnop
        movi8 1, %i0            ; movi8 0,%c8                 # nargs brain dmg
#
#       finop                   ; sadd 0,%sp,128,%sp          # alter stack
#
# NOTE(review): the same load is issued four times in a row here, and
# its comment names `t' while the load of the same slot further down
# names `u' -- presumably left over from debugging; confirm.
#
        finop                   ; ld8 8(%sp),%i2              # load `t' as arg to
        finop                   ; ld8 8(%sp),%i2              # load `t' as arg to
        finop                   ; ld8 8(%sp),%i2              # load `t' as arg to
        finop                   ; ld8 8(%sp),%i2              # load `t' as arg to

        finop                   ; ld8 32(%sp),%c10            # `only' constant block
        finop                   ; ld8 8(%sp),%i2              # `u' arg.
        finop                   ; ld8 16(%sp),%i3             # `t' arg.
        finop                   ; ld8 0(%c10),%c4             # `only' text location
        finop                   ; ld8 24(%sp),%i4             # `userf' arg.
        finop                   ; cxnop
#
# NOTE(review): every other call in this file links through %c14;
# this one names %c4, which is also the branch target register.
# Left as found -- confirm whether %c14 was intended.
#
        finop                   ; jsr %c4,16(%c4)             # call `only'
        finop                   ; cxnop
        finop                   ; cxnop
#
# If the callee ever calls `nargs', the following instruction (pair)
# will be executed.  However, we don't know when we compile this code
# how many args are being passed.  So we give our best guess: 0.
#
        movi8 0,%i0             ; movi8 0,%c8                 # nargs brain dmg
#
# If we ever return, it's an error.
#
        finop                   ; jmp qt_error$TXT
        finop                   ; cxnop
        finop                   ; cxnop
        movi8 0,%i0             ; movi8 0,%c8                 # nargs brain dmg