1/*
2 * Copyright (c) 2018, Cornell University
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or
6 * without modification, are permitted provided that the following
7 * conditions are met:
8 *
9 * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following
14 * disclaimer in the documentation and/or other materials provided
15 * with the distribution.
16 *
17 * Neither the name of Cornell University nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
22 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
23 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
26 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
29 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * Authors: Tuan Ta
36 */
37
//------------------------------------------------------------------------
// This header provides the functions and macros needed to create and
// exit threads. They are used by the multi-threaded assembly tests.
// It assumes the target system can run 4 threads concurrently (i.e.,
// 1 master thread and 3 child threads).
//
// Threads are synchronized through the futex system call (i.e., wait
// and wake-up operations).
//------------------------------------------------------------------------
47
48#ifndef __TEST_MACROS_MT_FUTEX_H
49#define __TEST_MACROS_MT_FUTEX_H
50
// System call numbers (loaded into a7 before ecall)
#define SYSCALL_FUTEX         98
#define SYSCALL_GETTID        178
#define SYSCALL_MUNMAP        215
#define SYSCALL_CLONE         220
#define SYSCALL_MMAP          222

// Size in bytes of every region handed out by _alloc_mem; the same size
// is used for a child thread's stack and for its TLS area.
#define MEM_SIZE              (4096 * 1024)

// mmap protection flags
#define PROT_READ             0x1
#define PROT_WRITE            0x2
#define MMAP_PROT_FLAGS       (PROT_READ | PROT_WRITE)

// mmap mapping flags
#define MAP_PRIVATE           0x02
#define MAP_ANONYMOUS         0x20
#define MAP_STACK             0x20000
#define MMAP_MAP_FLAGS        (MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK)

// clone flags
#define CLONE_VM              0x00000100
#define CLONE_FS              0x00000200
#define CLONE_FILES           0x00000400
#define CLONE_SIGHAND         0x00000800
#define CLONE_PARENT          0x00008000
#define CLONE_THREAD          0x00010000
#define CLONE_IO              0x80000000
#define CLONE_PARENT_SETTID   0x00100000	/* set the TID in the parent */
#define CLONE_CHILD_CLEARTID  0x00200000	/* clear the TID in the child */
#define CLONE_SETTLS          0x00080000
#define CLONE_FLAGS           (CLONE_VM | CLONE_FS | CLONE_FILES \
                              | CLONE_SIGHAND | CLONE_PARENT \
                              | CLONE_THREAD | CLONE_IO \
                              | CLONE_PARENT_SETTID \
                              | CLONE_CHILD_CLEARTID \
                              | CLONE_SETTLS)

// futex operations and operation modifiers
#define FUTEX_WAIT            0
#define FUTEX_WAKE            1
#define FUTEX_CMP_REQUEUE     4
#define FUTEX_WAKE_OP         5
#define FUTEX_WAIT_BITSET     9
#define FUTEX_WAKE_BITSET     10
#define FUTEX_PRIVATE_FLAG    128
#define FUTEX_CLOCK_REALTIME  256
// Parenthesized so the mask stays one operand wherever it is expanded.
#define FUTEX_CMD_MASK        (~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME))

#define FUTEX_OP_SET          0  /* uaddr2 = oparg; */
#define FUTEX_OP_ADD          1  /* uaddr2 += oparg; */
#define FUTEX_OP_OR           2  /* uaddr2 |= oparg; */
#define FUTEX_OP_ANDN         3  /* uaddr2 &= ~oparg; */
#define FUTEX_OP_XOR          4  /* uaddr2 ^= oparg; */
#define FUTEX_OP_ARG_SHIFT    8  /* Use (1 << oparg) as operand */

#define FUTEX_OP_CMP_EQ       0  /* if (oldval == cmparg) wake */
#define FUTEX_OP_CMP_NE       1  /* if (oldval != cmparg) wake */
#define FUTEX_OP_CMP_LT       2  /* if (oldval < cmparg) wake */
#define FUTEX_OP_CMP_LE       3  /* if (oldval <= cmparg) wake */
#define FUTEX_OP_CMP_GT       4  /* if (oldval > cmparg) wake */
#define FUTEX_OP_CMP_GE       5  /* if (oldval >= cmparg) wake */

// Pack an operation word: op in bits 31:28, cmp in bits 27:24, oparg in
// bits 23:12 and cmparg in bits 11:0. Every argument is parenthesized so
// that an expression argument (e.g. "A | B") is masked as a whole rather
// than having only its last operand masked.
#define FUTEX_OP(op, oparg, cmp, cmparg)                    \
                ((((op) & 0xf) << 28) |                     \
                 (((cmp) & 0xf) << 24) |                    \
                 (((oparg) & 0xfff) << 12) |                \
                 ((cmparg) & 0xfff))

#define FUTEX_WAIT_PRIVATE        (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAKE_PRIVATE        (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG)

// Generic status codes
#define FAILURE               1
#define SUCCESS               0
122
//------------------------------------------------------------------------
// _create_threads: create a given number of threads
//
//    The calling thread (a.k.a, master thread) saves information about its
//    child threads in its stack in the following structure:
//
//    | child_stack_ptr_0       |  << fp: frame pointer
//    | child_tls_ptr_0         |
//    | child_thread_id_0       |
//    | saved_child_thread_id_0 |
//    | child_stack_ptr_1       |
//    | child_tls_ptr_1         |
//    | child_thread_id_1       |
//    | saved_child_thread_id_1 |
//    | ...                     |  << sp: stack pointer
//
//    For each child thread, the following 8-byte slots are pushed onto
//    the parent thread's stack (32 bytes per created thread):
//
//    - child_stack_ptr stores the lower address of the child thread's
//      stack space
//
//    - child_tls_ptr stores the lower address of the child thread's
//      thread local storage (TLS)
//
//    - child_thread_id is the ptid/ctid location passed to clone. With
//      CLONE_CHILD_CLEARTID it is cleared when the child thread exits.
//
//    - saved_child_thread_id also stores the thread ID of the child
//      thread, but this variable is used only by the parent thread.
//
//    In:   a0 = number of threads to create. A count <= 0 creates
//               nothing and leaves the stack untouched.
//    Out:  n_worker_threads is incremented once per successfully created
//          thread. sp is lowered by 32 bytes per created thread, so the
//          function deliberately returns with a modified sp.
//    Uses: t0-t4, s0 (holds ra), s2 (holds MEM_SIZE), a0-a5, a7.
//
//    Creation stops at the first failing mmap or clone. The memory and
//    stack slots belonging to the failed attempt are released; threads
//    created before the failure stay recorded.
//
//    A newly created thread does not return from here: it branches to
//    _mt_test, which must be defined by the including test.
//------------------------------------------------------------------------

_create_threads:
  mv      t0, a0                // t0 = number of threads still to create
  mv      s0, ra                // save return register (jal reuses ra)
  blez    t0, 3f                // nothing to create for a count <= 0
  la      t3, n_worker_threads
  li      s2, MEM_SIZE          // loop-invariant size of stack/TLS regions
1:
  // allocate a new stack space and save its pointer in the caller's
  // stack. A negative result is taken as an mmap error (-errno); nothing
  // has been pushed for this thread yet, so simply stop.
  jal     ra, _alloc_mem
  bltz    a0, 3f
  addi    sp, sp, -8
  sd      a0, (sp)              // child_stack_ptr
  mv      t1, a0

  // allocate a new thread local storage (TLS) and save its pointer in the
  // caller's stack. On failure only the stack region must be released.
  jal     ra, _alloc_mem
  bltz    a0, 4f
  addi    sp, sp, -8
  sd      a0, (sp)              // child_tls_ptr
  mv      t2, a0

  // allocate space in the caller's stack to store new thread ID
  addi    sp, sp, -8            // child_thread_id

  // clone a new thread
  li      a0, CLONE_FLAGS
  add     a1, t1, s2            // pointer to the high address of the new stack
  mv      a2, sp                // ptid
  mv      a3, t2                // pointer to the low address of the new TLS,
                                // assuming TLS grows upward
  mv      a4, sp                // ctid (same slot as ptid)
  li      a7, SYSCALL_CLONE     // clone syscall number
  ecall                         // call clone syscall
  bltz    a0, 2f                // syscall error
  beqz    a0, _mt_test          // only the new thread jumps to _mt_test

  // save child thread ID in the caller's stack
  addi    sp, sp, -8
  sd      a0, (sp)              // saved_child_thread_id

  // decrement the number of threads to create
  addi    t0, t0, -1

  // increment the number of successfully created threads so far
  li      t4, 1
  amoadd.d  zero, t4, (t3)

  // check if we still need to spawn more threads
  bnez    t0, 1b
  j       3f
2:
  // handle clone syscall error by deleting the last memory frame created
  // for the unsuccessfully spawned thread.
  addi    sp, sp, 8             // drop child_thread_id

  // deallocate last allocated tls
  ld      a0, (sp)
  jal     ra, _dealloc_mem
  addi    sp, sp, 8
4:
  // deallocate last allocated stack (also the entry point when the TLS
  // allocation itself failed)
  ld      a0, (sp)
  jal     ra, _dealloc_mem
  addi    sp, sp, 8
3:
  // finish creating threads
  mv      ra, s0                // restore return register
  ret
225
//------------------------------------------------------------------------
// _alloc_mem: map a fresh memory region of MEM_SIZE bytes
//
//    Issues the mmap system call with MMAP_PROT_FLAGS and MMAP_MAP_FLAGS.
//
//    Out:  a0 = value returned by the system call, i.e. the pointer to
//               the newly allocated memory space. The result is passed
//               back unchecked; detecting a failed mmap is left to the
//               caller.
//    Uses: a0-a5, a7
//------------------------------------------------------------------------

_alloc_mem:
  li      a7, SYSCALL_MMAP        // system call number
  li      a5, 0                   // offset
  li      a4, -1                  // fd: none
  li      a3, MMAP_MAP_FLAGS      // mapping flags
  li      a2, MMAP_PROT_FLAGS     // protection flags
  li      a1, MEM_SIZE            // length of the region
  mv      a0, zero                // no address hint
  ecall
  ret
243
//------------------------------------------------------------------------
// _delete_threads: release the memory of child threads
//
//    This function assumes the following structure in the calling thread's
//    stack frame
//
//    | child_stack_ptr_0       |  << fp: frame pointer
//    | child_tls_ptr_0         |
//    | child_thread_id_0       |
//    | saved_child_thread_id_0 |
//    | child_stack_ptr_1       |
//    | child_tls_ptr_1         |
//    | child_thread_id_1       |
//    | saved_child_thread_id_1 |
//    | ...                     |  << sp: stack pointer
//
//    For every record, the TLS and stack regions are unmapped through
//    _dealloc_mem and the 32-byte record is popped off the caller's
//    stack, so the function deliberately returns with a modified sp.
//
//    In:   a0 = number of threads to delete. A count <= 0 releases
//               nothing and leaves sp untouched.
//    Uses: t0, s0 (holds ra), a0, a1, a7
//------------------------------------------------------------------------

_delete_threads:
  mv      t0, a0                  // t0 = number of records still to pop
  mv      s0, ra                  // save return register (jal reuses ra)
  blez    t0, 2f                  // nothing to delete for a count <= 0
1:
  // record layout relative to sp:
  //    0: saved_child_thread_id    8: child_thread_id
  //   16: child_tls_ptr           24: child_stack_ptr

  // deallocate thread's tls
  ld      a0, 16(sp)
  jal     ra, _dealloc_mem

  // deallocate thread's stack
  ld      a0, 24(sp)
  jal     ra, _dealloc_mem

  // pop the whole record off the caller's stack
  addi    sp, sp, 32

  // decrement the number of threads to delete
  addi    t0, t0, -1
  bnez    t0, 1b
2:
  // finish deleting all threads
  mv      ra, s0                  // restore return register
  ret
287
//------------------------------------------------------------------------
// _dealloc_mem: unmap a memory region of MEM_SIZE bytes
//
//    In:   a0 = pointer to the memory space to release
//    Out:  a0 = value returned by the munmap system call (not checked
//               here)
//    Uses: a0, a1, a7
//------------------------------------------------------------------------

_dealloc_mem:
  li      a7, SYSCALL_MUNMAP      // system call number
  li      a1, MEM_SIZE            // length of the region
  ecall
  ret
299
//------------------------------------------------------------------------
// _join: wait for all child threads to exit
//
//    Child threads are created with CLONE_CHILD_CLEARTID flag, so when
//    they exit, they will clear the ctid/ptid variable and wake up their
//    parent thread.
//
//    This function assumes the following structure in the calling thread's
//    stack frame
//
//    | child_stack_ptr_0       |  << fp: frame pointer
//    | child_tls_ptr_0         |
//    | child_thread_id_0       |
//    | saved_child_thread_id_0 |
//    | child_stack_ptr_1       |
//    | child_tls_ptr_1         |
//    | child_thread_id_1       |
//    | saved_child_thread_id_1 |
//    | ...                     |  << sp: stack pointer
//
//    For every 32-byte record, starting at sp, a futex wait is issued on
//    child_thread_id with saved_child_thread_id as the expected value.
//    The wait therefore returns immediately if the child has already
//    cleared its ID, and blocks otherwise.
//
//    In:   a0 = number of threads to wait for. A count <= 0 waits for
//               nothing.
//    Out:  sp is restored to its value on entry; the records stay on the
//          caller's stack.
//    Uses: t0, s0 (holds ra), s1 (holds sp), a0-a3, a7
//------------------------------------------------------------------------

_join:
  mv      t0, a0                  // t0 = number of threads still to join
  mv      s0, ra                  // save return register
  mv      s1, sp                  // save stack pointer
  blez    t0, 2f                  // nothing to wait for with a count <= 0
1:
  // Calling futex_wait on ctidptr
  ld      a2, (sp)                // expected value: thread ID from
                                  // saved_child_thread_id
  addi    a0, sp, 8               // futex address (child_thread_id)
  // The wake-up performed for CLONE_CHILD_CLEARTID at thread exit is a
  // plain (non-private) futex wake, so wait with the matching
  // non-private operation.
  li      a1, FUTEX_WAIT
  li      a3, 0                   // timeout = NULL: wait without time limit
  li      a7, SYSCALL_FUTEX
  ecall

  // advance to the next record: saved_child_thread_id, child_thread_id,
  // child_tls_ptr and child_stack_ptr are 8 bytes each
  addi    sp, sp, 32

  // decrement the number of threads to wait for
  addi    t0, t0, -1
  bnez    t0, 1b
2:
  // finish waiting for all threads
  mv      ra, s0                  // restore return register
  mv      sp, s1                  // restore stack pointer
  ret
348
// MT_DATA: data shared by the threads of a multi-threaded test
//
//    - n_worker_threads counts the successfully created threads; it is
//      updated by _create_threads with amoadd.d, which needs a naturally
//      aligned doubleword, hence the explicit 8-byte alignment.
//    - shared_var, barrier and array are not referenced in this header;
//      presumably they are used by the individual tests.
#define MT_DATA                                                           \
  .balign 8;                                                              \
  n_worker_threads:     .dword    0;                                      \
  shared_var:           .dword    0;                                      \
  barrier:              .dword    0;                                      \
  array:                .dword    0x00000000deadbeef,                     \
                                  0xdeadbeefdeadbeef,                     \
                                  0x12343eeaaf423451;

357#endif
358