112771Sqtt2@cornell.edu/*
212771Sqtt2@cornell.edu * Copyright (c) 2018, Cornell University
312771Sqtt2@cornell.edu * All rights reserved.
412771Sqtt2@cornell.edu *
512771Sqtt2@cornell.edu * Redistribution and use in source and binary forms, with or
612771Sqtt2@cornell.edu * without modification, are permitted provided that the following
712771Sqtt2@cornell.edu * conditions are met:
812771Sqtt2@cornell.edu *
912771Sqtt2@cornell.edu * Redistributions of source code must retain the above copyright
1012771Sqtt2@cornell.edu * notice, this list of conditions and the following disclaimer.
1112771Sqtt2@cornell.edu *
1212771Sqtt2@cornell.edu * Redistributions in binary form must reproduce the above
1312771Sqtt2@cornell.edu * copyright notice, this list of conditions and the following
1412771Sqtt2@cornell.edu * disclaimer in the documentation and/or other materials provided
1512771Sqtt2@cornell.edu * with the distribution.
1612771Sqtt2@cornell.edu *
1712771Sqtt2@cornell.edu * Neither the name of Cornell University nor the names of its
1812771Sqtt2@cornell.edu * contributors may be used to endorse or promote products derived
1912771Sqtt2@cornell.edu * from this software without specific prior written permission.
2012771Sqtt2@cornell.edu *
2112771Sqtt2@cornell.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
2212771Sqtt2@cornell.edu * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
2312771Sqtt2@cornell.edu * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
2412771Sqtt2@cornell.edu * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
2512771Sqtt2@cornell.edu * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
2612771Sqtt2@cornell.edu * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2712771Sqtt2@cornell.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2812771Sqtt2@cornell.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
2912771Sqtt2@cornell.edu * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
3012771Sqtt2@cornell.edu * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3112771Sqtt2@cornell.edu * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
3212771Sqtt2@cornell.edu * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
3312771Sqtt2@cornell.edu * POSSIBILITY OF SUCH DAMAGE.
3412771Sqtt2@cornell.edu *
3512771Sqtt2@cornell.edu * Authors: Tuan Ta
3612771Sqtt2@cornell.edu */
3712771Sqtt2@cornell.edu
//------------------------------------------------------------------------
// This header provides the functions and macros needed to create, join
// and clean up after threads. They are used in multi-threaded assembly
// tests. It assumes the target system can run 4 threads concurrently
// (i.e., 1 master thread and 3 child threads).
//
// Threads are synchronized through the futex system call (i.e., wait and
// wakeup operations).
//------------------------------------------------------------------------
4712771Sqtt2@cornell.edu
4812771Sqtt2@cornell.edu#ifndef __TEST_MACROS_MT_FUTEX_H
4912771Sqtt2@cornell.edu#define __TEST_MACROS_MT_FUTEX_H
5012771Sqtt2@cornell.edu
// Linux system call numbers (placed in a7 before ecall)
#define SYSCALL_FUTEX         98
#define SYSCALL_GETTID        178
#define SYSCALL_MUNMAP        215
#define SYSCALL_CLONE         220
#define SYSCALL_MMAP          222

// Size in bytes of every region handed out by _alloc_mem (4 MiB). The
// same size is used for a child's stack and for its TLS area.
#define MEM_SIZE              (4096 * 1024)

// mmap protection bits
#define PROT_READ             0x1
#define PROT_WRITE            0x2
#define MMAP_PROT_FLAGS       (PROT_READ | PROT_WRITE)

// mmap mapping flags
#define MAP_PRIVATE           0x02
#define MAP_ANONYMOUS         0x20
#define MAP_STACK             0x20000
#define MMAP_MAP_FLAGS        (MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK)

// clone flags; CLONE_FLAGS is the set passed by _create_threads
#define CLONE_VM              0x00000100
#define CLONE_FS              0x00000200
#define CLONE_FILES           0x00000400
#define CLONE_SIGHAND         0x00000800
#define CLONE_PARENT          0x00008000
#define CLONE_THREAD          0x00010000
#define CLONE_IO              0x80000000
#define CLONE_PARENT_SETTID   0x00100000	/* set the TID in the parent */
#define CLONE_CHILD_CLEARTID  0x00200000	/* clear the TID in the child */
#define CLONE_SETTLS          0x00080000
#define CLONE_FLAGS           (CLONE_VM | CLONE_FS | CLONE_FILES \
                              | CLONE_SIGHAND | CLONE_PARENT \
                              | CLONE_THREAD | CLONE_IO \
                              | CLONE_PARENT_SETTID \
                              | CLONE_CHILD_CLEARTID \
                              | CLONE_SETTLS)

// futex operations and modifier flags
#define FUTEX_WAIT            0
#define FUTEX_WAKE            1
#define FUTEX_CMP_REQUEUE     4
#define FUTEX_WAKE_OP         5
#define FUTEX_WAIT_BITSET     9
#define FUTEX_WAKE_BITSET     10
#define FUTEX_PRIVATE_FLAG    128
#define FUTEX_CLOCK_REALTIME  256
// Mask that strips the modifier flags and leaves only the operation.
// The expansion is fully parenthesized so it can be embedded in a larger
// expression safely.
#define FUTEX_CMD_MASK        (~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME))

// FUTEX_WAKE_OP: operation applied to the word at uaddr2
#define FUTEX_OP_SET          0  /* uaddr2 = oparg; */
#define FUTEX_OP_ADD          1  /* uaddr2 += oparg; */
#define FUTEX_OP_OR           2  /* uaddr2 |= oparg; */
#define FUTEX_OP_ANDN         3  /* uaddr2 &= ~oparg; */
#define FUTEX_OP_XOR          4  /* uaddr2 ^= oparg; */
#define FUTEX_OP_ARG_SHIFT    8  /* Use (1 << oparg) as operand */

// FUTEX_WAKE_OP: comparison applied to the old value of that word
#define FUTEX_OP_CMP_EQ       0  /* if (oldval == cmparg) wake */
#define FUTEX_OP_CMP_NE       1  /* if (oldval != cmparg) wake */
#define FUTEX_OP_CMP_LT       2  /* if (oldval < cmparg) wake */
#define FUTEX_OP_CMP_LE       3  /* if (oldval <= cmparg) wake */
#define FUTEX_OP_CMP_GT       4  /* if (oldval > cmparg) wake */
#define FUTEX_OP_CMP_GE       5  /* if (oldval >= cmparg) wake */

// Pack a FUTEX_WAKE_OP operation word:
//   bits 31:28 = op, bits 27:24 = cmp, bits 23:12 = oparg, bits 11:0 = cmparg
// Every argument is parenthesized so that compound expressions such as
// (FUTEX_OP_ADD | FUTEX_OP_ARG_SHIFT) are masked and shifted as a unit.
#define FUTEX_OP(op, oparg, cmp, cmparg)                    \
                ((((op) & 0xf) << 28) |                     \
                 (((cmp) & 0xf) << 24) |                    \
                 (((oparg) & 0xfff) << 12) |                \
                 ((cmparg) & 0xfff))

// Process-private variants of the futex operations
#define FUTEX_WAIT_PRIVATE        (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAKE_PRIVATE        (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG)

// Generic status codes
#define FAILURE               1
#define SUCCESS               0
12212771Sqtt2@cornell.edu
//------------------------------------------------------------------------
// _create_threads: create a given number of threads
//
//    The calling thread (a.k.a, master thread) saves information about its
//    child threads in its stack in the following structure:
//
//    | child_stack_ptr_0       |  << fp: frame pointer
//    | child_tls_ptr_0         |
//    | child_thread_id_0       |
//    | saved_child_thread_id_0 |
//    | child_stack_ptr_1       |
//    | child_tls_ptr_1         |
//    | child_thread_id_1       |
//    | saved_child_thread_id_1 |
//    | ...                     |  << sp: stack pointer
//
//    For each child thread, we need to save the following information
//    in the parent thread's stack frame:
//
//    - child_stack_ptr stores the lower address of the child thread's
//      stack space
//
//    - child_tls_ptr stores the lower address of the child thread's
//      thread local storage (TLS)
//
//    - child_thread_id stores the thread ID of the child thread. This
//      variable will be cleared by the child thread when it exits.
//
//    - saved_child_thread_id also stores the thread ID of the child
//      thread, but this variable is used only by the parent thread.
//
//    Input:
//      a0 = number of threads to create. A value <= 0 creates nothing.
//
//    Output:
//      n_worker_threads is atomically incremented once per successfully
//      created thread. On return, sp has been lowered by 32 bytes (four
//      8-byte slots) per created thread; the caller is expected to hand
//      that frame to _join and _delete_threads.
//
//    Creation stops at the first failing mmap or clone. Whatever was
//    allocated for the failed thread is released again, so the stack only
//    holds complete 4-slot records.
//
//    Each new thread does not return from here: it jumps to _mt_test,
//    which must be defined by the including test.
//
//    Clobbers: a0-a5, a7, t0-t4, s0, s2 (ra is restored before returning)
//------------------------------------------------------------------------

_create_threads:
  mv      t0, a0                // t0 = number of threads still to create
  mv      s0, ra                // keep return address; jal below reuses ra
  la      t3, n_worker_threads  // t3 = address of created-thread counter
  blez    t0, 3f                // nothing requested: leave stack untouched
1:
  // allocate a new stack space and save its pointer in the caller's stack
  jal     ra, _alloc_mem
  bltz    a0, 3f                // mmap failed (-errno); nothing pushed yet
  addi    sp, sp, -8
  sd      a0, (sp)
  mv      t1, a0

  // allocate a new thread local storage (TLS) and save its pointer in the
  // caller's stack
  jal     ra, _alloc_mem
  bltz    a0, 4f                // mmap failed; release the stack pushed above
  addi    sp, sp, -8
  sd      a0, (sp)
  mv      t2, a0

  // allocate space in the caller's stack to store new thread ID
  addi    sp, sp, -8

  // clone a new thread
  li      a0, CLONE_FLAGS
  li      s2, MEM_SIZE
  add     a1, t1, s2        // pointer to the high address of the new stack
  mv      a2, sp            // ptid
  mv      a3, t2            // pointer to the low address of the new TLS,
                            // assuming TLS grows upward
  mv      a4, sp            // ctid (same slot as ptid)
  li      a7, SYSCALL_CLONE // clone syscall number
  ecall                     // call clone syscall
  bltz    a0, 2f            // syscall error
  beqz    a0, _mt_test      // only the new thread jumps to _mt_test

  // save child thread ID in the caller's stack
  addi      sp, sp, -8
  sd        a0, (sp)

  // decrement the number of threads to create
  addi      t0, t0, -1

  // increment the number of successfully created threads so far
  li        t4, 1
  amoadd.d  zero, t4, (t3)

  // check if we still need to spawn more threads
  bnez      t0, 1b
  j         3f
2:
  // handle clone syscall error by deleting the last memory frame created
  // for the unsuccessfully spawned thread.
  addi      sp, sp, 8       // skip child_thread_id

  // deallocate last allocated tls
  ld        a0, (sp)
  jal       ra, _dealloc_mem
  addi      sp, sp, 8
4:
  // deallocate last allocated stack (also the entry point when the TLS
  // allocation itself failed and only the stack slot has been pushed)
  ld        a0, (sp)
  jal       ra, _dealloc_mem
  addi      sp, sp, 8
3:
  // finish creating threads
  mv        ra, s0
  ret
22512771Sqtt2@cornell.edu
//------------------------------------------------------------------------
// _alloc_mem: map a fresh region of MEM_SIZE bytes
//
//    Issues mmap(0, MEM_SIZE, MMAP_PROT_FLAGS, MMAP_MAP_FLAGS, -1, 0).
//
//    Output:   a0 = value returned by the mmap system call (the pointer
//              to the new region)
//    Clobbers: a1-a5, a7
//------------------------------------------------------------------------

_alloc_mem:
  li      a7, SYSCALL_MMAP      // system call number
  mv      a0, zero              // addr:   no placement hint
  li      a1, MEM_SIZE          // length
  li      a2, MMAP_PROT_FLAGS   // prot
  li      a3, MMAP_MAP_FLAGS    // flags
  li      a4, -1                // fd:     none
  mv      a5, zero              // offset
  ecall
  ret
24312771Sqtt2@cornell.edu
//------------------------------------------------------------------------
// _delete_threads: release the memory of child threads
//
//    For every thread record this unmaps the thread's TLS and stack
//    regions and pops the 4-slot record off the calling thread's stack.
//
//    This function assumes the following structure in the calling thread's
//    stack frame
//
//    | child_stack_ptr_0       |  << fp: frame pointer
//    | child_tls_ptr_0         |
//    | child_thread_id_0       |
//    | saved_child_thread_id_0 |
//    | child_stack_ptr_1       |
//    | child_tls_ptr_1         |
//    | child_thread_id_1       |
//    | saved_child_thread_id_1 |
//    | ...                     |  << sp: stack pointer
//
//    Input:
//      a0 = number of threads to delete. A value <= 0 deletes nothing
//           and leaves sp untouched.
//
//    Output:
//      sp is raised by 32 bytes per deleted thread.
//
//    Clobbers: a0, a1, a7, t0, s0 (ra is restored before returning)
//------------------------------------------------------------------------

_delete_threads:
  mv      t0, a0                  // t0 = number of threads left to delete
  mv      s0, ra                  // save return register
  blez    t0, 2f                  // no records to pop
1:
  // skip saved_child_thread_id and child_thread_id
  addi    sp, sp, 16

  // deallocate thread's tls
  ld      a0, (sp)
  jal     ra, _dealloc_mem
  addi    sp, sp, 8

  // deallocate thread's stack
  ld      a0, (sp)
  jal     ra, _dealloc_mem
  addi    sp, sp, 8

  // decrement the number of threads to delete
  addi    t0, t0, -1
  bnez    t0, 1b
2:
  // finish deleting all threads
  mv      ra, s0                  // restore return register
  ret
28712771Sqtt2@cornell.edu
//------------------------------------------------------------------------
// _dealloc_mem: unmap a region of MEM_SIZE bytes
//
//    Issues munmap(a0, MEM_SIZE).
//
//    Input:    a0 = pointer to the memory space
//    Output:   a0 = value returned by the munmap system call
//    Clobbers: a1, a7
//------------------------------------------------------------------------

_dealloc_mem:
  li      a7, SYSCALL_MUNMAP    // system call number
  li      a1, MEM_SIZE          // length of the region to unmap
  ecall
  ret
29912771Sqtt2@cornell.edu
//------------------------------------------------------------------------
// _join: wait for all child threads to exit
//
//    Child threads are created with CLONE_CHILD_CLEARTID flag, so when
//    they exit, they will clear the ctid/ptid variable and wake up their
//    parent thread.
//
//    This function assumes the following structure in the calling thread's
//    stack frame
//
//    | child_stack_ptr_0       |  << fp: frame pointer
//    | child_tls_ptr_0         |
//    | child_thread_id_0       |
//    | saved_child_thread_id_0 |
//    | child_stack_ptr_1       |
//    | child_tls_ptr_1         |
//    | child_thread_id_1       |
//    | saved_child_thread_id_1 |
//    | ...                     |  << sp: stack pointer
//
//    Input:
//      a0 = number of threads to wait for. A value <= 0 returns
//           immediately.
//
//    sp is used as a cursor over the records while waiting and is restored
//    to its entry value before returning, so the records stay in place for
//    _delete_threads.
//
//    Clobbers: a0-a3, a7, t0, s0, s1 (ra and sp are restored)
//------------------------------------------------------------------------

_join:
  mv      t0, a0          // t0 = number of threads left to wait for
  mv      s0, ra          // save return register
  mv      s1, sp          // save stack pointer
  blez    t0, 2f          // no thread to wait for
1:
  // Calling futex_wait on ctidptr
  ld      a2, (sp)                // expected value: child thread ID from
                                  // saved_child_thread_id
  addi    sp, sp, 8
  mv      a0, sp                  // futex address (child_thread_id)
  // The wake-up issued on behalf of CLONE_CHILD_CLEARTID is a shared
  // (non-private) futex wake, so wait with the shared operation; a private
  // wait is not guaranteed to be matched by it.
  li      a1, FUTEX_WAIT
  li      a3, 0                   // timeout = NULL: block until woken
  li      a7, SYSCALL_FUTEX
  ecall                           // returns at once if the word no longer
                                  // holds the thread ID (child has exited)

  // skip child_thread_id slot's neighbours: child_tls_ptr, child_stack_ptr
  addi    sp, sp, 16

  // decrement the number of threads to wait for
  addi    t0, t0, -1
  bnez    t0, 1b
2:
  // finish waiting for all threads
  mv      ra, s0                  // restore return register
  mv      sp, s1                  // restore stack pointer
  ret
34812771Sqtt2@cornell.edu
// MT_DATA: data definitions shared by the threads of a test.
//
//   n_worker_threads  counter incremented atomically (amoadd.d) by
//                     _create_threads for each thread it creates
//   shared_var, barrier, array
//                     not referenced in this header; presumably used by
//                     the individual tests that include it
//
// The block is aligned to 8 bytes because n_worker_threads is the target
// of a doubleword AMO. The final line deliberately has no line
// continuation, so the text following the macro is never spliced into it.
// Do not put // comments inside the macro body: they would swallow the
// continued lines.
#define MT_DATA                                                           \
  .balign 8;                                                              \
  n_worker_threads:     .dword    0;                                      \
  shared_var:           .dword    0;                                      \
  barrier:              .dword    0;                                      \
  array:                .dword    0x00000000deadbeef,                     \
                                  0xdeadbeefdeadbeef,                     \
                                  0x12343eeaaf423451;
35612771Sqtt2@cornell.edu
35712771Sqtt2@cornell.edu#endif
358