process.cc revision 11704:c38fcdaa5fe5
/*
 * Copyright (c) 2014 Advanced Micro Devices, Inc.
 * Copyright (c) 2007 The Hewlett-Packard Development Company
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2003-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 *          Ali Saidi
 */
44
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/regs/segment.hh"
47#include "arch/x86/isa_traits.hh"
48#include "arch/x86/process.hh"
49#include "arch/x86/system.hh"
50#include "arch/x86/types.hh"
51#include "base/loader/elf_object.hh"
52#include "base/loader/object_file.hh"
53#include "base/misc.hh"
54#include "base/trace.hh"
55#include "cpu/thread_context.hh"
56#include "debug/Stack.hh"
57#include "mem/multi_level_page_table.hh"
58#include "mem/page_table.hh"
59#include "sim/process_impl.hh"
60#include "sim/syscall_emul.hh"
61#include "sim/system.hh"
62
63using namespace std;
64using namespace X86ISA;
65
66static const int ArgumentReg[] = {
67    INTREG_RDI,
68    INTREG_RSI,
69    INTREG_RDX,
70    //This argument register is r10 for syscalls and rcx for C.
71    INTREG_R10W,
72    //INTREG_RCX,
73    INTREG_R8W,
74    INTREG_R9W
75};
76
77static const int NumArgumentRegs M5_VAR_USED =
78    sizeof(ArgumentReg) / sizeof(const int);
79
80static const int ArgumentReg32[] = {
81    INTREG_EBX,
82    INTREG_ECX,
83    INTREG_EDX,
84    INTREG_ESI,
85    INTREG_EDI,
86    INTREG_EBP
87};
88
89static const int NumArgumentRegs32 M5_VAR_USED =
90    sizeof(ArgumentReg) / sizeof(const int);
91
92X86LiveProcess::X86LiveProcess(LiveProcessParams * params, ObjectFile *objFile,
93        SyscallDesc *_syscallDescs, int _numSyscallDescs) :
94    LiveProcess(params, objFile), syscallDescs(_syscallDescs),
95    numSyscallDescs(_numSyscallDescs)
96{
97    brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize();
98    brk_point = roundUp(brk_point, PageBytes);
99}
100
101X86_64LiveProcess::X86_64LiveProcess(LiveProcessParams *params,
102        ObjectFile *objFile, SyscallDesc *_syscallDescs,
103        int _numSyscallDescs) :
104    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
105{
106
107    vsyscallPage.base = 0xffffffffff600000ULL;
108    vsyscallPage.size = PageBytes;
109    vsyscallPage.vtimeOffset = 0x400;
110    vsyscallPage.vgettimeofdayOffset = 0x0;
111
112    // Set up stack. On X86_64 Linux, stack goes from the top of memory
113    // downward, less the hole for the kernel address space plus one page
114    // for undertermined purposes.
115    stack_base = (Addr)0x7FFFFFFFF000ULL;
116
117    // Set pointer for next thread stack.  Reserve 8M for main stack.
118    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
119
120    // "mmap_base" is a function which defines where mmap region starts in
121    // the process address space.
122    // mmap_base: PAGE_ALIGN(TASK_SIZE-MIN_GAP-mmap_rnd())
123    // TASK_SIZE: (1<<47)-PAGE_SIZE
124    // MIN_GAP: 128*1024*1024+stack_maxrandom_size()
125    // We do not use any address space layout randomization in gem5
126    // therefore the random fields become zero; the smallest gap space was
127    // chosen but gap could potentially be much larger.
128    mmap_end = (Addr)0x7FFFF7FFF000ULL;
129}
130
131void
132I386LiveProcess::syscall(int64_t callnum, ThreadContext *tc)
133{
134    TheISA::PCState pc = tc->pcState();
135    Addr eip = pc.pc();
136    if (eip >= vsyscallPage.base &&
137            eip < vsyscallPage.base + vsyscallPage.size) {
138        pc.npc(vsyscallPage.base + vsyscallPage.vsysexitOffset);
139        tc->pcState(pc);
140    }
141    X86LiveProcess::syscall(callnum, tc);
142}
143
144
145I386LiveProcess::I386LiveProcess(LiveProcessParams *params,
146        ObjectFile *objFile, SyscallDesc *_syscallDescs,
147        int _numSyscallDescs) :
148    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
149{
150    _gdtStart = ULL(0xffffd000);
151    _gdtSize = PageBytes;
152
153    vsyscallPage.base = 0xffffe000ULL;
154    vsyscallPage.size = PageBytes;
155    vsyscallPage.vsyscallOffset = 0x400;
156    vsyscallPage.vsysexitOffset = 0x410;
157
158    stack_base = _gdtStart;
159
160    // Set pointer for next thread stack.  Reserve 8M for main stack.
161    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
162
163    // "mmap_base" is a function which defines where mmap region starts in
164    // the process address space.
165    // mmap_base: PAGE_ALIGN(TASK_SIZE-MIN_GAP-mmap_rnd())
166    // TASK_SIZE: 0xC0000000
167    // MIN_GAP: 128*1024*1024+stack_maxrandom_size()
168    // We do not use any address space layout randomization in gem5
169    // therefore the random fields become zero; the smallest gap space was
170    // chosen but gap could potentially be much larger.
171    mmap_end = (Addr)0xB7FFF000ULL;
172}
173
174SyscallDesc*
175X86LiveProcess::getDesc(int callnum)
176{
177    if (callnum < 0 || callnum >= numSyscallDescs)
178        return NULL;
179    return &syscallDescs[callnum];
180}
181
182void
183X86_64LiveProcess::initState()
184{
185    X86LiveProcess::initState();
186
187    argsInit(sizeof(uint64_t), PageBytes);
188
189       // Set up the vsyscall page for this process.
190    allocateMem(vsyscallPage.base, vsyscallPage.size);
191    uint8_t vtimeBlob[] = {
192        0x48,0xc7,0xc0,0xc9,0x00,0x00,0x00,    // mov    $0xc9,%rax
193        0x0f,0x05,                             // syscall
194        0xc3                                   // retq
195    };
196    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vtimeOffset,
197            vtimeBlob, sizeof(vtimeBlob));
198
199    uint8_t vgettimeofdayBlob[] = {
200        0x48,0xc7,0xc0,0x60,0x00,0x00,0x00,    // mov    $0x60,%rax
201        0x0f,0x05,                             // syscall
202        0xc3                                   // retq
203    };
204    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset,
205            vgettimeofdayBlob, sizeof(vgettimeofdayBlob));
206
207    if (kvmInSE) {
208        PortProxy physProxy = system->physProxy;
209
210        /*
211         * Set up the gdt.
212         */
213        uint8_t numGDTEntries = 0;
214        uint64_t nullDescriptor = 0;
215        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
216                            (uint8_t *)(&nullDescriptor), 8);
217        numGDTEntries++;
218
219        SegDescriptor initDesc = 0;
220        initDesc.type.codeOrData = 0; // code or data type
221        initDesc.type.c = 0;          // conforming
222        initDesc.type.r = 1;          // readable
223        initDesc.dpl = 0;             // privilege
224        initDesc.p = 1;               // present
225        initDesc.l = 1;               // longmode - 64 bit
226        initDesc.d = 0;               // operand size
227        initDesc.g = 1;               // granularity
228        initDesc.s = 1;               // system segment
229        initDesc.limitHigh = 0xFFFF;
230        initDesc.limitLow = 0xF;
231        initDesc.baseHigh = 0x0;
232        initDesc.baseLow = 0x0;
233
234        //64 bit code segment
235        SegDescriptor csLowPLDesc = initDesc;
236        csLowPLDesc.type.codeOrData = 1;
237        csLowPLDesc.dpl = 0;
238        uint64_t csLowPLDescVal = csLowPLDesc;
239        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
240                            (uint8_t *)(&csLowPLDescVal), 8);
241
242        numGDTEntries++;
243
244        SegSelector csLowPL = 0;
245        csLowPL.si = numGDTEntries - 1;
246        csLowPL.rpl = 0;
247
248        //64 bit data segment
249        SegDescriptor dsLowPLDesc = initDesc;
250        dsLowPLDesc.type.codeOrData = 0;
251        dsLowPLDesc.dpl = 0;
252        uint64_t dsLowPLDescVal = dsLowPLDesc;
253        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
254                            (uint8_t *)(&dsLowPLDescVal), 8);
255
256        numGDTEntries++;
257
258        SegSelector dsLowPL = 0;
259        dsLowPL.si = numGDTEntries - 1;
260        dsLowPL.rpl = 0;
261
262        //64 bit data segment
263        SegDescriptor dsDesc = initDesc;
264        dsDesc.type.codeOrData = 0;
265        dsDesc.dpl = 3;
266        uint64_t dsDescVal = dsDesc;
267        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
268                            (uint8_t *)(&dsDescVal), 8);
269
270        numGDTEntries++;
271
272        SegSelector ds = 0;
273        ds.si = numGDTEntries - 1;
274        ds.rpl = 3;
275
276        //64 bit code segment
277        SegDescriptor csDesc = initDesc;
278        csDesc.type.codeOrData = 1;
279        csDesc.dpl = 3;
280        uint64_t csDescVal = csDesc;
281        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
282                            (uint8_t *)(&csDescVal), 8);
283
284        numGDTEntries++;
285
286        SegSelector cs = 0;
287        cs.si = numGDTEntries - 1;
288        cs.rpl = 3;
289
290        SegSelector scall = 0;
291        scall.si = csLowPL.si;
292        scall.rpl = 0;
293
294        SegSelector sret = 0;
295        sret.si = dsLowPL.si;
296        sret.rpl = 3;
297
298        /* In long mode the TSS has been extended to 16 Bytes */
299        TSSlow TSSDescLow = 0;
300        TSSDescLow.type = 0xB;
301        TSSDescLow.dpl = 0; // Privelege level 0
302        TSSDescLow.p = 1; // Present
303        TSSDescLow.g = 1; // Page granularity
304        TSSDescLow.limitHigh = 0xF;
305        TSSDescLow.limitLow = 0xFFFF;
306        TSSDescLow.baseLow = bits(TSSVirtAddr, 23, 0);
307        TSSDescLow.baseHigh = bits(TSSVirtAddr, 31, 24);
308
309        TSShigh TSSDescHigh = 0;
310        TSSDescHigh.base = bits(TSSVirtAddr, 63, 32);
311
312        struct TSSDesc {
313            uint64_t low;
314            uint64_t high;
315        } tssDescVal = {TSSDescLow, TSSDescHigh};
316
317        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
318                            (uint8_t *)(&tssDescVal), sizeof(tssDescVal));
319
320        numGDTEntries++;
321
322        SegSelector tssSel = 0;
323        tssSel.si = numGDTEntries - 1;
324
325        uint64_t tss_base_addr = (TSSDescHigh.base << 32) |
326                                 (TSSDescLow.baseHigh << 24) |
327                                  TSSDescLow.baseLow;
328        uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16);
329
330        SegAttr tss_attr = 0;
331
332        tss_attr.type = TSSDescLow.type;
333        tss_attr.dpl = TSSDescLow.dpl;
334        tss_attr.present = TSSDescLow.p;
335        tss_attr.granularity = TSSDescLow.g;
336        tss_attr.unusable = 0;
337
338        for (int i = 0; i < contextIds.size(); i++) {
339            ThreadContext * tc = system->getThreadContext(contextIds[i]);
340
341            tc->setMiscReg(MISCREG_CS, cs);
342            tc->setMiscReg(MISCREG_DS, ds);
343            tc->setMiscReg(MISCREG_ES, ds);
344            tc->setMiscReg(MISCREG_FS, ds);
345            tc->setMiscReg(MISCREG_GS, ds);
346            tc->setMiscReg(MISCREG_SS, ds);
347
348            // LDT
349            tc->setMiscReg(MISCREG_TSL, 0);
350            SegAttr tslAttr = 0;
351            tslAttr.present = 1;
352            tslAttr.type = 2;
353            tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr);
354
355            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
356            tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1);
357
358            tc->setMiscReg(MISCREG_TR, tssSel);
359            tc->setMiscReg(MISCREG_TR_BASE, tss_base_addr);
360            tc->setMiscReg(MISCREG_TR_EFF_BASE, 0);
361            tc->setMiscReg(MISCREG_TR_LIMIT, tss_limit);
362            tc->setMiscReg(MISCREG_TR_ATTR, tss_attr);
363
364            //Start using longmode segments.
365            installSegDesc(tc, SEGMENT_REG_CS, csDesc, true);
366            installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true);
367            installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true);
368            installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true);
369            installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true);
370            installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true);
371
372            Efer efer = 0;
373            efer.sce = 1; // Enable system call extensions.
374            efer.lme = 1; // Enable long mode.
375            efer.lma = 1; // Activate long mode.
376            efer.nxe = 0; // Enable nx support.
377            efer.svme = 1; // Enable svm support for now.
378            efer.ffxsr = 0; // Turn on fast fxsave and fxrstor.
379            tc->setMiscReg(MISCREG_EFER, efer);
380
381            //Set up the registers that describe the operating mode.
382            CR0 cr0 = 0;
383            cr0.pg = 1; // Turn on paging.
384            cr0.cd = 0; // Don't disable caching.
385            cr0.nw = 0; // This is bit is defined to be ignored.
386            cr0.am = 1; // No alignment checking
387            cr0.wp = 1; // Supervisor mode can write read only pages
388            cr0.ne = 1;
389            cr0.et = 1; // This should always be 1
390            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
391                        // would be pointless.
392            cr0.em = 0; // Allow x87 instructions to execute natively.
393            cr0.mp = 1; // This doesn't really matter, but the manual suggests
394                        // setting it to one.
395            cr0.pe = 1; // We're definitely in protected mode.
396            tc->setMiscReg(MISCREG_CR0, cr0);
397
398            CR0 cr2 = 0;
399            tc->setMiscReg(MISCREG_CR2, cr2);
400
401            CR3 cr3 = pageTablePhysAddr;
402            tc->setMiscReg(MISCREG_CR3, cr3);
403
404            CR4 cr4 = 0;
405            //Turn on pae.
406            cr4.osxsave = 1; // Enable XSAVE and Proc Extended States
407            cr4.osxmmexcpt = 1; // Operating System Unmasked Exception
408            cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support
409            cr4.pce = 0; // Performance-Monitoring Counter Enable
410            cr4.pge = 0; // Page-Global Enable
411            cr4.mce = 0; // Machine Check Enable
412            cr4.pae = 1; // Physical-Address Extension
413            cr4.pse = 0; // Page Size Extensions
414            cr4.de = 0; // Debugging Extensions
415            cr4.tsd = 0; // Time Stamp Disable
416            cr4.pvi = 0; // Protected-Mode Virtual Interrupts
417            cr4.vme = 0; // Virtual-8086 Mode Extensions
418
419            tc->setMiscReg(MISCREG_CR4, cr4);
420
421            CR4 cr8 = 0;
422            tc->setMiscReg(MISCREG_CR8, cr8);
423
424            const Addr PageMapLevel4 = pageTablePhysAddr;
425            //Point to the page tables.
426            tc->setMiscReg(MISCREG_CR3, PageMapLevel4);
427
428            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
429
430            tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900);
431
432            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
433            tc->setMiscReg(MISCREG_TSG_LIMIT, 0xffff);
434
435            tc->setMiscReg(MISCREG_IDTR_BASE, IDTVirtAddr);
436            tc->setMiscReg(MISCREG_IDTR_LIMIT, 0xffff);
437
438            /* enabling syscall and sysret */
439            MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32);
440            tc->setMiscReg(MISCREG_STAR, star);
441            MiscReg lstar = (MiscReg)syscallCodeVirtAddr;
442            tc->setMiscReg(MISCREG_LSTAR, lstar);
443            MiscReg sfmask = (1 << 8) | (1 << 10); // TF | DF
444            tc->setMiscReg(MISCREG_SF_MASK, sfmask);
445        }
446
447        /* Set up the content of the TSS and write it to physical memory. */
448
449        struct {
450            uint32_t reserved0;        // +00h
451            uint32_t RSP0_low;         // +04h
452            uint32_t RSP0_high;        // +08h
453            uint32_t RSP1_low;         // +0Ch
454            uint32_t RSP1_high;        // +10h
455            uint32_t RSP2_low;         // +14h
456            uint32_t RSP2_high;        // +18h
457            uint32_t reserved1;        // +1Ch
458            uint32_t reserved2;        // +20h
459            uint32_t IST1_low;         // +24h
460            uint32_t IST1_high;        // +28h
461            uint32_t IST2_low;         // +2Ch
462            uint32_t IST2_high;        // +30h
463            uint32_t IST3_low;         // +34h
464            uint32_t IST3_high;        // +38h
465            uint32_t IST4_low;         // +3Ch
466            uint32_t IST4_high;        // +40h
467            uint32_t IST5_low;         // +44h
468            uint32_t IST5_high;        // +48h
469            uint32_t IST6_low;         // +4Ch
470            uint32_t IST6_high;        // +50h
471            uint32_t IST7_low;         // +54h
472            uint32_t IST7_high;        // +58h
473            uint32_t reserved3;        // +5Ch
474            uint32_t reserved4;        // +60h
475            uint16_t reserved5;        // +64h
476            uint16_t IO_MapBase;       // +66h
477        } tss;
478
479        /** setting Interrupt Stack Table */
480        uint64_t IST_start = ISTVirtAddr + PageBytes;
481        tss.IST1_low  = IST_start;
482        tss.IST1_high = IST_start >> 32;
483        tss.RSP0_low  = tss.IST1_low;
484        tss.RSP0_high = tss.IST1_high;
485        tss.RSP1_low  = tss.IST1_low;
486        tss.RSP1_high = tss.IST1_high;
487        tss.RSP2_low  = tss.IST1_low;
488        tss.RSP2_high = tss.IST1_high;
489        physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss));
490
491        /* Setting IDT gates */
492        GateDescriptorLow PFGateLow = 0;
493        PFGateLow.offsetHigh = bits(PFHandlerVirtAddr, 31, 16);
494        PFGateLow.offsetLow = bits(PFHandlerVirtAddr, 15, 0);
495        PFGateLow.selector = csLowPL;
496        PFGateLow.p = 1;
497        PFGateLow.dpl = 0;
498        PFGateLow.type = 0xe;      // gate interrupt type
499        PFGateLow.IST = 0;         // setting IST to 0 and using RSP0
500
501        GateDescriptorHigh PFGateHigh = 0;
502        PFGateHigh.offset = bits(PFHandlerVirtAddr, 63, 32);
503
504        struct {
505            uint64_t low;
506            uint64_t high;
507        } PFGate = {PFGateLow, PFGateHigh};
508
509        physProxy.writeBlob(IDTPhysAddr + 0xE0,
510                            (uint8_t *)(&PFGate), sizeof(PFGate));
511
512        /* System call handler */
513        uint8_t syscallBlob[] = {
514            // mov    %rax, (0xffffc90000005600)
515            0x48, 0xa3, 0x00, 0x60, 0x00,
516            0x00, 0x00, 0xc9, 0xff, 0xff,
517            // sysret
518            0x48, 0x0f, 0x07
519        };
520
521        physProxy.writeBlob(syscallCodePhysAddr,
522                            syscallBlob, sizeof(syscallBlob));
523
524        /** Page fault handler */
525        uint8_t faultBlob[] = {
526            // mov    %rax, (0xffffc90000005700)
527            0x48, 0xa3, 0x00, 0x61, 0x00,
528            0x00, 0x00, 0xc9, 0xff, 0xff,
529            // add    $0x8, %rsp # skip error
530            0x48, 0x83, 0xc4, 0x08,
531            // iretq
532            0x48, 0xcf
533        };
534
535        physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob));
536
537        MultiLevelPageTable<PageTableOps> *pt =
538            dynamic_cast<MultiLevelPageTable<PageTableOps> *>(pTable);
539
540        /* Syscall handler */
541        pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false);
542        /* GDT */
543        pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false);
544        /* IDT */
545        pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false);
546        /* TSS */
547        pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false);
548        /* IST */
549        pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false);
550        /* PF handler */
551        pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false);
552        /* MMIO region for m5ops */
553        pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false);
554    } else {
555        for (int i = 0; i < contextIds.size(); i++) {
556            ThreadContext * tc = system->getThreadContext(contextIds[i]);
557
558            SegAttr dataAttr = 0;
559            dataAttr.dpl = 3;
560            dataAttr.unusable = 0;
561            dataAttr.defaultSize = 1;
562            dataAttr.longMode = 1;
563            dataAttr.avl = 0;
564            dataAttr.granularity = 1;
565            dataAttr.present = 1;
566            dataAttr.type = 3;
567            dataAttr.writable = 1;
568            dataAttr.readable = 1;
569            dataAttr.expandDown = 0;
570            dataAttr.system = 1;
571
572            //Initialize the segment registers.
573            for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
574                tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
575                tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
576                tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
577            }
578
579            SegAttr csAttr = 0;
580            csAttr.dpl = 3;
581            csAttr.unusable = 0;
582            csAttr.defaultSize = 0;
583            csAttr.longMode = 1;
584            csAttr.avl = 0;
585            csAttr.granularity = 1;
586            csAttr.present = 1;
587            csAttr.type = 10;
588            csAttr.writable = 0;
589            csAttr.readable = 1;
590            csAttr.expandDown = 0;
591            csAttr.system = 1;
592
593            tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
594
595            Efer efer = 0;
596            efer.sce = 1; // Enable system call extensions.
597            efer.lme = 1; // Enable long mode.
598            efer.lma = 1; // Activate long mode.
599            efer.nxe = 1; // Enable nx support.
600            efer.svme = 0; // Disable svm support for now. It isn't implemented.
601            efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
602            tc->setMiscReg(MISCREG_EFER, efer);
603
604            //Set up the registers that describe the operating mode.
605            CR0 cr0 = 0;
606            cr0.pg = 1; // Turn on paging.
607            cr0.cd = 0; // Don't disable caching.
608            cr0.nw = 0; // This is bit is defined to be ignored.
609            cr0.am = 0; // No alignment checking
610            cr0.wp = 0; // Supervisor mode can write read only pages
611            cr0.ne = 1;
612            cr0.et = 1; // This should always be 1
613            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
614                        // would be pointless.
615            cr0.em = 0; // Allow x87 instructions to execute natively.
616            cr0.mp = 1; // This doesn't really matter, but the manual suggests
617                        // setting it to one.
618            cr0.pe = 1; // We're definitely in protected mode.
619            tc->setMiscReg(MISCREG_CR0, cr0);
620
621            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
622        }
623    }
624}
625
626void
627I386LiveProcess::initState()
628{
629    X86LiveProcess::initState();
630
631    argsInit(sizeof(uint32_t), PageBytes);
632
633    /*
634     * Set up a GDT for this process. The whole GDT wouldn't really be for
635     * this process, but the only parts we care about are.
636     */
637    allocateMem(_gdtStart, _gdtSize);
638    uint64_t zero = 0;
639    assert(_gdtSize % sizeof(zero) == 0);
640    for (Addr gdtCurrent = _gdtStart;
641            gdtCurrent < _gdtStart + _gdtSize; gdtCurrent += sizeof(zero)) {
642        initVirtMem.write(gdtCurrent, zero);
643    }
644
645    // Set up the vsyscall page for this process.
646    allocateMem(vsyscallPage.base, vsyscallPage.size);
647    uint8_t vsyscallBlob[] = {
648        0x51,       // push %ecx
649        0x52,       // push %edp
650        0x55,       // push %ebp
651        0x89, 0xe5, // mov %esp, %ebp
652        0x0f, 0x34  // sysenter
653    };
654    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsyscallOffset,
655            vsyscallBlob, sizeof(vsyscallBlob));
656
657    uint8_t vsysexitBlob[] = {
658        0x5d,       // pop %ebp
659        0x5a,       // pop %edx
660        0x59,       // pop %ecx
661        0xc3        // ret
662    };
663    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsysexitOffset,
664            vsysexitBlob, sizeof(vsysexitBlob));
665
666    for (int i = 0; i < contextIds.size(); i++) {
667        ThreadContext * tc = system->getThreadContext(contextIds[i]);
668
669        SegAttr dataAttr = 0;
670        dataAttr.dpl = 3;
671        dataAttr.unusable = 0;
672        dataAttr.defaultSize = 1;
673        dataAttr.longMode = 0;
674        dataAttr.avl = 0;
675        dataAttr.granularity = 1;
676        dataAttr.present = 1;
677        dataAttr.type = 3;
678        dataAttr.writable = 1;
679        dataAttr.readable = 1;
680        dataAttr.expandDown = 0;
681        dataAttr.system = 1;
682
683        //Initialize the segment registers.
684        for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
685            tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
686            tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
687            tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
688            tc->setMiscRegNoEffect(MISCREG_SEG_SEL(seg), 0xB);
689            tc->setMiscRegNoEffect(MISCREG_SEG_LIMIT(seg), (uint32_t)(-1));
690        }
691
692        SegAttr csAttr = 0;
693        csAttr.dpl = 3;
694        csAttr.unusable = 0;
695        csAttr.defaultSize = 1;
696        csAttr.longMode = 0;
697        csAttr.avl = 0;
698        csAttr.granularity = 1;
699        csAttr.present = 1;
700        csAttr.type = 0xa;
701        csAttr.writable = 0;
702        csAttr.readable = 1;
703        csAttr.expandDown = 0;
704        csAttr.system = 1;
705
706        tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
707
708        tc->setMiscRegNoEffect(MISCREG_TSG_BASE, _gdtStart);
709        tc->setMiscRegNoEffect(MISCREG_TSG_EFF_BASE, _gdtStart);
710        tc->setMiscRegNoEffect(MISCREG_TSG_LIMIT, _gdtStart + _gdtSize - 1);
711
712        // Set the LDT selector to 0 to deactivate it.
713        tc->setMiscRegNoEffect(MISCREG_TSL, 0);
714
715        Efer efer = 0;
716        efer.sce = 1; // Enable system call extensions.
717        efer.lme = 1; // Enable long mode.
718        efer.lma = 0; // Deactivate long mode.
719        efer.nxe = 1; // Enable nx support.
720        efer.svme = 0; // Disable svm support for now. It isn't implemented.
721        efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
722        tc->setMiscReg(MISCREG_EFER, efer);
723
724        //Set up the registers that describe the operating mode.
725        CR0 cr0 = 0;
726        cr0.pg = 1; // Turn on paging.
727        cr0.cd = 0; // Don't disable caching.
728        cr0.nw = 0; // This is bit is defined to be ignored.
729        cr0.am = 0; // No alignment checking
730        cr0.wp = 0; // Supervisor mode can write read only pages
731        cr0.ne = 1;
732        cr0.et = 1; // This should always be 1
733        cr0.ts = 0; // We don't do task switching, so causing fp exceptions
734                    // would be pointless.
735        cr0.em = 0; // Allow x87 instructions to execute natively.
736        cr0.mp = 1; // This doesn't really matter, but the manual suggests
737                    // setting it to one.
738        cr0.pe = 1; // We're definitely in protected mode.
739        tc->setMiscReg(MISCREG_CR0, cr0);
740
741        tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
742    }
743}
744
745template<class IntType>
746void
747X86LiveProcess::argsInit(int pageSize,
748        std::vector<AuxVector<IntType> > extraAuxvs)
749{
750    int intSize = sizeof(IntType);
751
752    typedef AuxVector<IntType> auxv_t;
753    std::vector<auxv_t> auxv = extraAuxvs;
754
755    string filename;
756    if (argv.size() < 1)
757        filename = "";
758    else
759        filename = argv[0];
760
761    //We want 16 byte alignment
762    uint64_t align = 16;
763
764    // Patch the ld_bias for dynamic executables.
765    updateBias();
766
767    // load object file into target memory
768    objFile->loadSections(initVirtMem);
769
770    enum X86CpuFeature {
771        X86_OnboardFPU = 1 << 0,
772        X86_VirtualModeExtensions = 1 << 1,
773        X86_DebuggingExtensions = 1 << 2,
774        X86_PageSizeExtensions = 1 << 3,
775
776        X86_TimeStampCounter = 1 << 4,
777        X86_ModelSpecificRegisters = 1 << 5,
778        X86_PhysicalAddressExtensions = 1 << 6,
779        X86_MachineCheckExtensions = 1 << 7,
780
781        X86_CMPXCHG8Instruction = 1 << 8,
782        X86_OnboardAPIC = 1 << 9,
783        X86_SYSENTER_SYSEXIT = 1 << 11,
784
785        X86_MemoryTypeRangeRegisters = 1 << 12,
786        X86_PageGlobalEnable = 1 << 13,
787        X86_MachineCheckArchitecture = 1 << 14,
788        X86_CMOVInstruction = 1 << 15,
789
790        X86_PageAttributeTable = 1 << 16,
791        X86_36BitPSEs = 1 << 17,
792        X86_ProcessorSerialNumber = 1 << 18,
793        X86_CLFLUSHInstruction = 1 << 19,
794
795        X86_DebugTraceStore = 1 << 21,
796        X86_ACPIViaMSR = 1 << 22,
797        X86_MultimediaExtensions = 1 << 23,
798
799        X86_FXSAVE_FXRSTOR = 1 << 24,
800        X86_StreamingSIMDExtensions = 1 << 25,
801        X86_StreamingSIMDExtensions2 = 1 << 26,
802        X86_CPUSelfSnoop = 1 << 27,
803
804        X86_HyperThreading = 1 << 28,
805        X86_AutomaticClockControl = 1 << 29,
806        X86_IA64Processor = 1 << 30
807    };
808
809    // Setup the auxiliary vectors. These will already have endian
810    // conversion. Auxiliary vectors are loaded only for elf formatted
811    // executables; the auxv is responsible for passing information from
812    // the OS to the interpreter.
813    ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile);
814    if (elfObject) {
815        uint64_t features =
816            X86_OnboardFPU |
817            X86_VirtualModeExtensions |
818            X86_DebuggingExtensions |
819            X86_PageSizeExtensions |
820            X86_TimeStampCounter |
821            X86_ModelSpecificRegisters |
822            X86_PhysicalAddressExtensions |
823            X86_MachineCheckExtensions |
824            X86_CMPXCHG8Instruction |
825            X86_OnboardAPIC |
826            X86_SYSENTER_SYSEXIT |
827            X86_MemoryTypeRangeRegisters |
828            X86_PageGlobalEnable |
829            X86_MachineCheckArchitecture |
830            X86_CMOVInstruction |
831            X86_PageAttributeTable |
832            X86_36BitPSEs |
833//            X86_ProcessorSerialNumber |
834            X86_CLFLUSHInstruction |
835//            X86_DebugTraceStore |
836//            X86_ACPIViaMSR |
837            X86_MultimediaExtensions |
838            X86_FXSAVE_FXRSTOR |
839            X86_StreamingSIMDExtensions |
840            X86_StreamingSIMDExtensions2 |
841//            X86_CPUSelfSnoop |
842//            X86_HyperThreading |
843//            X86_AutomaticClockControl |
844//            X86_IA64Processor |
845            0;
846
847        //Bits which describe the system hardware capabilities
848        //XXX Figure out what these should be
849        auxv.push_back(auxv_t(M5_AT_HWCAP, features));
850        //The system page size
851        auxv.push_back(auxv_t(M5_AT_PAGESZ, X86ISA::PageBytes));
852        //Frequency at which times() increments
853        //Defined to be 100 in the kernel source.
854        auxv.push_back(auxv_t(M5_AT_CLKTCK, 100));
855        // This is the virtual address of the program header tables if they
856        // appear in the executable image.
857        auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable()));
858        // This is the size of a program header entry from the elf file.
859        auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize()));
860        // This is the number of program headers from the original elf file.
861        auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount()));
862        // This is the base address of the ELF interpreter; it should be
863        // zero for static executables or contain the base address for
864        // dynamic executables.
865        auxv.push_back(auxv_t(M5_AT_BASE, getBias()));
866        //XXX Figure out what this should be.
867        auxv.push_back(auxv_t(M5_AT_FLAGS, 0));
868        //The entry point to the program
869        auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint()));
870        //Different user and group IDs
871        auxv.push_back(auxv_t(M5_AT_UID, uid()));
872        auxv.push_back(auxv_t(M5_AT_EUID, euid()));
873        auxv.push_back(auxv_t(M5_AT_GID, gid()));
874        auxv.push_back(auxv_t(M5_AT_EGID, egid()));
875        //Whether to enable "secure mode" in the executable
876        auxv.push_back(auxv_t(M5_AT_SECURE, 0));
877        //The address of 16 "random" bytes.
878        auxv.push_back(auxv_t(M5_AT_RANDOM, 0));
879        //The name of the program
880        auxv.push_back(auxv_t(M5_AT_EXECFN, 0));
881        //The platform string
882        auxv.push_back(auxv_t(M5_AT_PLATFORM, 0));
883    }
884
885    //Figure out how big the initial stack needs to be
886
887    // A sentry NULL void pointer at the top of the stack.
888    int sentry_size = intSize;
889
890    //This is the name of the file which is present on the initial stack
891    //It's purpose is to let the user space linker examine the original file.
892    int file_name_size = filename.size() + 1;
893
894    const int numRandomBytes = 16;
895    int aux_data_size = numRandomBytes;
896
897    string platform = "x86_64";
898    aux_data_size += platform.size() + 1;
899
900    int env_data_size = 0;
901    for (int i = 0; i < envp.size(); ++i)
902        env_data_size += envp[i].size() + 1;
903    int arg_data_size = 0;
904    for (int i = 0; i < argv.size(); ++i)
905        arg_data_size += argv[i].size() + 1;
906
907    //The info_block needs to be padded so it's size is a multiple of the
908    //alignment mask. Also, it appears that there needs to be at least some
909    //padding, so if the size is already a multiple, we need to increase it
910    //anyway.
911    int base_info_block_size =
912        sentry_size + file_name_size + env_data_size + arg_data_size;
913
914    int info_block_size = roundUp(base_info_block_size, align);
915
916    int info_block_padding = info_block_size - base_info_block_size;
917
918    //Each auxilliary vector is two 8 byte words
919    int aux_array_size = intSize * 2 * (auxv.size() + 1);
920
921    int envp_array_size = intSize * (envp.size() + 1);
922    int argv_array_size = intSize * (argv.size() + 1);
923
924    int argc_size = intSize;
925
926    //Figure out the size of the contents of the actual initial frame
927    int frame_size =
928        aux_array_size +
929        envp_array_size +
930        argv_array_size +
931        argc_size;
932
933    //There needs to be padding after the auxiliary vector data so that the
934    //very bottom of the stack is aligned properly.
935    int partial_size = frame_size + aux_data_size;
936    int aligned_partial_size = roundUp(partial_size, align);
937    int aux_padding = aligned_partial_size - partial_size;
938
939    int space_needed =
940        info_block_size +
941        aux_data_size +
942        aux_padding +
943        frame_size;
944
945    stack_min = stack_base - space_needed;
946    stack_min = roundDown(stack_min, align);
947    stack_size = roundUp(stack_base - stack_min, pageSize);
948
949    // map memory
950    Addr stack_end = roundDown(stack_base - stack_size, pageSize);
951
952    DPRINTF(Stack, "Mapping the stack: 0x%x %dB\n", stack_end, stack_size);
953    allocateMem(stack_end, stack_size);
954
955    // map out initial stack contents
956    IntType sentry_base = stack_base - sentry_size;
957    IntType file_name_base = sentry_base - file_name_size;
958    IntType env_data_base = file_name_base - env_data_size;
959    IntType arg_data_base = env_data_base - arg_data_size;
960    IntType aux_data_base = arg_data_base - info_block_padding - aux_data_size;
961    IntType auxv_array_base = aux_data_base - aux_array_size - aux_padding;
962    IntType envp_array_base = auxv_array_base - envp_array_size;
963    IntType argv_array_base = envp_array_base - argv_array_size;
964    IntType argc_base = argv_array_base - argc_size;
965
966    DPRINTF(Stack, "The addresses of items on the initial stack:\n");
967    DPRINTF(Stack, "0x%x - file name\n", file_name_base);
968    DPRINTF(Stack, "0x%x - env data\n", env_data_base);
969    DPRINTF(Stack, "0x%x - arg data\n", arg_data_base);
970    DPRINTF(Stack, "0x%x - aux data\n", aux_data_base);
971    DPRINTF(Stack, "0x%x - auxv array\n", auxv_array_base);
972    DPRINTF(Stack, "0x%x - envp array\n", envp_array_base);
973    DPRINTF(Stack, "0x%x - argv array\n", argv_array_base);
974    DPRINTF(Stack, "0x%x - argc \n", argc_base);
975    DPRINTF(Stack, "0x%x - stack min\n", stack_min);
976
977    // write contents to stack
978
979    // figure out argc
980    IntType argc = argv.size();
981    IntType guestArgc = X86ISA::htog(argc);
982
983    //Write out the sentry void *
984    IntType sentry_NULL = 0;
985    initVirtMem.writeBlob(sentry_base,
986            (uint8_t*)&sentry_NULL, sentry_size);
987
988    //Write the file name
989    initVirtMem.writeString(file_name_base, filename.c_str());
990
991    //Fix up the aux vectors which point to data
992    assert(auxv[auxv.size() - 3].a_type == M5_AT_RANDOM);
993    auxv[auxv.size() - 3].a_val = aux_data_base;
994    assert(auxv[auxv.size() - 2].a_type == M5_AT_EXECFN);
995    auxv[auxv.size() - 2].a_val = argv_array_base;
996    assert(auxv[auxv.size() - 1].a_type == M5_AT_PLATFORM);
997    auxv[auxv.size() - 1].a_val = aux_data_base + numRandomBytes;
998
999    //Copy the aux stuff
1000    for (int x = 0; x < auxv.size(); x++) {
1001        initVirtMem.writeBlob(auxv_array_base + x * 2 * intSize,
1002                (uint8_t*)&(auxv[x].a_type), intSize);
1003        initVirtMem.writeBlob(auxv_array_base + (x * 2 + 1) * intSize,
1004                (uint8_t*)&(auxv[x].a_val), intSize);
1005    }
1006    //Write out the terminating zeroed auxilliary vector
1007    const uint64_t zero = 0;
1008    initVirtMem.writeBlob(auxv_array_base + auxv.size() * 2 * intSize,
1009                          (uint8_t*)&zero, intSize);
1010    initVirtMem.writeBlob(auxv_array_base + (auxv.size() * 2 + 1) * intSize,
1011                          (uint8_t*)&zero, intSize);
1012
1013    initVirtMem.writeString(aux_data_base, platform.c_str());
1014
1015    copyStringArray(envp, envp_array_base, env_data_base, initVirtMem);
1016    copyStringArray(argv, argv_array_base, arg_data_base, initVirtMem);
1017
1018    initVirtMem.writeBlob(argc_base, (uint8_t*)&guestArgc, intSize);
1019
1020    ThreadContext *tc = system->getThreadContext(contextIds[0]);
1021    //Set the stack pointer register
1022    tc->setIntReg(StackPointerReg, stack_min);
1023
1024    // There doesn't need to be any segment base added in since we're dealing
1025    // with the flat segmentation model.
1026    tc->pcState(getStartPC());
1027
1028    //Align the "stack_min" to a page boundary.
1029    stack_min = roundDown(stack_min, pageSize);
1030
1031//    num_processes++;
1032}
1033
1034void
1035X86_64LiveProcess::argsInit(int intSize, int pageSize)
1036{
1037    std::vector<AuxVector<uint64_t> > extraAuxvs;
1038    extraAuxvs.push_back(AuxVector<uint64_t>(M5_AT_SYSINFO_EHDR,
1039                vsyscallPage.base));
1040    X86LiveProcess::argsInit<uint64_t>(pageSize, extraAuxvs);
1041}
1042
1043void
1044I386LiveProcess::argsInit(int intSize, int pageSize)
1045{
1046    std::vector<AuxVector<uint32_t> > extraAuxvs;
1047    //Tell the binary where the vsyscall part of the vsyscall page is.
1048    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO,
1049                vsyscallPage.base + vsyscallPage.vsyscallOffset));
1050    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO_EHDR,
1051                vsyscallPage.base));
1052    X86LiveProcess::argsInit<uint32_t>(pageSize, extraAuxvs);
1053}
1054
1055void
1056X86LiveProcess::setSyscallReturn(ThreadContext *tc, SyscallReturn retval)
1057{
1058    tc->setIntReg(INTREG_RAX, retval.encodedValue());
1059}
1060
1061X86ISA::IntReg
1062X86_64LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1063{
1064    assert(i < NumArgumentRegs);
1065    return tc->readIntReg(ArgumentReg[i++]);
1066}
1067
1068void
1069X86_64LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1070{
1071    assert(i < NumArgumentRegs);
1072    return tc->setIntReg(ArgumentReg[i], val);
1073}
1074
1075X86ISA::IntReg
1076I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1077{
1078    assert(i < NumArgumentRegs32);
1079    return tc->readIntReg(ArgumentReg32[i++]);
1080}
1081
1082X86ISA::IntReg
1083I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width)
1084{
1085    assert(width == 32 || width == 64);
1086    assert(i < NumArgumentRegs);
1087    uint64_t retVal = tc->readIntReg(ArgumentReg32[i++]) & mask(32);
1088    if (width == 64)
1089        retVal |= ((uint64_t)tc->readIntReg(ArgumentReg[i++]) << 32);
1090    return retVal;
1091}
1092
1093void
1094I386LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1095{
1096    assert(i < NumArgumentRegs);
1097    return tc->setIntReg(ArgumentReg[i], val);
1098}
1099