process.cc revision 11387:8eeee90c69a8
1/*
2 * Copyright (c) 2014 Advanced Micro Devices, Inc.
3 * Copyright (c) 2007 The Hewlett-Packard Development Company
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder.  You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2003-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Gabe Black
42 *          Ali Saidi
43 */
44
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/regs/segment.hh"
47#include "arch/x86/isa_traits.hh"
48#include "arch/x86/process.hh"
49#include "arch/x86/system.hh"
50#include "arch/x86/types.hh"
51#include "base/loader/elf_object.hh"
52#include "base/loader/object_file.hh"
53#include "base/misc.hh"
54#include "base/trace.hh"
55#include "cpu/thread_context.hh"
56#include "debug/Stack.hh"
57#include "mem/multi_level_page_table.hh"
58#include "mem/page_table.hh"
59#include "sim/process_impl.hh"
60#include "sim/syscall_emul.hh"
61#include "sim/system.hh"
62
63using namespace std;
64using namespace X86ISA;
65
66static const int ArgumentReg[] = {
67    INTREG_RDI,
68    INTREG_RSI,
69    INTREG_RDX,
70    //This argument register is r10 for syscalls and rcx for C.
71    INTREG_R10W,
72    //INTREG_RCX,
73    INTREG_R8W,
74    INTREG_R9W
75};
76static const int NumArgumentRegs = sizeof(ArgumentReg) / sizeof(const int);
77static const int ArgumentReg32[] = {
78    INTREG_EBX,
79    INTREG_ECX,
80    INTREG_EDX,
81    INTREG_ESI,
82    INTREG_EDI,
83    INTREG_EBP
84};
85static const int NumArgumentRegs32 = sizeof(ArgumentReg) / sizeof(const int);
86
87X86LiveProcess::X86LiveProcess(LiveProcessParams * params, ObjectFile *objFile,
88        SyscallDesc *_syscallDescs, int _numSyscallDescs) :
89    LiveProcess(params, objFile), syscallDescs(_syscallDescs),
90    numSyscallDescs(_numSyscallDescs)
91{
92    brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize();
93    brk_point = roundUp(brk_point, PageBytes);
94}
95
96X86_64LiveProcess::X86_64LiveProcess(LiveProcessParams *params,
97        ObjectFile *objFile, SyscallDesc *_syscallDescs,
98        int _numSyscallDescs) :
99    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
100{
101
102    vsyscallPage.base = 0xffffffffff600000ULL;
103    vsyscallPage.size = PageBytes;
104    vsyscallPage.vtimeOffset = 0x400;
105    vsyscallPage.vgettimeofdayOffset = 0x0;
106
107    // Set up stack. On X86_64 Linux, stack goes from the top of memory
108    // downward, less the hole for the kernel address space plus one page
109    // for undertermined purposes.
110    stack_base = (Addr)0x7FFFFFFFF000ULL;
111
112    // Set pointer for next thread stack.  Reserve 8M for main stack.
113    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
114
115    // "mmap_base" is a function which defines where mmap region starts in
116    // the process address space.
117    // mmap_base: PAGE_ALIGN(TASK_SIZE-MIN_GAP-mmap_rnd())
118    // TASK_SIZE: (1<<47)-PAGE_SIZE
119    // MIN_GAP: 128*1024*1024+stack_maxrandom_size()
120    // We do not use any address space layout randomization in gem5
121    // therefore the random fields become zero; the smallest gap space was
122    // chosen but gap could potentially be much larger.
123    mmap_end = (Addr)0x7FFFF7FFF000ULL;
124}
125
126void
127I386LiveProcess::syscall(int64_t callnum, ThreadContext *tc)
128{
129    TheISA::PCState pc = tc->pcState();
130    Addr eip = pc.pc();
131    if (eip >= vsyscallPage.base &&
132            eip < vsyscallPage.base + vsyscallPage.size) {
133        pc.npc(vsyscallPage.base + vsyscallPage.vsysexitOffset);
134        tc->pcState(pc);
135    }
136    X86LiveProcess::syscall(callnum, tc);
137}
138
139
140I386LiveProcess::I386LiveProcess(LiveProcessParams *params,
141        ObjectFile *objFile, SyscallDesc *_syscallDescs,
142        int _numSyscallDescs) :
143    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
144{
145    _gdtStart = ULL(0xffffd000);
146    _gdtSize = PageBytes;
147
148    vsyscallPage.base = 0xffffe000ULL;
149    vsyscallPage.size = PageBytes;
150    vsyscallPage.vsyscallOffset = 0x400;
151    vsyscallPage.vsysexitOffset = 0x410;
152
153    stack_base = _gdtStart;
154
155    // Set pointer for next thread stack.  Reserve 8M for main stack.
156    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
157
158    // "mmap_base" is a function which defines where mmap region starts in
159    // the process address space.
160    // mmap_base: PAGE_ALIGN(TASK_SIZE-MIN_GAP-mmap_rnd())
161    // TASK_SIZE: 0xC0000000
162    // MIN_GAP: 128*1024*1024+stack_maxrandom_size()
163    // We do not use any address space layout randomization in gem5
164    // therefore the random fields become zero; the smallest gap space was
165    // chosen but gap could potentially be much larger.
166    mmap_end = (Addr)0xB7FFF000ULL;
167}
168
169SyscallDesc*
170X86LiveProcess::getDesc(int callnum)
171{
172    if (callnum < 0 || callnum >= numSyscallDescs)
173        return NULL;
174    return &syscallDescs[callnum];
175}
176
177void
178X86_64LiveProcess::initState()
179{
180    X86LiveProcess::initState();
181
182    argsInit(sizeof(uint64_t), PageBytes);
183
184       // Set up the vsyscall page for this process.
185    allocateMem(vsyscallPage.base, vsyscallPage.size);
186    uint8_t vtimeBlob[] = {
187        0x48,0xc7,0xc0,0xc9,0x00,0x00,0x00,    // mov    $0xc9,%rax
188        0x0f,0x05,                             // syscall
189        0xc3                                   // retq
190    };
191    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vtimeOffset,
192            vtimeBlob, sizeof(vtimeBlob));
193
194    uint8_t vgettimeofdayBlob[] = {
195        0x48,0xc7,0xc0,0x60,0x00,0x00,0x00,    // mov    $0x60,%rax
196        0x0f,0x05,                             // syscall
197        0xc3                                   // retq
198    };
199    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset,
200            vgettimeofdayBlob, sizeof(vgettimeofdayBlob));
201
202    if (kvmInSE) {
203        PortProxy physProxy = system->physProxy;
204
205        /*
206         * Set up the gdt.
207         */
208        uint8_t numGDTEntries = 0;
209        uint64_t nullDescriptor = 0;
210        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
211                            (uint8_t *)(&nullDescriptor), 8);
212        numGDTEntries++;
213
214        SegDescriptor initDesc = 0;
215        initDesc.type.codeOrData = 0; // code or data type
216        initDesc.type.c = 0;          // conforming
217        initDesc.type.r = 1;          // readable
218        initDesc.dpl = 0;             // privilege
219        initDesc.p = 1;               // present
220        initDesc.l = 1;               // longmode - 64 bit
221        initDesc.d = 0;               // operand size
222        initDesc.g = 1;               // granularity
223        initDesc.s = 1;               // system segment
224        initDesc.limitHigh = 0xFFFF;
225        initDesc.limitLow = 0xF;
226        initDesc.baseHigh = 0x0;
227        initDesc.baseLow = 0x0;
228
229        //64 bit code segment
230        SegDescriptor csLowPLDesc = initDesc;
231        csLowPLDesc.type.codeOrData = 1;
232        csLowPLDesc.dpl = 0;
233        uint64_t csLowPLDescVal = csLowPLDesc;
234        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
235                            (uint8_t *)(&csLowPLDescVal), 8);
236
237        numGDTEntries++;
238
239        SegSelector csLowPL = 0;
240        csLowPL.si = numGDTEntries - 1;
241        csLowPL.rpl = 0;
242
243        //64 bit data segment
244        SegDescriptor dsLowPLDesc = initDesc;
245        dsLowPLDesc.type.codeOrData = 0;
246        dsLowPLDesc.dpl = 0;
247        uint64_t dsLowPLDescVal = dsLowPLDesc;
248        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
249                            (uint8_t *)(&dsLowPLDescVal), 8);
250
251        numGDTEntries++;
252
253        SegSelector dsLowPL = 0;
254        dsLowPL.si = numGDTEntries - 1;
255        dsLowPL.rpl = 0;
256
257        //64 bit data segment
258        SegDescriptor dsDesc = initDesc;
259        dsDesc.type.codeOrData = 0;
260        dsDesc.dpl = 3;
261        uint64_t dsDescVal = dsDesc;
262        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
263                            (uint8_t *)(&dsDescVal), 8);
264
265        numGDTEntries++;
266
267        SegSelector ds = 0;
268        ds.si = numGDTEntries - 1;
269        ds.rpl = 3;
270
271        //64 bit code segment
272        SegDescriptor csDesc = initDesc;
273        csDesc.type.codeOrData = 1;
274        csDesc.dpl = 3;
275        uint64_t csDescVal = csDesc;
276        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
277                            (uint8_t *)(&csDescVal), 8);
278
279        numGDTEntries++;
280
281        SegSelector cs = 0;
282        cs.si = numGDTEntries - 1;
283        cs.rpl = 3;
284
285        SegSelector scall = 0;
286        scall.si = csLowPL.si;
287        scall.rpl = 0;
288
289        SegSelector sret = 0;
290        sret.si = dsLowPL.si;
291        sret.rpl = 3;
292
293        /* In long mode the TSS has been extended to 16 Bytes */
294        TSSlow TSSDescLow = 0;
295        TSSDescLow.type = 0xB;
296        TSSDescLow.dpl = 0; // Privelege level 0
297        TSSDescLow.p = 1; // Present
298        TSSDescLow.g = 1; // Page granularity
299        TSSDescLow.limitHigh = 0xF;
300        TSSDescLow.limitLow = 0xFFFF;
301        TSSDescLow.baseLow = bits(TSSVirtAddr, 23, 0);
302        TSSDescLow.baseHigh = bits(TSSVirtAddr, 31, 24);
303
304        TSShigh TSSDescHigh = 0;
305        TSSDescHigh.base = bits(TSSVirtAddr, 63, 32);
306
307        struct TSSDesc {
308            uint64_t low;
309            uint64_t high;
310        } tssDescVal = {TSSDescLow, TSSDescHigh};
311
312        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
313                            (uint8_t *)(&tssDescVal), sizeof(tssDescVal));
314
315        numGDTEntries++;
316
317        SegSelector tssSel = 0;
318        tssSel.si = numGDTEntries - 1;
319
320        uint64_t tss_base_addr = (TSSDescHigh.base << 32) |
321                                 (TSSDescLow.baseHigh << 24) |
322                                  TSSDescLow.baseLow;
323        uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16);
324
325        SegAttr tss_attr = 0;
326
327        tss_attr.type = TSSDescLow.type;
328        tss_attr.dpl = TSSDescLow.dpl;
329        tss_attr.present = TSSDescLow.p;
330        tss_attr.granularity = TSSDescLow.g;
331        tss_attr.unusable = 0;
332
333        for (int i = 0; i < contextIds.size(); i++) {
334            ThreadContext * tc = system->getThreadContext(contextIds[i]);
335
336            tc->setMiscReg(MISCREG_CS, cs);
337            tc->setMiscReg(MISCREG_DS, ds);
338            tc->setMiscReg(MISCREG_ES, ds);
339            tc->setMiscReg(MISCREG_FS, ds);
340            tc->setMiscReg(MISCREG_GS, ds);
341            tc->setMiscReg(MISCREG_SS, ds);
342
343            // LDT
344            tc->setMiscReg(MISCREG_TSL, 0);
345            SegAttr tslAttr = 0;
346            tslAttr.present = 1;
347            tslAttr.type = 2;
348            tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr);
349
350            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
351            tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1);
352
353            tc->setMiscReg(MISCREG_TR, tssSel);
354            tc->setMiscReg(MISCREG_TR_BASE, tss_base_addr);
355            tc->setMiscReg(MISCREG_TR_EFF_BASE, 0);
356            tc->setMiscReg(MISCREG_TR_LIMIT, tss_limit);
357            tc->setMiscReg(MISCREG_TR_ATTR, tss_attr);
358
359            //Start using longmode segments.
360            installSegDesc(tc, SEGMENT_REG_CS, csDesc, true);
361            installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true);
362            installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true);
363            installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true);
364            installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true);
365            installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true);
366
367            Efer efer = 0;
368            efer.sce = 1; // Enable system call extensions.
369            efer.lme = 1; // Enable long mode.
370            efer.lma = 1; // Activate long mode.
371            efer.nxe = 0; // Enable nx support.
372            efer.svme = 1; // Enable svm support for now.
373            efer.ffxsr = 0; // Turn on fast fxsave and fxrstor.
374            tc->setMiscReg(MISCREG_EFER, efer);
375
376            //Set up the registers that describe the operating mode.
377            CR0 cr0 = 0;
378            cr0.pg = 1; // Turn on paging.
379            cr0.cd = 0; // Don't disable caching.
380            cr0.nw = 0; // This is bit is defined to be ignored.
381            cr0.am = 1; // No alignment checking
382            cr0.wp = 1; // Supervisor mode can write read only pages
383            cr0.ne = 1;
384            cr0.et = 1; // This should always be 1
385            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
386                        // would be pointless.
387            cr0.em = 0; // Allow x87 instructions to execute natively.
388            cr0.mp = 1; // This doesn't really matter, but the manual suggests
389                        // setting it to one.
390            cr0.pe = 1; // We're definitely in protected mode.
391            tc->setMiscReg(MISCREG_CR0, cr0);
392
393            CR0 cr2 = 0;
394            tc->setMiscReg(MISCREG_CR2, cr2);
395
396            CR3 cr3 = pageTablePhysAddr;
397            tc->setMiscReg(MISCREG_CR3, cr3);
398
399            CR4 cr4 = 0;
400            //Turn on pae.
401            cr4.osxsave = 1; // Enable XSAVE and Proc Extended States
402            cr4.osxmmexcpt = 1; // Operating System Unmasked Exception
403            cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support
404            cr4.pce = 0; // Performance-Monitoring Counter Enable
405            cr4.pge = 0; // Page-Global Enable
406            cr4.mce = 0; // Machine Check Enable
407            cr4.pae = 1; // Physical-Address Extension
408            cr4.pse = 0; // Page Size Extensions
409            cr4.de = 0; // Debugging Extensions
410            cr4.tsd = 0; // Time Stamp Disable
411            cr4.pvi = 0; // Protected-Mode Virtual Interrupts
412            cr4.vme = 0; // Virtual-8086 Mode Extensions
413
414            tc->setMiscReg(MISCREG_CR4, cr4);
415
416            CR4 cr8 = 0;
417            tc->setMiscReg(MISCREG_CR8, cr8);
418
419            const Addr PageMapLevel4 = pageTablePhysAddr;
420            //Point to the page tables.
421            tc->setMiscReg(MISCREG_CR3, PageMapLevel4);
422
423            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
424
425            tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900);
426
427            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
428            tc->setMiscReg(MISCREG_TSG_LIMIT, 0xffff);
429
430            tc->setMiscReg(MISCREG_IDTR_BASE, IDTVirtAddr);
431            tc->setMiscReg(MISCREG_IDTR_LIMIT, 0xffff);
432
433            /* enabling syscall and sysret */
434            MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32);
435            tc->setMiscReg(MISCREG_STAR, star);
436            MiscReg lstar = (MiscReg)syscallCodeVirtAddr;
437            tc->setMiscReg(MISCREG_LSTAR, lstar);
438            MiscReg sfmask = (1 << 8) | (1 << 10); // TF | DF
439            tc->setMiscReg(MISCREG_SF_MASK, sfmask);
440        }
441
442        /* Set up the content of the TSS and write it to physical memory. */
443
444        struct {
445            uint32_t reserved0;        // +00h
446            uint32_t RSP0_low;         // +04h
447            uint32_t RSP0_high;        // +08h
448            uint32_t RSP1_low;         // +0Ch
449            uint32_t RSP1_high;        // +10h
450            uint32_t RSP2_low;         // +14h
451            uint32_t RSP2_high;        // +18h
452            uint32_t reserved1;        // +1Ch
453            uint32_t reserved2;        // +20h
454            uint32_t IST1_low;         // +24h
455            uint32_t IST1_high;        // +28h
456            uint32_t IST2_low;         // +2Ch
457            uint32_t IST2_high;        // +30h
458            uint32_t IST3_low;         // +34h
459            uint32_t IST3_high;        // +38h
460            uint32_t IST4_low;         // +3Ch
461            uint32_t IST4_high;        // +40h
462            uint32_t IST5_low;         // +44h
463            uint32_t IST5_high;        // +48h
464            uint32_t IST6_low;         // +4Ch
465            uint32_t IST6_high;        // +50h
466            uint32_t IST7_low;         // +54h
467            uint32_t IST7_high;        // +58h
468            uint32_t reserved3;        // +5Ch
469            uint32_t reserved4;        // +60h
470            uint16_t reserved5;        // +64h
471            uint16_t IO_MapBase;       // +66h
472        } tss;
473
474        /** setting Interrupt Stack Table */
475        uint64_t IST_start = ISTVirtAddr + PageBytes;
476        tss.IST1_low  = IST_start;
477        tss.IST1_high = IST_start >> 32;
478        tss.RSP0_low  = tss.IST1_low;
479        tss.RSP0_high = tss.IST1_high;
480        tss.RSP1_low  = tss.IST1_low;
481        tss.RSP1_high = tss.IST1_high;
482        tss.RSP2_low  = tss.IST1_low;
483        tss.RSP2_high = tss.IST1_high;
484        physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss));
485
486        /* Setting IDT gates */
487        GateDescriptorLow PFGateLow = 0;
488        PFGateLow.offsetHigh = bits(PFHandlerVirtAddr, 31, 16);
489        PFGateLow.offsetLow = bits(PFHandlerVirtAddr, 15, 0);
490        PFGateLow.selector = csLowPL;
491        PFGateLow.p = 1;
492        PFGateLow.dpl = 0;
493        PFGateLow.type = 0xe;      // gate interrupt type
494        PFGateLow.IST = 0;         // setting IST to 0 and using RSP0
495
496        GateDescriptorHigh PFGateHigh = 0;
497        PFGateHigh.offset = bits(PFHandlerVirtAddr, 63, 32);
498
499        struct {
500            uint64_t low;
501            uint64_t high;
502        } PFGate = {PFGateLow, PFGateHigh};
503
504        physProxy.writeBlob(IDTPhysAddr + 0xE0,
505                            (uint8_t *)(&PFGate), sizeof(PFGate));
506
507        /* System call handler */
508        uint8_t syscallBlob[] = {
509            // mov    %rax, (0xffffc90000005600)
510            0x48, 0xa3, 0x00, 0x60, 0x00,
511            0x00, 0x00, 0xc9, 0xff, 0xff,
512            // sysret
513            0x48, 0x0f, 0x07
514        };
515
516        physProxy.writeBlob(syscallCodePhysAddr,
517                            syscallBlob, sizeof(syscallBlob));
518
519        /** Page fault handler */
520        uint8_t faultBlob[] = {
521            // mov    %rax, (0xffffc90000005700)
522            0x48, 0xa3, 0x00, 0x61, 0x00,
523            0x00, 0x00, 0xc9, 0xff, 0xff,
524            // add    $0x8, %rsp # skip error
525            0x48, 0x83, 0xc4, 0x08,
526            // iretq
527            0x48, 0xcf
528        };
529
530        physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob));
531
532        MultiLevelPageTable<PageTableOps> *pt =
533            dynamic_cast<MultiLevelPageTable<PageTableOps> *>(pTable);
534
535        /* Syscall handler */
536        pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false);
537        /* GDT */
538        pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false);
539        /* IDT */
540        pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false);
541        /* TSS */
542        pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false);
543        /* IST */
544        pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false);
545        /* PF handler */
546        pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false);
547        /* MMIO region for m5ops */
548        pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false);
549    } else {
550        for (int i = 0; i < contextIds.size(); i++) {
551            ThreadContext * tc = system->getThreadContext(contextIds[i]);
552
553            SegAttr dataAttr = 0;
554            dataAttr.dpl = 3;
555            dataAttr.unusable = 0;
556            dataAttr.defaultSize = 1;
557            dataAttr.longMode = 1;
558            dataAttr.avl = 0;
559            dataAttr.granularity = 1;
560            dataAttr.present = 1;
561            dataAttr.type = 3;
562            dataAttr.writable = 1;
563            dataAttr.readable = 1;
564            dataAttr.expandDown = 0;
565            dataAttr.system = 1;
566
567            //Initialize the segment registers.
568            for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
569                tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
570                tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
571                tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
572            }
573
574            SegAttr csAttr = 0;
575            csAttr.dpl = 3;
576            csAttr.unusable = 0;
577            csAttr.defaultSize = 0;
578            csAttr.longMode = 1;
579            csAttr.avl = 0;
580            csAttr.granularity = 1;
581            csAttr.present = 1;
582            csAttr.type = 10;
583            csAttr.writable = 0;
584            csAttr.readable = 1;
585            csAttr.expandDown = 0;
586            csAttr.system = 1;
587
588            tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
589
590            Efer efer = 0;
591            efer.sce = 1; // Enable system call extensions.
592            efer.lme = 1; // Enable long mode.
593            efer.lma = 1; // Activate long mode.
594            efer.nxe = 1; // Enable nx support.
595            efer.svme = 0; // Disable svm support for now. It isn't implemented.
596            efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
597            tc->setMiscReg(MISCREG_EFER, efer);
598
599            //Set up the registers that describe the operating mode.
600            CR0 cr0 = 0;
601            cr0.pg = 1; // Turn on paging.
602            cr0.cd = 0; // Don't disable caching.
603            cr0.nw = 0; // This is bit is defined to be ignored.
604            cr0.am = 0; // No alignment checking
605            cr0.wp = 0; // Supervisor mode can write read only pages
606            cr0.ne = 1;
607            cr0.et = 1; // This should always be 1
608            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
609                        // would be pointless.
610            cr0.em = 0; // Allow x87 instructions to execute natively.
611            cr0.mp = 1; // This doesn't really matter, but the manual suggests
612                        // setting it to one.
613            cr0.pe = 1; // We're definitely in protected mode.
614            tc->setMiscReg(MISCREG_CR0, cr0);
615
616            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
617        }
618    }
619}
620
621void
622I386LiveProcess::initState()
623{
624    X86LiveProcess::initState();
625
626    argsInit(sizeof(uint32_t), PageBytes);
627
628    /*
629     * Set up a GDT for this process. The whole GDT wouldn't really be for
630     * this process, but the only parts we care about are.
631     */
632    allocateMem(_gdtStart, _gdtSize);
633    uint64_t zero = 0;
634    assert(_gdtSize % sizeof(zero) == 0);
635    for (Addr gdtCurrent = _gdtStart;
636            gdtCurrent < _gdtStart + _gdtSize; gdtCurrent += sizeof(zero)) {
637        initVirtMem.write(gdtCurrent, zero);
638    }
639
640    // Set up the vsyscall page for this process.
641    allocateMem(vsyscallPage.base, vsyscallPage.size);
642    uint8_t vsyscallBlob[] = {
643        0x51,       // push %ecx
644        0x52,       // push %edp
645        0x55,       // push %ebp
646        0x89, 0xe5, // mov %esp, %ebp
647        0x0f, 0x34  // sysenter
648    };
649    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsyscallOffset,
650            vsyscallBlob, sizeof(vsyscallBlob));
651
652    uint8_t vsysexitBlob[] = {
653        0x5d,       // pop %ebp
654        0x5a,       // pop %edx
655        0x59,       // pop %ecx
656        0xc3        // ret
657    };
658    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsysexitOffset,
659            vsysexitBlob, sizeof(vsysexitBlob));
660
661    for (int i = 0; i < contextIds.size(); i++) {
662        ThreadContext * tc = system->getThreadContext(contextIds[i]);
663
664        SegAttr dataAttr = 0;
665        dataAttr.dpl = 3;
666        dataAttr.unusable = 0;
667        dataAttr.defaultSize = 1;
668        dataAttr.longMode = 0;
669        dataAttr.avl = 0;
670        dataAttr.granularity = 1;
671        dataAttr.present = 1;
672        dataAttr.type = 3;
673        dataAttr.writable = 1;
674        dataAttr.readable = 1;
675        dataAttr.expandDown = 0;
676        dataAttr.system = 1;
677
678        //Initialize the segment registers.
679        for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
680            tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
681            tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
682            tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
683            tc->setMiscRegNoEffect(MISCREG_SEG_SEL(seg), 0xB);
684            tc->setMiscRegNoEffect(MISCREG_SEG_LIMIT(seg), (uint32_t)(-1));
685        }
686
687        SegAttr csAttr = 0;
688        csAttr.dpl = 3;
689        csAttr.unusable = 0;
690        csAttr.defaultSize = 1;
691        csAttr.longMode = 0;
692        csAttr.avl = 0;
693        csAttr.granularity = 1;
694        csAttr.present = 1;
695        csAttr.type = 0xa;
696        csAttr.writable = 0;
697        csAttr.readable = 1;
698        csAttr.expandDown = 0;
699        csAttr.system = 1;
700
701        tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
702
703        tc->setMiscRegNoEffect(MISCREG_TSG_BASE, _gdtStart);
704        tc->setMiscRegNoEffect(MISCREG_TSG_EFF_BASE, _gdtStart);
705        tc->setMiscRegNoEffect(MISCREG_TSG_LIMIT, _gdtStart + _gdtSize - 1);
706
707        // Set the LDT selector to 0 to deactivate it.
708        tc->setMiscRegNoEffect(MISCREG_TSL, 0);
709
710        Efer efer = 0;
711        efer.sce = 1; // Enable system call extensions.
712        efer.lme = 1; // Enable long mode.
713        efer.lma = 0; // Deactivate long mode.
714        efer.nxe = 1; // Enable nx support.
715        efer.svme = 0; // Disable svm support for now. It isn't implemented.
716        efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
717        tc->setMiscReg(MISCREG_EFER, efer);
718
719        //Set up the registers that describe the operating mode.
720        CR0 cr0 = 0;
721        cr0.pg = 1; // Turn on paging.
722        cr0.cd = 0; // Don't disable caching.
723        cr0.nw = 0; // This is bit is defined to be ignored.
724        cr0.am = 0; // No alignment checking
725        cr0.wp = 0; // Supervisor mode can write read only pages
726        cr0.ne = 1;
727        cr0.et = 1; // This should always be 1
728        cr0.ts = 0; // We don't do task switching, so causing fp exceptions
729                    // would be pointless.
730        cr0.em = 0; // Allow x87 instructions to execute natively.
731        cr0.mp = 1; // This doesn't really matter, but the manual suggests
732                    // setting it to one.
733        cr0.pe = 1; // We're definitely in protected mode.
734        tc->setMiscReg(MISCREG_CR0, cr0);
735
736        tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
737    }
738}
739
740template<class IntType>
741void
742X86LiveProcess::argsInit(int pageSize,
743        std::vector<AuxVector<IntType> > extraAuxvs)
744{
745    int intSize = sizeof(IntType);
746
747    typedef AuxVector<IntType> auxv_t;
748    std::vector<auxv_t> auxv = extraAuxvs;
749
750    string filename;
751    if (argv.size() < 1)
752        filename = "";
753    else
754        filename = argv[0];
755
756    //We want 16 byte alignment
757    uint64_t align = 16;
758
759    // load object file into target memory
760    objFile->loadSections(initVirtMem);
761
762    enum X86CpuFeature {
763        X86_OnboardFPU = 1 << 0,
764        X86_VirtualModeExtensions = 1 << 1,
765        X86_DebuggingExtensions = 1 << 2,
766        X86_PageSizeExtensions = 1 << 3,
767
768        X86_TimeStampCounter = 1 << 4,
769        X86_ModelSpecificRegisters = 1 << 5,
770        X86_PhysicalAddressExtensions = 1 << 6,
771        X86_MachineCheckExtensions = 1 << 7,
772
773        X86_CMPXCHG8Instruction = 1 << 8,
774        X86_OnboardAPIC = 1 << 9,
775        X86_SYSENTER_SYSEXIT = 1 << 11,
776
777        X86_MemoryTypeRangeRegisters = 1 << 12,
778        X86_PageGlobalEnable = 1 << 13,
779        X86_MachineCheckArchitecture = 1 << 14,
780        X86_CMOVInstruction = 1 << 15,
781
782        X86_PageAttributeTable = 1 << 16,
783        X86_36BitPSEs = 1 << 17,
784        X86_ProcessorSerialNumber = 1 << 18,
785        X86_CLFLUSHInstruction = 1 << 19,
786
787        X86_DebugTraceStore = 1 << 21,
788        X86_ACPIViaMSR = 1 << 22,
789        X86_MultimediaExtensions = 1 << 23,
790
791        X86_FXSAVE_FXRSTOR = 1 << 24,
792        X86_StreamingSIMDExtensions = 1 << 25,
793        X86_StreamingSIMDExtensions2 = 1 << 26,
794        X86_CPUSelfSnoop = 1 << 27,
795
796        X86_HyperThreading = 1 << 28,
797        X86_AutomaticClockControl = 1 << 29,
798        X86_IA64Processor = 1 << 30
799    };
800
801    // Setup the auxilliary vectors. These will already have endian conversion.
802    // Auxilliary vectors are loaded only for elf formatted executables.
803    ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile);
804    if (elfObject) {
805        uint64_t features =
806            X86_OnboardFPU |
807            X86_VirtualModeExtensions |
808            X86_DebuggingExtensions |
809            X86_PageSizeExtensions |
810            X86_TimeStampCounter |
811            X86_ModelSpecificRegisters |
812            X86_PhysicalAddressExtensions |
813            X86_MachineCheckExtensions |
814            X86_CMPXCHG8Instruction |
815            X86_OnboardAPIC |
816            X86_SYSENTER_SYSEXIT |
817            X86_MemoryTypeRangeRegisters |
818            X86_PageGlobalEnable |
819            X86_MachineCheckArchitecture |
820            X86_CMOVInstruction |
821            X86_PageAttributeTable |
822            X86_36BitPSEs |
823//            X86_ProcessorSerialNumber |
824            X86_CLFLUSHInstruction |
825//            X86_DebugTraceStore |
826//            X86_ACPIViaMSR |
827            X86_MultimediaExtensions |
828            X86_FXSAVE_FXRSTOR |
829            X86_StreamingSIMDExtensions |
830            X86_StreamingSIMDExtensions2 |
831//            X86_CPUSelfSnoop |
832//            X86_HyperThreading |
833//            X86_AutomaticClockControl |
834//            X86_IA64Processor |
835            0;
836
837        //Bits which describe the system hardware capabilities
838        //XXX Figure out what these should be
839        auxv.push_back(auxv_t(M5_AT_HWCAP, features));
840        //The system page size
841        auxv.push_back(auxv_t(M5_AT_PAGESZ, X86ISA::PageBytes));
842        //Frequency at which times() increments
843        //Defined to be 100 in the kernel source.
844        auxv.push_back(auxv_t(M5_AT_CLKTCK, 100));
845        // For statically linked executables, this is the virtual address of the
846        // program header tables if they appear in the executable image
847        auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable()));
848        // This is the size of a program header entry from the elf file.
849        auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize()));
850        // This is the number of program headers from the original elf file.
851        auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount()));
852        //This is the address of the elf "interpreter", It should be set
853        //to 0 for regular executables. It should be something else
854        //(not sure what) for dynamic libraries.
855        auxv.push_back(auxv_t(M5_AT_BASE, 0));
856
857        //XXX Figure out what this should be.
858        auxv.push_back(auxv_t(M5_AT_FLAGS, 0));
859        //The entry point to the program
860        auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint()));
861        //Different user and group IDs
862        auxv.push_back(auxv_t(M5_AT_UID, uid()));
863        auxv.push_back(auxv_t(M5_AT_EUID, euid()));
864        auxv.push_back(auxv_t(M5_AT_GID, gid()));
865        auxv.push_back(auxv_t(M5_AT_EGID, egid()));
866        //Whether to enable "secure mode" in the executable
867        auxv.push_back(auxv_t(M5_AT_SECURE, 0));
868        //The address of 16 "random" bytes.
869        auxv.push_back(auxv_t(M5_AT_RANDOM, 0));
870        //The name of the program
871        auxv.push_back(auxv_t(M5_AT_EXECFN, 0));
872        //The platform string
873        auxv.push_back(auxv_t(M5_AT_PLATFORM, 0));
874    }
875
876    //Figure out how big the initial stack needs to be
877
878    // A sentry NULL void pointer at the top of the stack.
879    int sentry_size = intSize;
880
881    //This is the name of the file which is present on the initial stack
882    //It's purpose is to let the user space linker examine the original file.
883    int file_name_size = filename.size() + 1;
884
885    const int numRandomBytes = 16;
886    int aux_data_size = numRandomBytes;
887
888    string platform = "x86_64";
889    aux_data_size += platform.size() + 1;
890
891    int env_data_size = 0;
892    for (int i = 0; i < envp.size(); ++i)
893        env_data_size += envp[i].size() + 1;
894    int arg_data_size = 0;
895    for (int i = 0; i < argv.size(); ++i)
896        arg_data_size += argv[i].size() + 1;
897
898    //The info_block needs to be padded so it's size is a multiple of the
899    //alignment mask. Also, it appears that there needs to be at least some
900    //padding, so if the size is already a multiple, we need to increase it
901    //anyway.
902    int base_info_block_size =
903        sentry_size + file_name_size + env_data_size + arg_data_size;
904
905    int info_block_size = roundUp(base_info_block_size, align);
906
907    int info_block_padding = info_block_size - base_info_block_size;
908
909    //Each auxilliary vector is two 8 byte words
910    int aux_array_size = intSize * 2 * (auxv.size() + 1);
911
912    int envp_array_size = intSize * (envp.size() + 1);
913    int argv_array_size = intSize * (argv.size() + 1);
914
915    int argc_size = intSize;
916
917    //Figure out the size of the contents of the actual initial frame
918    int frame_size =
919        aux_array_size +
920        envp_array_size +
921        argv_array_size +
922        argc_size;
923
924    //There needs to be padding after the auxiliary vector data so that the
925    //very bottom of the stack is aligned properly.
926    int partial_size = frame_size + aux_data_size;
927    int aligned_partial_size = roundUp(partial_size, align);
928    int aux_padding = aligned_partial_size - partial_size;
929
930    int space_needed =
931        info_block_size +
932        aux_data_size +
933        aux_padding +
934        frame_size;
935
936    stack_min = stack_base - space_needed;
937    stack_min = roundDown(stack_min, align);
938    stack_size = roundUp(stack_base - stack_min, pageSize);
939
940    // map memory
941    Addr stack_end = roundDown(stack_base - stack_size, pageSize);
942
943    DPRINTF(Stack, "Mapping the stack: 0x%x %dB\n", stack_end, stack_size);
944    allocateMem(stack_end, stack_size);
945
946    // map out initial stack contents
947    IntType sentry_base = stack_base - sentry_size;
948    IntType file_name_base = sentry_base - file_name_size;
949    IntType env_data_base = file_name_base - env_data_size;
950    IntType arg_data_base = env_data_base - arg_data_size;
951    IntType aux_data_base = arg_data_base - info_block_padding - aux_data_size;
952    IntType auxv_array_base = aux_data_base - aux_array_size - aux_padding;
953    IntType envp_array_base = auxv_array_base - envp_array_size;
954    IntType argv_array_base = envp_array_base - argv_array_size;
955    IntType argc_base = argv_array_base - argc_size;
956
957    DPRINTF(Stack, "The addresses of items on the initial stack:\n");
958    DPRINTF(Stack, "0x%x - file name\n", file_name_base);
959    DPRINTF(Stack, "0x%x - env data\n", env_data_base);
960    DPRINTF(Stack, "0x%x - arg data\n", arg_data_base);
961    DPRINTF(Stack, "0x%x - aux data\n", aux_data_base);
962    DPRINTF(Stack, "0x%x - auxv array\n", auxv_array_base);
963    DPRINTF(Stack, "0x%x - envp array\n", envp_array_base);
964    DPRINTF(Stack, "0x%x - argv array\n", argv_array_base);
965    DPRINTF(Stack, "0x%x - argc \n", argc_base);
966    DPRINTF(Stack, "0x%x - stack min\n", stack_min);
967
968    // write contents to stack
969
970    // figure out argc
971    IntType argc = argv.size();
972    IntType guestArgc = X86ISA::htog(argc);
973
974    //Write out the sentry void *
975    IntType sentry_NULL = 0;
976    initVirtMem.writeBlob(sentry_base,
977            (uint8_t*)&sentry_NULL, sentry_size);
978
979    //Write the file name
980    initVirtMem.writeString(file_name_base, filename.c_str());
981
982    //Fix up the aux vectors which point to data
983    assert(auxv[auxv.size() - 3].a_type == M5_AT_RANDOM);
984    auxv[auxv.size() - 3].a_val = aux_data_base;
985    assert(auxv[auxv.size() - 2].a_type == M5_AT_EXECFN);
986    auxv[auxv.size() - 2].a_val = argv_array_base;
987    assert(auxv[auxv.size() - 1].a_type == M5_AT_PLATFORM);
988    auxv[auxv.size() - 1].a_val = aux_data_base + numRandomBytes;
989
990    //Copy the aux stuff
991    for (int x = 0; x < auxv.size(); x++) {
992        initVirtMem.writeBlob(auxv_array_base + x * 2 * intSize,
993                (uint8_t*)&(auxv[x].a_type), intSize);
994        initVirtMem.writeBlob(auxv_array_base + (x * 2 + 1) * intSize,
995                (uint8_t*)&(auxv[x].a_val), intSize);
996    }
997    //Write out the terminating zeroed auxilliary vector
998    const uint64_t zero = 0;
999    initVirtMem.writeBlob(auxv_array_base + auxv.size() * 2 * intSize,
1000                          (uint8_t*)&zero, intSize);
1001    initVirtMem.writeBlob(auxv_array_base + (auxv.size() * 2 + 1) * intSize,
1002                          (uint8_t*)&zero, intSize);
1003
1004    initVirtMem.writeString(aux_data_base, platform.c_str());
1005
1006    copyStringArray(envp, envp_array_base, env_data_base, initVirtMem);
1007    copyStringArray(argv, argv_array_base, arg_data_base, initVirtMem);
1008
1009    initVirtMem.writeBlob(argc_base, (uint8_t*)&guestArgc, intSize);
1010
1011    ThreadContext *tc = system->getThreadContext(contextIds[0]);
1012    //Set the stack pointer register
1013    tc->setIntReg(StackPointerReg, stack_min);
1014
1015    // There doesn't need to be any segment base added in since we're dealing
1016    // with the flat segmentation model.
1017    tc->pcState(objFile->entryPoint());
1018
1019    //Align the "stack_min" to a page boundary.
1020    stack_min = roundDown(stack_min, pageSize);
1021
1022//    num_processes++;
1023}
1024
1025void
1026X86_64LiveProcess::argsInit(int intSize, int pageSize)
1027{
1028    std::vector<AuxVector<uint64_t> > extraAuxvs;
1029    extraAuxvs.push_back(AuxVector<uint64_t>(M5_AT_SYSINFO_EHDR,
1030                vsyscallPage.base));
1031    X86LiveProcess::argsInit<uint64_t>(pageSize, extraAuxvs);
1032}
1033
1034void
1035I386LiveProcess::argsInit(int intSize, int pageSize)
1036{
1037    std::vector<AuxVector<uint32_t> > extraAuxvs;
1038    //Tell the binary where the vsyscall part of the vsyscall page is.
1039    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO,
1040                vsyscallPage.base + vsyscallPage.vsyscallOffset));
1041    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO_EHDR,
1042                vsyscallPage.base));
1043    X86LiveProcess::argsInit<uint32_t>(pageSize, extraAuxvs);
1044}
1045
1046void
1047X86LiveProcess::setSyscallReturn(ThreadContext *tc, SyscallReturn retval)
1048{
1049    tc->setIntReg(INTREG_RAX, retval.encodedValue());
1050}
1051
1052X86ISA::IntReg
1053X86_64LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1054{
1055    assert(i < NumArgumentRegs);
1056    return tc->readIntReg(ArgumentReg[i++]);
1057}
1058
1059void
1060X86_64LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1061{
1062    assert(i < NumArgumentRegs);
1063    return tc->setIntReg(ArgumentReg[i], val);
1064}
1065
1066X86ISA::IntReg
1067I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1068{
1069    assert(i < NumArgumentRegs32);
1070    return tc->readIntReg(ArgumentReg32[i++]);
1071}
1072
1073X86ISA::IntReg
1074I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width)
1075{
1076    assert(width == 32 || width == 64);
1077    assert(i < NumArgumentRegs);
1078    uint64_t retVal = tc->readIntReg(ArgumentReg32[i++]) & mask(32);
1079    if (width == 64)
1080        retVal |= ((uint64_t)tc->readIntReg(ArgumentReg[i++]) << 32);
1081    return retVal;
1082}
1083
1084void
1085I386LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1086{
1087    assert(i < NumArgumentRegs);
1088    return tc->setIntReg(ArgumentReg[i], val);
1089}
1090