process.cc revision 11320:42ecb523c64a
1/*
2 * Copyright (c) 2014 Advanced Micro Devices, Inc.
3 * Copyright (c) 2007 The Hewlett-Packard Development Company
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder.  You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2003-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Gabe Black
42 *          Ali Saidi
43 */
44
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/regs/segment.hh"
47#include "arch/x86/isa_traits.hh"
48#include "arch/x86/process.hh"
49#include "arch/x86/system.hh"
50#include "arch/x86/types.hh"
51#include "base/loader/elf_object.hh"
52#include "base/loader/object_file.hh"
53#include "base/misc.hh"
54#include "base/trace.hh"
55#include "cpu/thread_context.hh"
56#include "debug/Stack.hh"
57#include "mem/multi_level_page_table.hh"
58#include "mem/page_table.hh"
59#include "sim/process_impl.hh"
60#include "sim/syscall_emul.hh"
61#include "sim/system.hh"
62
63using namespace std;
64using namespace X86ISA;
65
66static const int ArgumentReg[] = {
67    INTREG_RDI,
68    INTREG_RSI,
69    INTREG_RDX,
70    //This argument register is r10 for syscalls and rcx for C.
71    INTREG_R10W,
72    //INTREG_RCX,
73    INTREG_R8W,
74    INTREG_R9W
75};
76static const int NumArgumentRegs = sizeof(ArgumentReg) / sizeof(const int);
77static const int ArgumentReg32[] = {
78    INTREG_EBX,
79    INTREG_ECX,
80    INTREG_EDX,
81    INTREG_ESI,
82    INTREG_EDI,
83};
84static const int NumArgumentRegs32 = sizeof(ArgumentReg) / sizeof(const int);
85
86X86LiveProcess::X86LiveProcess(LiveProcessParams * params, ObjectFile *objFile,
87        SyscallDesc *_syscallDescs, int _numSyscallDescs) :
88    LiveProcess(params, objFile), syscallDescs(_syscallDescs),
89    numSyscallDescs(_numSyscallDescs)
90{
91    brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize();
92    brk_point = roundUp(brk_point, PageBytes);
93}
94
95X86_64LiveProcess::X86_64LiveProcess(LiveProcessParams *params,
96        ObjectFile *objFile, SyscallDesc *_syscallDescs,
97        int _numSyscallDescs) :
98    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
99{
100
101    vsyscallPage.base = 0xffffffffff600000ULL;
102    vsyscallPage.size = PageBytes;
103    vsyscallPage.vtimeOffset = 0x400;
104    vsyscallPage.vgettimeofdayOffset = 0x0;
105
106    // Set up stack. On X86_64 Linux, stack goes from the top of memory
107    // downward, less the hole for the kernel address space plus one page
108    // for undertermined purposes.
109    stack_base = (Addr)0x7FFFFFFFF000ULL;
110
111    // Set pointer for next thread stack.  Reserve 8M for main stack.
112    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
113
114    // Set up region for mmaps. This was determined empirically and may not
115    // always be correct.
116    mmap_start = mmap_end = (Addr)0x2aaaaaaab000ULL;
117}
118
119void
120I386LiveProcess::syscall(int64_t callnum, ThreadContext *tc)
121{
122    TheISA::PCState pc = tc->pcState();
123    Addr eip = pc.pc();
124    if (eip >= vsyscallPage.base &&
125            eip < vsyscallPage.base + vsyscallPage.size) {
126        pc.npc(vsyscallPage.base + vsyscallPage.vsysexitOffset);
127        tc->pcState(pc);
128    }
129    X86LiveProcess::syscall(callnum, tc);
130}
131
132
133I386LiveProcess::I386LiveProcess(LiveProcessParams *params,
134        ObjectFile *objFile, SyscallDesc *_syscallDescs,
135        int _numSyscallDescs) :
136    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
137{
138    _gdtStart = ULL(0xffffd000);
139    _gdtSize = PageBytes;
140
141    vsyscallPage.base = 0xffffe000ULL;
142    vsyscallPage.size = PageBytes;
143    vsyscallPage.vsyscallOffset = 0x400;
144    vsyscallPage.vsysexitOffset = 0x410;
145
146    stack_base = _gdtStart;
147
148    // Set pointer for next thread stack.  Reserve 8M for main stack.
149    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
150
151    // Set up region for mmaps. This was determined empirically and may not
152    // always be correct.
153    mmap_start = mmap_end = (Addr)0xf7ffe000ULL;
154}
155
156SyscallDesc*
157X86LiveProcess::getDesc(int callnum)
158{
159    if (callnum < 0 || callnum >= numSyscallDescs)
160        return NULL;
161    return &syscallDescs[callnum];
162}
163
164void
165X86_64LiveProcess::initState()
166{
167    X86LiveProcess::initState();
168
169    argsInit(sizeof(uint64_t), PageBytes);
170
171       // Set up the vsyscall page for this process.
172    allocateMem(vsyscallPage.base, vsyscallPage.size);
173    uint8_t vtimeBlob[] = {
174        0x48,0xc7,0xc0,0xc9,0x00,0x00,0x00,    // mov    $0xc9,%rax
175        0x0f,0x05,                             // syscall
176        0xc3                                   // retq
177    };
178    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vtimeOffset,
179            vtimeBlob, sizeof(vtimeBlob));
180
181    uint8_t vgettimeofdayBlob[] = {
182        0x48,0xc7,0xc0,0x60,0x00,0x00,0x00,    // mov    $0x60,%rax
183        0x0f,0x05,                             // syscall
184        0xc3                                   // retq
185    };
186    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset,
187            vgettimeofdayBlob, sizeof(vgettimeofdayBlob));
188
189    if (kvmInSE) {
190        PortProxy physProxy = system->physProxy;
191
192        /*
193         * Set up the gdt.
194         */
195        uint8_t numGDTEntries = 0;
196        uint64_t nullDescriptor = 0;
197        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
198                            (uint8_t *)(&nullDescriptor), 8);
199        numGDTEntries++;
200
201        SegDescriptor initDesc = 0;
202        initDesc.type.codeOrData = 0; // code or data type
203        initDesc.type.c = 0;          // conforming
204        initDesc.type.r = 1;          // readable
205        initDesc.dpl = 0;             // privilege
206        initDesc.p = 1;               // present
207        initDesc.l = 1;               // longmode - 64 bit
208        initDesc.d = 0;               // operand size
209        initDesc.g = 1;               // granularity
210        initDesc.s = 1;               // system segment
211        initDesc.limitHigh = 0xFFFF;
212        initDesc.limitLow = 0xF;
213        initDesc.baseHigh = 0x0;
214        initDesc.baseLow = 0x0;
215
216        //64 bit code segment
217        SegDescriptor csLowPLDesc = initDesc;
218        csLowPLDesc.type.codeOrData = 1;
219        csLowPLDesc.dpl = 0;
220        uint64_t csLowPLDescVal = csLowPLDesc;
221        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
222                            (uint8_t *)(&csLowPLDescVal), 8);
223
224        numGDTEntries++;
225
226        SegSelector csLowPL = 0;
227        csLowPL.si = numGDTEntries - 1;
228        csLowPL.rpl = 0;
229
230        //64 bit data segment
231        SegDescriptor dsLowPLDesc = initDesc;
232        dsLowPLDesc.type.codeOrData = 0;
233        dsLowPLDesc.dpl = 0;
234        uint64_t dsLowPLDescVal = dsLowPLDesc;
235        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
236                            (uint8_t *)(&dsLowPLDescVal), 8);
237
238        numGDTEntries++;
239
240        SegSelector dsLowPL = 0;
241        dsLowPL.si = numGDTEntries - 1;
242        dsLowPL.rpl = 0;
243
244        //64 bit data segment
245        SegDescriptor dsDesc = initDesc;
246        dsDesc.type.codeOrData = 0;
247        dsDesc.dpl = 3;
248        uint64_t dsDescVal = dsDesc;
249        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
250                            (uint8_t *)(&dsDescVal), 8);
251
252        numGDTEntries++;
253
254        SegSelector ds = 0;
255        ds.si = numGDTEntries - 1;
256        ds.rpl = 3;
257
258        //64 bit code segment
259        SegDescriptor csDesc = initDesc;
260        csDesc.type.codeOrData = 1;
261        csDesc.dpl = 3;
262        uint64_t csDescVal = csDesc;
263        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
264                            (uint8_t *)(&csDescVal), 8);
265
266        numGDTEntries++;
267
268        SegSelector cs = 0;
269        cs.si = numGDTEntries - 1;
270        cs.rpl = 3;
271
272        SegSelector scall = 0;
273        scall.si = csLowPL.si;
274        scall.rpl = 0;
275
276        SegSelector sret = 0;
277        sret.si = dsLowPL.si;
278        sret.rpl = 3;
279
280        /* In long mode the TSS has been extended to 16 Bytes */
281        TSSlow TSSDescLow = 0;
282        TSSDescLow.type = 0xB;
283        TSSDescLow.dpl = 0; // Privelege level 0
284        TSSDescLow.p = 1; // Present
285        TSSDescLow.g = 1; // Page granularity
286        TSSDescLow.limitHigh = 0xF;
287        TSSDescLow.limitLow = 0xFFFF;
288        TSSDescLow.baseLow = bits(TSSVirtAddr, 23, 0);
289        TSSDescLow.baseHigh = bits(TSSVirtAddr, 31, 24);
290
291        TSShigh TSSDescHigh = 0;
292        TSSDescHigh.base = bits(TSSVirtAddr, 63, 32);
293
294        struct TSSDesc {
295            uint64_t low;
296            uint64_t high;
297        } tssDescVal = {TSSDescLow, TSSDescHigh};
298
299        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
300                            (uint8_t *)(&tssDescVal), sizeof(tssDescVal));
301
302        numGDTEntries++;
303
304        SegSelector tssSel = 0;
305        tssSel.si = numGDTEntries - 1;
306
307        uint64_t tss_base_addr = (TSSDescHigh.base << 32) |
308                                 (TSSDescLow.baseHigh << 24) |
309                                  TSSDescLow.baseLow;
310        uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16);
311
312        SegAttr tss_attr = 0;
313
314        tss_attr.type = TSSDescLow.type;
315        tss_attr.dpl = TSSDescLow.dpl;
316        tss_attr.present = TSSDescLow.p;
317        tss_attr.granularity = TSSDescLow.g;
318        tss_attr.unusable = 0;
319
320        for (int i = 0; i < contextIds.size(); i++) {
321            ThreadContext * tc = system->getThreadContext(contextIds[i]);
322
323            tc->setMiscReg(MISCREG_CS, cs);
324            tc->setMiscReg(MISCREG_DS, ds);
325            tc->setMiscReg(MISCREG_ES, ds);
326            tc->setMiscReg(MISCREG_FS, ds);
327            tc->setMiscReg(MISCREG_GS, ds);
328            tc->setMiscReg(MISCREG_SS, ds);
329
330            // LDT
331            tc->setMiscReg(MISCREG_TSL, 0);
332            SegAttr tslAttr = 0;
333            tslAttr.present = 1;
334            tslAttr.type = 2;
335            tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr);
336
337            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
338            tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1);
339
340            tc->setMiscReg(MISCREG_TR, tssSel);
341            tc->setMiscReg(MISCREG_TR_BASE, tss_base_addr);
342            tc->setMiscReg(MISCREG_TR_EFF_BASE, 0);
343            tc->setMiscReg(MISCREG_TR_LIMIT, tss_limit);
344            tc->setMiscReg(MISCREG_TR_ATTR, tss_attr);
345
346            //Start using longmode segments.
347            installSegDesc(tc, SEGMENT_REG_CS, csDesc, true);
348            installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true);
349            installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true);
350            installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true);
351            installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true);
352            installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true);
353
354            Efer efer = 0;
355            efer.sce = 1; // Enable system call extensions.
356            efer.lme = 1; // Enable long mode.
357            efer.lma = 1; // Activate long mode.
358            efer.nxe = 0; // Enable nx support.
359            efer.svme = 1; // Enable svm support for now.
360            efer.ffxsr = 0; // Turn on fast fxsave and fxrstor.
361            tc->setMiscReg(MISCREG_EFER, efer);
362
363            //Set up the registers that describe the operating mode.
364            CR0 cr0 = 0;
365            cr0.pg = 1; // Turn on paging.
366            cr0.cd = 0; // Don't disable caching.
367            cr0.nw = 0; // This is bit is defined to be ignored.
368            cr0.am = 1; // No alignment checking
369            cr0.wp = 1; // Supervisor mode can write read only pages
370            cr0.ne = 1;
371            cr0.et = 1; // This should always be 1
372            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
373                        // would be pointless.
374            cr0.em = 0; // Allow x87 instructions to execute natively.
375            cr0.mp = 1; // This doesn't really matter, but the manual suggests
376                        // setting it to one.
377            cr0.pe = 1; // We're definitely in protected mode.
378            tc->setMiscReg(MISCREG_CR0, cr0);
379
380            CR0 cr2 = 0;
381            tc->setMiscReg(MISCREG_CR2, cr2);
382
383            CR3 cr3 = pageTablePhysAddr;
384            tc->setMiscReg(MISCREG_CR3, cr3);
385
386            CR4 cr4 = 0;
387            //Turn on pae.
388            cr4.osxsave = 1; // Enable XSAVE and Proc Extended States
389            cr4.osxmmexcpt = 1; // Operating System Unmasked Exception
390            cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support
391            cr4.pce = 0; // Performance-Monitoring Counter Enable
392            cr4.pge = 0; // Page-Global Enable
393            cr4.mce = 0; // Machine Check Enable
394            cr4.pae = 1; // Physical-Address Extension
395            cr4.pse = 0; // Page Size Extensions
396            cr4.de = 0; // Debugging Extensions
397            cr4.tsd = 0; // Time Stamp Disable
398            cr4.pvi = 0; // Protected-Mode Virtual Interrupts
399            cr4.vme = 0; // Virtual-8086 Mode Extensions
400
401            tc->setMiscReg(MISCREG_CR4, cr4);
402
403            CR4 cr8 = 0;
404            tc->setMiscReg(MISCREG_CR8, cr8);
405
406            const Addr PageMapLevel4 = pageTablePhysAddr;
407            //Point to the page tables.
408            tc->setMiscReg(MISCREG_CR3, PageMapLevel4);
409
410            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
411
412            tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900);
413
414            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
415            tc->setMiscReg(MISCREG_TSG_LIMIT, 0xffff);
416
417            tc->setMiscReg(MISCREG_IDTR_BASE, IDTVirtAddr);
418            tc->setMiscReg(MISCREG_IDTR_LIMIT, 0xffff);
419
420            /* enabling syscall and sysret */
421            MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32);
422            tc->setMiscReg(MISCREG_STAR, star);
423            MiscReg lstar = (MiscReg)syscallCodeVirtAddr;
424            tc->setMiscReg(MISCREG_LSTAR, lstar);
425            MiscReg sfmask = (1 << 8) | (1 << 10); // TF | DF
426            tc->setMiscReg(MISCREG_SF_MASK, sfmask);
427        }
428
429        /* Set up the content of the TSS and write it to physical memory. */
430
431        struct {
432            uint32_t reserved0;        // +00h
433            uint32_t RSP0_low;         // +04h
434            uint32_t RSP0_high;        // +08h
435            uint32_t RSP1_low;         // +0Ch
436            uint32_t RSP1_high;        // +10h
437            uint32_t RSP2_low;         // +14h
438            uint32_t RSP2_high;        // +18h
439            uint32_t reserved1;        // +1Ch
440            uint32_t reserved2;        // +20h
441            uint32_t IST1_low;         // +24h
442            uint32_t IST1_high;        // +28h
443            uint32_t IST2_low;         // +2Ch
444            uint32_t IST2_high;        // +30h
445            uint32_t IST3_low;         // +34h
446            uint32_t IST3_high;        // +38h
447            uint32_t IST4_low;         // +3Ch
448            uint32_t IST4_high;        // +40h
449            uint32_t IST5_low;         // +44h
450            uint32_t IST5_high;        // +48h
451            uint32_t IST6_low;         // +4Ch
452            uint32_t IST6_high;        // +50h
453            uint32_t IST7_low;         // +54h
454            uint32_t IST7_high;        // +58h
455            uint32_t reserved3;        // +5Ch
456            uint32_t reserved4;        // +60h
457            uint16_t reserved5;        // +64h
458            uint16_t IO_MapBase;       // +66h
459        } tss;
460
461        /** setting Interrupt Stack Table */
462        uint64_t IST_start = ISTVirtAddr + PageBytes;
463        tss.IST1_low  = IST_start;
464        tss.IST1_high = IST_start >> 32;
465        tss.RSP0_low  = tss.IST1_low;
466        tss.RSP0_high = tss.IST1_high;
467        tss.RSP1_low  = tss.IST1_low;
468        tss.RSP1_high = tss.IST1_high;
469        tss.RSP2_low  = tss.IST1_low;
470        tss.RSP2_high = tss.IST1_high;
471        physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss));
472
473        /* Setting IDT gates */
474        GateDescriptorLow PFGateLow = 0;
475        PFGateLow.offsetHigh = bits(PFHandlerVirtAddr, 31, 16);
476        PFGateLow.offsetLow = bits(PFHandlerVirtAddr, 15, 0);
477        PFGateLow.selector = csLowPL;
478        PFGateLow.p = 1;
479        PFGateLow.dpl = 0;
480        PFGateLow.type = 0xe;      // gate interrupt type
481        PFGateLow.IST = 0;         // setting IST to 0 and using RSP0
482
483        GateDescriptorHigh PFGateHigh = 0;
484        PFGateHigh.offset = bits(PFHandlerVirtAddr, 63, 32);
485
486        struct {
487            uint64_t low;
488            uint64_t high;
489        } PFGate = {PFGateLow, PFGateHigh};
490
491        physProxy.writeBlob(IDTPhysAddr + 0xE0,
492                            (uint8_t *)(&PFGate), sizeof(PFGate));
493
494        /* System call handler */
495        uint8_t syscallBlob[] = {
496            // mov    %rax, (0xffffc90000005600)
497            0x48, 0xa3, 0x00, 0x60, 0x00,
498            0x00, 0x00, 0xc9, 0xff, 0xff,
499            // sysret
500            0x48, 0x0f, 0x07
501        };
502
503        physProxy.writeBlob(syscallCodePhysAddr,
504                            syscallBlob, sizeof(syscallBlob));
505
506        /** Page fault handler */
507        uint8_t faultBlob[] = {
508            // mov    %rax, (0xffffc90000005700)
509            0x48, 0xa3, 0x00, 0x61, 0x00,
510            0x00, 0x00, 0xc9, 0xff, 0xff,
511            // add    $0x8, %rsp # skip error
512            0x48, 0x83, 0xc4, 0x08,
513            // iretq
514            0x48, 0xcf
515        };
516
517        physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob));
518
519        MultiLevelPageTable<PageTableOps> *pt =
520            dynamic_cast<MultiLevelPageTable<PageTableOps> *>(pTable);
521
522        /* Syscall handler */
523        pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false);
524        /* GDT */
525        pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false);
526        /* IDT */
527        pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false);
528        /* TSS */
529        pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false);
530        /* IST */
531        pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false);
532        /* PF handler */
533        pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false);
534        /* MMIO region for m5ops */
535        pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false);
536    } else {
537        for (int i = 0; i < contextIds.size(); i++) {
538            ThreadContext * tc = system->getThreadContext(contextIds[i]);
539
540            SegAttr dataAttr = 0;
541            dataAttr.dpl = 3;
542            dataAttr.unusable = 0;
543            dataAttr.defaultSize = 1;
544            dataAttr.longMode = 1;
545            dataAttr.avl = 0;
546            dataAttr.granularity = 1;
547            dataAttr.present = 1;
548            dataAttr.type = 3;
549            dataAttr.writable = 1;
550            dataAttr.readable = 1;
551            dataAttr.expandDown = 0;
552            dataAttr.system = 1;
553
554            //Initialize the segment registers.
555            for(int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
556                tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
557                tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
558                tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
559            }
560
561            SegAttr csAttr = 0;
562            csAttr.dpl = 3;
563            csAttr.unusable = 0;
564            csAttr.defaultSize = 0;
565            csAttr.longMode = 1;
566            csAttr.avl = 0;
567            csAttr.granularity = 1;
568            csAttr.present = 1;
569            csAttr.type = 10;
570            csAttr.writable = 0;
571            csAttr.readable = 1;
572            csAttr.expandDown = 0;
573            csAttr.system = 1;
574
575            tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
576
577            Efer efer = 0;
578            efer.sce = 1; // Enable system call extensions.
579            efer.lme = 1; // Enable long mode.
580            efer.lma = 1; // Activate long mode.
581            efer.nxe = 1; // Enable nx support.
582            efer.svme = 0; // Disable svm support for now. It isn't implemented.
583            efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
584            tc->setMiscReg(MISCREG_EFER, efer);
585
586            //Set up the registers that describe the operating mode.
587            CR0 cr0 = 0;
588            cr0.pg = 1; // Turn on paging.
589            cr0.cd = 0; // Don't disable caching.
590            cr0.nw = 0; // This is bit is defined to be ignored.
591            cr0.am = 0; // No alignment checking
592            cr0.wp = 0; // Supervisor mode can write read only pages
593            cr0.ne = 1;
594            cr0.et = 1; // This should always be 1
595            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
596                        // would be pointless.
597            cr0.em = 0; // Allow x87 instructions to execute natively.
598            cr0.mp = 1; // This doesn't really matter, but the manual suggests
599                        // setting it to one.
600            cr0.pe = 1; // We're definitely in protected mode.
601            tc->setMiscReg(MISCREG_CR0, cr0);
602
603            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
604        }
605    }
606}
607
608void
609I386LiveProcess::initState()
610{
611    X86LiveProcess::initState();
612
613    argsInit(sizeof(uint32_t), PageBytes);
614
615    /*
616     * Set up a GDT for this process. The whole GDT wouldn't really be for
617     * this process, but the only parts we care about are.
618     */
619    allocateMem(_gdtStart, _gdtSize);
620    uint64_t zero = 0;
621    assert(_gdtSize % sizeof(zero) == 0);
622    for (Addr gdtCurrent = _gdtStart;
623            gdtCurrent < _gdtStart + _gdtSize; gdtCurrent += sizeof(zero)) {
624        initVirtMem.write(gdtCurrent, zero);
625    }
626
627    // Set up the vsyscall page for this process.
628    allocateMem(vsyscallPage.base, vsyscallPage.size);
629    uint8_t vsyscallBlob[] = {
630        0x51,       // push %ecx
631        0x52,       // push %edp
632        0x55,       // push %ebp
633        0x89, 0xe5, // mov %esp, %ebp
634        0x0f, 0x34  // sysenter
635    };
636    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsyscallOffset,
637            vsyscallBlob, sizeof(vsyscallBlob));
638
639    uint8_t vsysexitBlob[] = {
640        0x5d,       // pop %ebp
641        0x5a,       // pop %edx
642        0x59,       // pop %ecx
643        0xc3        // ret
644    };
645    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsysexitOffset,
646            vsysexitBlob, sizeof(vsysexitBlob));
647
648    for (int i = 0; i < contextIds.size(); i++) {
649        ThreadContext * tc = system->getThreadContext(contextIds[i]);
650
651        SegAttr dataAttr = 0;
652        dataAttr.dpl = 3;
653        dataAttr.unusable = 0;
654        dataAttr.defaultSize = 1;
655        dataAttr.longMode = 0;
656        dataAttr.avl = 0;
657        dataAttr.granularity = 1;
658        dataAttr.present = 1;
659        dataAttr.type = 3;
660        dataAttr.writable = 1;
661        dataAttr.readable = 1;
662        dataAttr.expandDown = 0;
663        dataAttr.system = 1;
664
665        //Initialize the segment registers.
666        for(int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
667            tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
668            tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
669            tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
670            tc->setMiscRegNoEffect(MISCREG_SEG_SEL(seg), 0xB);
671            tc->setMiscRegNoEffect(MISCREG_SEG_LIMIT(seg), (uint32_t)(-1));
672        }
673
674        SegAttr csAttr = 0;
675        csAttr.dpl = 3;
676        csAttr.unusable = 0;
677        csAttr.defaultSize = 1;
678        csAttr.longMode = 0;
679        csAttr.avl = 0;
680        csAttr.granularity = 1;
681        csAttr.present = 1;
682        csAttr.type = 0xa;
683        csAttr.writable = 0;
684        csAttr.readable = 1;
685        csAttr.expandDown = 0;
686        csAttr.system = 1;
687
688        tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
689
690        tc->setMiscRegNoEffect(MISCREG_TSG_BASE, _gdtStart);
691        tc->setMiscRegNoEffect(MISCREG_TSG_EFF_BASE, _gdtStart);
692        tc->setMiscRegNoEffect(MISCREG_TSG_LIMIT, _gdtStart + _gdtSize - 1);
693
694        // Set the LDT selector to 0 to deactivate it.
695        tc->setMiscRegNoEffect(MISCREG_TSL, 0);
696
697        Efer efer = 0;
698        efer.sce = 1; // Enable system call extensions.
699        efer.lme = 1; // Enable long mode.
700        efer.lma = 0; // Deactivate long mode.
701        efer.nxe = 1; // Enable nx support.
702        efer.svme = 0; // Disable svm support for now. It isn't implemented.
703        efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
704        tc->setMiscReg(MISCREG_EFER, efer);
705
706        //Set up the registers that describe the operating mode.
707        CR0 cr0 = 0;
708        cr0.pg = 1; // Turn on paging.
709        cr0.cd = 0; // Don't disable caching.
710        cr0.nw = 0; // This is bit is defined to be ignored.
711        cr0.am = 0; // No alignment checking
712        cr0.wp = 0; // Supervisor mode can write read only pages
713        cr0.ne = 1;
714        cr0.et = 1; // This should always be 1
715        cr0.ts = 0; // We don't do task switching, so causing fp exceptions
716                    // would be pointless.
717        cr0.em = 0; // Allow x87 instructions to execute natively.
718        cr0.mp = 1; // This doesn't really matter, but the manual suggests
719                    // setting it to one.
720        cr0.pe = 1; // We're definitely in protected mode.
721        tc->setMiscReg(MISCREG_CR0, cr0);
722
723        tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
724    }
725}
726
727template<class IntType>
728void
729X86LiveProcess::argsInit(int pageSize,
730        std::vector<AuxVector<IntType> > extraAuxvs)
731{
732    int intSize = sizeof(IntType);
733
734    typedef AuxVector<IntType> auxv_t;
735    std::vector<auxv_t> auxv = extraAuxvs;
736
737    string filename;
738    if(argv.size() < 1)
739        filename = "";
740    else
741        filename = argv[0];
742
743    //We want 16 byte alignment
744    uint64_t align = 16;
745
746    // load object file into target memory
747    objFile->loadSections(initVirtMem);
748
749    enum X86CpuFeature {
750        X86_OnboardFPU = 1 << 0,
751        X86_VirtualModeExtensions = 1 << 1,
752        X86_DebuggingExtensions = 1 << 2,
753        X86_PageSizeExtensions = 1 << 3,
754
755        X86_TimeStampCounter = 1 << 4,
756        X86_ModelSpecificRegisters = 1 << 5,
757        X86_PhysicalAddressExtensions = 1 << 6,
758        X86_MachineCheckExtensions = 1 << 7,
759
760        X86_CMPXCHG8Instruction = 1 << 8,
761        X86_OnboardAPIC = 1 << 9,
762        X86_SYSENTER_SYSEXIT = 1 << 11,
763
764        X86_MemoryTypeRangeRegisters = 1 << 12,
765        X86_PageGlobalEnable = 1 << 13,
766        X86_MachineCheckArchitecture = 1 << 14,
767        X86_CMOVInstruction = 1 << 15,
768
769        X86_PageAttributeTable = 1 << 16,
770        X86_36BitPSEs = 1 << 17,
771        X86_ProcessorSerialNumber = 1 << 18,
772        X86_CLFLUSHInstruction = 1 << 19,
773
774        X86_DebugTraceStore = 1 << 21,
775        X86_ACPIViaMSR = 1 << 22,
776        X86_MultimediaExtensions = 1 << 23,
777
778        X86_FXSAVE_FXRSTOR = 1 << 24,
779        X86_StreamingSIMDExtensions = 1 << 25,
780        X86_StreamingSIMDExtensions2 = 1 << 26,
781        X86_CPUSelfSnoop = 1 << 27,
782
783        X86_HyperThreading = 1 << 28,
784        X86_AutomaticClockControl = 1 << 29,
785        X86_IA64Processor = 1 << 30
786    };
787
788    // Setup the auxilliary vectors. These will already have endian conversion.
789    // Auxilliary vectors are loaded only for elf formatted executables.
790    ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile);
791    if (elfObject) {
792        uint64_t features =
793            X86_OnboardFPU |
794            X86_VirtualModeExtensions |
795            X86_DebuggingExtensions |
796            X86_PageSizeExtensions |
797            X86_TimeStampCounter |
798            X86_ModelSpecificRegisters |
799            X86_PhysicalAddressExtensions |
800            X86_MachineCheckExtensions |
801            X86_CMPXCHG8Instruction |
802            X86_OnboardAPIC |
803            X86_SYSENTER_SYSEXIT |
804            X86_MemoryTypeRangeRegisters |
805            X86_PageGlobalEnable |
806            X86_MachineCheckArchitecture |
807            X86_CMOVInstruction |
808            X86_PageAttributeTable |
809            X86_36BitPSEs |
810//            X86_ProcessorSerialNumber |
811            X86_CLFLUSHInstruction |
812//            X86_DebugTraceStore |
813//            X86_ACPIViaMSR |
814            X86_MultimediaExtensions |
815            X86_FXSAVE_FXRSTOR |
816            X86_StreamingSIMDExtensions |
817            X86_StreamingSIMDExtensions2 |
818//            X86_CPUSelfSnoop |
819//            X86_HyperThreading |
820//            X86_AutomaticClockControl |
821//            X86_IA64Processor |
822            0;
823
824        //Bits which describe the system hardware capabilities
825        //XXX Figure out what these should be
826        auxv.push_back(auxv_t(M5_AT_HWCAP, features));
827        //The system page size
828        auxv.push_back(auxv_t(M5_AT_PAGESZ, X86ISA::PageBytes));
829        //Frequency at which times() increments
830        //Defined to be 100 in the kernel source.
831        auxv.push_back(auxv_t(M5_AT_CLKTCK, 100));
832        // For statically linked executables, this is the virtual address of the
833        // program header tables if they appear in the executable image
834        auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable()));
835        // This is the size of a program header entry from the elf file.
836        auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize()));
837        // This is the number of program headers from the original elf file.
838        auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount()));
839        //This is the address of the elf "interpreter", It should be set
840        //to 0 for regular executables. It should be something else
841        //(not sure what) for dynamic libraries.
842        auxv.push_back(auxv_t(M5_AT_BASE, 0));
843
844        //XXX Figure out what this should be.
845        auxv.push_back(auxv_t(M5_AT_FLAGS, 0));
846        //The entry point to the program
847        auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint()));
848        //Different user and group IDs
849        auxv.push_back(auxv_t(M5_AT_UID, uid()));
850        auxv.push_back(auxv_t(M5_AT_EUID, euid()));
851        auxv.push_back(auxv_t(M5_AT_GID, gid()));
852        auxv.push_back(auxv_t(M5_AT_EGID, egid()));
853        //Whether to enable "secure mode" in the executable
854        auxv.push_back(auxv_t(M5_AT_SECURE, 0));
855        //The address of 16 "random" bytes.
856        auxv.push_back(auxv_t(M5_AT_RANDOM, 0));
857        //The name of the program
858        auxv.push_back(auxv_t(M5_AT_EXECFN, 0));
859        //The platform string
860        auxv.push_back(auxv_t(M5_AT_PLATFORM, 0));
861    }
862
863    //Figure out how big the initial stack needs to be
864
865    // A sentry NULL void pointer at the top of the stack.
866    int sentry_size = intSize;
867
868    //This is the name of the file which is present on the initial stack
869    //Its purpose is to let the user space linker examine the original file.
870    int file_name_size = filename.size() + 1;
871
872    const int numRandomBytes = 16;
873    int aux_data_size = numRandomBytes;
874
875    string platform = "x86_64";
876    aux_data_size += platform.size() + 1;
877
878    int env_data_size = 0;
879    for (int i = 0; i < envp.size(); ++i)
880        env_data_size += envp[i].size() + 1;
881    int arg_data_size = 0;
882    for (int i = 0; i < argv.size(); ++i)
883        arg_data_size += argv[i].size() + 1;
884
885    //The info_block needs to be padded so its size is a multiple of the
886    //alignment mask. Also, it appears that there needs to be at least some
887    //padding, so if the size is already a multiple, we need to increase it
888    //anyway.
889    int base_info_block_size =
890        sentry_size + file_name_size + env_data_size + arg_data_size;
891
892    int info_block_size = roundUp(base_info_block_size, align);
893
894    int info_block_padding = info_block_size - base_info_block_size;
895
896    //Each auxiliary vector is two words of intSize bytes each
897    int aux_array_size = intSize * 2 * (auxv.size() + 1);
898
899    int envp_array_size = intSize * (envp.size() + 1);
900    int argv_array_size = intSize * (argv.size() + 1);
901
902    int argc_size = intSize;
903
904    //Figure out the size of the contents of the actual initial frame
905    int frame_size =
906        aux_array_size +
907        envp_array_size +
908        argv_array_size +
909        argc_size;
910
911    //There needs to be padding after the auxiliary vector data so that the
912    //very bottom of the stack is aligned properly.
913    int partial_size = frame_size + aux_data_size;
914    int aligned_partial_size = roundUp(partial_size, align);
915    int aux_padding = aligned_partial_size - partial_size;
916
917    int space_needed =
918        info_block_size +
919        aux_data_size +
920        aux_padding +
921        frame_size;
922
923    stack_min = stack_base - space_needed;
924    stack_min = roundDown(stack_min, align);
925    stack_size = roundUp(stack_base - stack_min, pageSize);
926
927    // map memory
928    Addr stack_end = roundDown(stack_base - stack_size, pageSize);
929
930    DPRINTF(Stack, "Mapping the stack: 0x%x %dB\n", stack_end, stack_size);
931    allocateMem(stack_end, stack_size);
932
933    // map out initial stack contents
934    IntType sentry_base = stack_base - sentry_size;
935    IntType file_name_base = sentry_base - file_name_size;
936    IntType env_data_base = file_name_base - env_data_size;
937    IntType arg_data_base = env_data_base - arg_data_size;
938    IntType aux_data_base = arg_data_base - info_block_padding - aux_data_size;
939    IntType auxv_array_base = aux_data_base - aux_array_size - aux_padding;
940    IntType envp_array_base = auxv_array_base - envp_array_size;
941    IntType argv_array_base = envp_array_base - argv_array_size;
942    IntType argc_base = argv_array_base - argc_size;
943
944    DPRINTF(Stack, "The addresses of items on the initial stack:\n");
945    DPRINTF(Stack, "0x%x - file name\n", file_name_base);
946    DPRINTF(Stack, "0x%x - env data\n", env_data_base);
947    DPRINTF(Stack, "0x%x - arg data\n", arg_data_base);
948    DPRINTF(Stack, "0x%x - aux data\n", aux_data_base);
949    DPRINTF(Stack, "0x%x - auxv array\n", auxv_array_base);
950    DPRINTF(Stack, "0x%x - envp array\n", envp_array_base);
951    DPRINTF(Stack, "0x%x - argv array\n", argv_array_base);
952    DPRINTF(Stack, "0x%x - argc \n", argc_base);
953    DPRINTF(Stack, "0x%x - stack min\n", stack_min);
954
955    // write contents to stack
956
957    // figure out argc
958    IntType argc = argv.size();
959    IntType guestArgc = X86ISA::htog(argc);
960
961    //Write out the sentry void *
962    IntType sentry_NULL = 0;
963    initVirtMem.writeBlob(sentry_base,
964            (uint8_t*)&sentry_NULL, sentry_size);
965
966    //Write the file name
967    initVirtMem.writeString(file_name_base, filename.c_str());
968
969    //Fix up the aux vectors which point to data
970    assert(auxv[auxv.size() - 3].a_type == M5_AT_RANDOM);
971    auxv[auxv.size() - 3].a_val = aux_data_base;
972    assert(auxv[auxv.size() - 2].a_type == M5_AT_EXECFN);
973    auxv[auxv.size() - 2].a_val = argv_array_base;
974    assert(auxv[auxv.size() - 1].a_type == M5_AT_PLATFORM);
975    auxv[auxv.size() - 1].a_val = aux_data_base + numRandomBytes;
976
977    //Copy the aux stuff
978    for (int x = 0; x < auxv.size(); x++) {
979        initVirtMem.writeBlob(auxv_array_base + x * 2 * intSize,
980                (uint8_t*)&(auxv[x].a_type), intSize);
981        initVirtMem.writeBlob(auxv_array_base + (x * 2 + 1) * intSize,
982                (uint8_t*)&(auxv[x].a_val), intSize);
983    }
984    //Write out the terminating zeroed auxiliary vector
985    const uint64_t zero = 0;
986    initVirtMem.writeBlob(auxv_array_base + 2 * intSize * auxv.size(),
987            (uint8_t*)&zero, 2 * intSize);
988
989    initVirtMem.writeString(aux_data_base, platform.c_str());
990
991    copyStringArray(envp, envp_array_base, env_data_base, initVirtMem);
992    copyStringArray(argv, argv_array_base, arg_data_base, initVirtMem);
993
994    initVirtMem.writeBlob(argc_base, (uint8_t*)&guestArgc, intSize);
995
996    ThreadContext *tc = system->getThreadContext(contextIds[0]);
997    //Set the stack pointer register
998    tc->setIntReg(StackPointerReg, stack_min);
999
1000    // There doesn't need to be any segment base added in since we're dealing
1001    // with the flat segmentation model.
1002    tc->pcState(objFile->entryPoint());
1003
1004    //Align the "stack_min" to a page boundary.
1005    stack_min = roundDown(stack_min, pageSize);
1006
1007//    num_processes++;
1008}
1009
1010void
1011X86_64LiveProcess::argsInit(int intSize, int pageSize)
1012{
1013    std::vector<AuxVector<uint64_t> > extraAuxvs;
1014    extraAuxvs.push_back(AuxVector<uint64_t>(M5_AT_SYSINFO_EHDR,
1015                vsyscallPage.base));
1016    X86LiveProcess::argsInit<uint64_t>(pageSize, extraAuxvs);
1017}
1018
1019void
1020I386LiveProcess::argsInit(int intSize, int pageSize)
1021{
1022    std::vector<AuxVector<uint32_t> > extraAuxvs;
1023    //Tell the binary where the vsyscall part of the vsyscall page is.
1024    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO,
1025                vsyscallPage.base + vsyscallPage.vsyscallOffset));
1026    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO_EHDR,
1027                vsyscallPage.base));
1028    X86LiveProcess::argsInit<uint32_t>(pageSize, extraAuxvs);
1029}
1030
1031void
1032X86LiveProcess::setSyscallReturn(ThreadContext *tc, SyscallReturn retval)
1033{
1034    tc->setIntReg(INTREG_RAX, retval.encodedValue());
1035}
1036
1037X86ISA::IntReg
1038X86_64LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1039{
1040    assert(i < NumArgumentRegs);
1041    return tc->readIntReg(ArgumentReg[i++]);
1042}
1043
1044void
1045X86_64LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1046{
1047    assert(i < NumArgumentRegs);
1048    return tc->setIntReg(ArgumentReg[i], val);
1049}
1050
1051X86ISA::IntReg
1052I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1053{
1054    assert(i < NumArgumentRegs32);
1055    return tc->readIntReg(ArgumentReg32[i++]);
1056}
1057
1058X86ISA::IntReg
1059I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width)
1060{
1061    assert(width == 32 || width == 64);
1062    assert(i < NumArgumentRegs);
1063    uint64_t retVal = tc->readIntReg(ArgumentReg32[i++]) & mask(32);
1064    if (width == 64)
1065        retVal |= ((uint64_t)tc->readIntReg(ArgumentReg[i++]) << 32);
1066    return retVal;
1067}
1068
1069void
1070I386LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1071{
1072    assert(i < NumArgumentRegs);
1073    return tc->setIntReg(ArgumentReg[i], val);
1074}
1075