process.cc revision 10554:fe2e2f06a7c8
1/*
2 * Copyright (c) 2014 Advanced Micro Devices, Inc.
3 * Copyright (c) 2007 The Hewlett-Packard Development Company
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder.  You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2003-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Gabe Black
42 *          Ali Saidi
43 */
44
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/regs/segment.hh"
47#include "arch/x86/isa_traits.hh"
48#include "arch/x86/process.hh"
49#include "arch/x86/system.hh"
50#include "arch/x86/types.hh"
51#include "base/loader/elf_object.hh"
52#include "base/loader/object_file.hh"
53#include "base/misc.hh"
54#include "base/trace.hh"
55#include "cpu/thread_context.hh"
56#include "debug/Stack.hh"
57#include "mem/multi_level_page_table.hh"
58#include "mem/page_table.hh"
59#include "sim/process_impl.hh"
60#include "sim/syscall_emul.hh"
61#include "sim/system.hh"
62
63using namespace std;
64using namespace X86ISA;
65
66static const int ArgumentReg[] = {
67    INTREG_RDI,
68    INTREG_RSI,
69    INTREG_RDX,
70    //This argument register is r10 for syscalls and rcx for C.
71    INTREG_R10W,
72    //INTREG_RCX,
73    INTREG_R8W,
74    INTREG_R9W
75};
76static const int NumArgumentRegs = sizeof(ArgumentReg) / sizeof(const int);
77static const int ArgumentReg32[] = {
78    INTREG_EBX,
79    INTREG_ECX,
80    INTREG_EDX,
81    INTREG_ESI,
82    INTREG_EDI,
83};
84static const int NumArgumentRegs32 = sizeof(ArgumentReg) / sizeof(const int);
85
86X86LiveProcess::X86LiveProcess(LiveProcessParams * params, ObjectFile *objFile,
87        SyscallDesc *_syscallDescs, int _numSyscallDescs) :
88    LiveProcess(params, objFile), syscallDescs(_syscallDescs),
89    numSyscallDescs(_numSyscallDescs)
90{
91    brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize();
92    brk_point = roundUp(brk_point, PageBytes);
93}
94
95X86_64LiveProcess::X86_64LiveProcess(LiveProcessParams *params,
96        ObjectFile *objFile, SyscallDesc *_syscallDescs,
97        int _numSyscallDescs) :
98    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
99{
100
101    vsyscallPage.base = 0xffffffffff600000ULL;
102    vsyscallPage.size = PageBytes;
103    vsyscallPage.vtimeOffset = 0x400;
104    vsyscallPage.vgettimeofdayOffset = 0x0;
105
106    // Set up stack. On X86_64 Linux, stack goes from the top of memory
107    // downward, less the hole for the kernel address space plus one page
108    // for undertermined purposes.
109    stack_base = (Addr)0x7FFFFFFFF000ULL;
110
111    // Set pointer for next thread stack.  Reserve 8M for main stack.
112    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
113
114    // Set up region for mmaps. This was determined empirically and may not
115    // always be correct.
116    mmap_start = mmap_end = (Addr)0x2aaaaaaab000ULL;
117}
118
119void
120I386LiveProcess::syscall(int64_t callnum, ThreadContext *tc)
121{
122    TheISA::PCState pc = tc->pcState();
123    Addr eip = pc.pc();
124    if (eip >= vsyscallPage.base &&
125            eip < vsyscallPage.base + vsyscallPage.size) {
126        pc.npc(vsyscallPage.base + vsyscallPage.vsysexitOffset);
127        tc->pcState(pc);
128    }
129    X86LiveProcess::syscall(callnum, tc);
130}
131
132
133I386LiveProcess::I386LiveProcess(LiveProcessParams *params,
134        ObjectFile *objFile, SyscallDesc *_syscallDescs,
135        int _numSyscallDescs) :
136    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
137{
138    _gdtStart = ULL(0xffffd000);
139    _gdtSize = PageBytes;
140
141    vsyscallPage.base = 0xffffe000ULL;
142    vsyscallPage.size = PageBytes;
143    vsyscallPage.vsyscallOffset = 0x400;
144    vsyscallPage.vsysexitOffset = 0x410;
145
146    stack_base = _gdtStart;
147
148    // Set pointer for next thread stack.  Reserve 8M for main stack.
149    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
150
151    // Set up region for mmaps. This was determined empirically and may not
152    // always be correct.
153    mmap_start = mmap_end = (Addr)0xf7ffe000ULL;
154}
155
156SyscallDesc*
157X86LiveProcess::getDesc(int callnum)
158{
159    if (callnum < 0 || callnum >= numSyscallDescs)
160        return NULL;
161    return &syscallDescs[callnum];
162}
163
164void
165X86_64LiveProcess::initState()
166{
167    X86LiveProcess::initState();
168
169    argsInit(sizeof(uint64_t), PageBytes);
170
171       // Set up the vsyscall page for this process.
172    allocateMem(vsyscallPage.base, vsyscallPage.size);
173    uint8_t vtimeBlob[] = {
174        0x48,0xc7,0xc0,0xc9,0x00,0x00,0x00,    // mov    $0xc9,%rax
175        0x0f,0x05,                             // syscall
176        0xc3                                   // retq
177    };
178    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vtimeOffset,
179            vtimeBlob, sizeof(vtimeBlob));
180
181    uint8_t vgettimeofdayBlob[] = {
182        0x48,0xc7,0xc0,0x60,0x00,0x00,0x00,    // mov    $0x60,%rax
183        0x0f,0x05,                             // syscall
184        0xc3                                   // retq
185    };
186    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset,
187            vgettimeofdayBlob, sizeof(vgettimeofdayBlob));
188
189    if (kvmInSE) {
190        PortProxy physProxy = system->physProxy;
191
192        /*
193         * Set up the gdt.
194         */
195        uint8_t numGDTEntries = 0;
196        uint64_t nullDescriptor = 0;
197        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
198                            (uint8_t *)(&nullDescriptor), 8);
199        numGDTEntries++;
200
201        SegDescriptor initDesc = 0;
202        initDesc.type.codeOrData = 0; // code or data type
203        initDesc.type.c = 0;          // conforming
204        initDesc.type.r = 1;          // readable
205        initDesc.dpl = 0;             // privilege
206        initDesc.p = 1;               // present
207        initDesc.l = 1;               // longmode - 64 bit
208        initDesc.d = 0;               // operand size
209        initDesc.g = 1;               // granularity
210        initDesc.s = 1;               // system segment
211        initDesc.limitHigh = 0xFFFF;
212        initDesc.limitLow = 0xF;
213        initDesc.baseHigh = 0x0;
214        initDesc.baseLow = 0x0;
215
216        //64 bit code segment
217        SegDescriptor csLowPLDesc = initDesc;
218        csLowPLDesc.type.codeOrData = 1;
219        csLowPLDesc.dpl = 0;
220        uint64_t csLowPLDescVal = csLowPLDesc;
221        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
222                            (uint8_t *)(&csLowPLDescVal), 8);
223
224        numGDTEntries++;
225
226        SegSelector csLowPL = 0;
227        csLowPL.si = numGDTEntries - 1;
228        csLowPL.rpl = 0;
229
230        //64 bit data segment
231        SegDescriptor dsLowPLDesc = initDesc;
232        dsLowPLDesc.type.codeOrData = 0;
233        dsLowPLDesc.dpl = 0;
234        uint64_t dsLowPLDescVal = dsLowPLDesc;
235        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
236                            (uint8_t *)(&dsLowPLDescVal), 8);
237
238        numGDTEntries++;
239
240        SegSelector dsLowPL = 0;
241        dsLowPL.si = numGDTEntries - 1;
242        dsLowPL.rpl = 0;
243
244        //64 bit data segment
245        SegDescriptor dsDesc = initDesc;
246        dsDesc.type.codeOrData = 0;
247        dsDesc.dpl = 3;
248        uint64_t dsDescVal = dsDesc;
249        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
250                            (uint8_t *)(&dsDescVal), 8);
251
252        numGDTEntries++;
253
254        SegSelector ds = 0;
255        ds.si = numGDTEntries - 1;
256        ds.rpl = 3;
257
258        //64 bit code segment
259        SegDescriptor csDesc = initDesc;
260        csDesc.type.codeOrData = 1;
261        csDesc.dpl = 3;
262        uint64_t csDescVal = csDesc;
263        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
264                            (uint8_t *)(&csDescVal), 8);
265
266        numGDTEntries++;
267
268        SegSelector cs = 0;
269        cs.si = numGDTEntries - 1;
270        cs.rpl = 3;
271
272        SegSelector scall = 0;
273        scall.si = csLowPL.si;
274        scall.rpl = 0;
275
276        SegSelector sret = 0;
277        sret.si = dsLowPL.si;
278        sret.rpl = 3;
279
280        /* In long mode the TSS has been extended to 16 Bytes */
281        TSSlow TSSDescLow = 0;
282        TSSDescLow.type = 0xB;
283        TSSDescLow.dpl = 0; // Privelege level 0
284        TSSDescLow.p = 1; // Present
285        TSSDescLow.g = 1; // Page granularity
286        TSSDescLow.limitHigh = 0xF;
287        TSSDescLow.limitLow = 0xFFFF;
288        TSSDescLow.baseLow = (((uint32_t)TSSVirtAddr) << 8) >> 8;
289        TSSDescLow.baseHigh = (uint8_t)(((uint32_t)TSSVirtAddr) >> 24);
290
291        TSShigh TSSDescHigh = 0;
292        TSSDescHigh.base = (uint32_t)(TSSVirtAddr >> 32);
293
294        struct TSSDesc {
295            uint64_t low;
296            uint64_t high;
297        } tssDescVal = {TSSDescLow, TSSDescHigh};
298
299        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
300                            (uint8_t *)(&tssDescVal), sizeof(tssDescVal));
301
302        numGDTEntries++;
303
304        SegSelector tssSel = 0;
305        tssSel.si = numGDTEntries - 1;
306
307        uint64_t tss_base_addr = (TSSDescHigh.base << 32) | ((TSSDescLow.baseHigh << 24) | TSSDescLow.baseLow);
308        uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16);
309
310        SegAttr tss_attr = 0;
311
312        tss_attr.type = TSSDescLow.type;
313        tss_attr.dpl = TSSDescLow.dpl;
314        tss_attr.present = TSSDescLow.p;
315        tss_attr.granularity = TSSDescLow.g;
316        tss_attr.unusable = 0;
317
318        for (int i = 0; i < contextIds.size(); i++) {
319            ThreadContext * tc = system->getThreadContext(contextIds[i]);
320
321            tc->setMiscReg(MISCREG_CS, (MiscReg)cs);
322            tc->setMiscReg(MISCREG_DS, (MiscReg)ds);
323            tc->setMiscReg(MISCREG_ES, (MiscReg)ds);
324            tc->setMiscReg(MISCREG_FS, (MiscReg)ds);
325            tc->setMiscReg(MISCREG_GS, (MiscReg)ds);
326            tc->setMiscReg(MISCREG_SS, (MiscReg)ds);
327
328            // LDT
329            tc->setMiscReg(MISCREG_TSL, 0);
330            SegAttr tslAttr = 0;
331            tslAttr.present = 1;
332            tslAttr.type = 2;
333            tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr);
334
335            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
336            tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1);
337
338            tc->setMiscReg(MISCREG_TR, (MiscReg)tssSel);
339            tc->setMiscReg(MISCREG_SEG_BASE(SYS_SEGMENT_REG_TR), tss_base_addr);
340            tc->setMiscReg(MISCREG_SEG_EFF_BASE(SYS_SEGMENT_REG_TR), 0);
341            tc->setMiscReg(MISCREG_SEG_LIMIT(SYS_SEGMENT_REG_TR), tss_limit);
342            tc->setMiscReg(MISCREG_SEG_ATTR(SYS_SEGMENT_REG_TR), (MiscReg)tss_attr);
343
344            //Start using longmode segments.
345            installSegDesc(tc, SEGMENT_REG_CS, csDesc, true);
346            installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true);
347            installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true);
348            installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true);
349            installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true);
350            installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true);
351
352            Efer efer = 0;
353            efer.sce = 1; // Enable system call extensions.
354            efer.lme = 1; // Enable long mode.
355            efer.lma = 1; // Activate long mode.
356            efer.nxe = 0; // Enable nx support.
357            efer.svme = 1; // Enable svm support for now.
358            efer.ffxsr = 0; // Turn on fast fxsave and fxrstor.
359            tc->setMiscReg(MISCREG_EFER, efer);
360
361            //Set up the registers that describe the operating mode.
362            CR0 cr0 = 0;
363            cr0.pg = 1; // Turn on paging.
364            cr0.cd = 0; // Don't disable caching.
365            cr0.nw = 0; // This is bit is defined to be ignored.
366            cr0.am = 1; // No alignment checking
367            cr0.wp = 1; // Supervisor mode can write read only pages
368            cr0.ne = 1;
369            cr0.et = 1; // This should always be 1
370            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
371                        // would be pointless.
372            cr0.em = 0; // Allow x87 instructions to execute natively.
373            cr0.mp = 1; // This doesn't really matter, but the manual suggests
374                        // setting it to one.
375            cr0.pe = 1; // We're definitely in protected mode.
376            tc->setMiscReg(MISCREG_CR0, cr0);
377
378            CR0 cr2 = 0;
379            tc->setMiscReg(MISCREG_CR2, cr2);
380
381            CR3 cr3 = pageTablePhysAddr;
382            tc->setMiscReg(MISCREG_CR3, cr3);
383
384            CR4 cr4 = 0;
385            //Turn on pae.
386            cr4.osxsave = 1; // Enable XSAVE and Proc Extended States
387            cr4.osxmmexcpt = 1; // Operating System Unmasked Exception
388            cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support
389            cr4.pce = 0; // Performance-Monitoring Counter Enable
390            cr4.pge = 0; // Page-Global Enable
391            cr4.mce = 0; // Machine Check Enable
392            cr4.pae = 1; // Physical-Address Extension
393            cr4.pse = 0; // Page Size Extensions
394            cr4.de = 0; // Debugging Extensions
395            cr4.tsd = 0; // Time Stamp Disable
396            cr4.pvi = 0; // Protected-Mode Virtual Interrupts
397            cr4.vme = 0; // Virtual-8086 Mode Extensions
398
399            tc->setMiscReg(MISCREG_CR4, cr4);
400
401            CR4 cr8 = 0;
402            tc->setMiscReg(MISCREG_CR8, cr8);
403
404            const Addr PageMapLevel4 = pageTablePhysAddr;
405            //Point to the page tables.
406            tc->setMiscReg(MISCREG_CR3, PageMapLevel4);
407
408            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
409
410            tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900);
411
412            tc->setMiscReg(MISCREG_SEG_BASE(MISCREG_TSG - MISCREG_SEG_SEL_BASE), GDTVirtAddr);
413            tc->setMiscReg(MISCREG_SEG_LIMIT(MISCREG_TSG - MISCREG_SEG_SEL_BASE), 0xffff);
414
415            tc->setMiscReg(MISCREG_SEG_BASE(MISCREG_IDTR - MISCREG_SEG_SEL_BASE), IDTVirtAddr);
416            tc->setMiscReg(MISCREG_SEG_LIMIT(MISCREG_IDTR - MISCREG_SEG_SEL_BASE), 0xffff);
417
418            /* enabling syscall and sysret */
419            MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32);
420            tc->setMiscReg(MISCREG_STAR, star);
421            MiscReg lstar = (MiscReg) syscallCodeVirtAddr;
422            tc->setMiscReg(MISCREG_LSTAR, lstar);
423            MiscReg sfmask = (1<<8) | (1<<10); // TF | DF
424            tc->setMiscReg(MISCREG_SF_MASK, sfmask);
425        }
426
427        /*
428         * Setting up the content of the TSS
429         * and writting it to physical memory
430         */
431
432        struct {
433            uint32_t reserved0;        // +00h
434            uint32_t RSP0_low;         // +04h
435            uint32_t RSP0_high;        // +08h
436            uint32_t RSP1_low;         // +0Ch
437            uint32_t RSP1_high;        // +10h
438            uint32_t RSP2_low;         // +14h
439            uint32_t RSP2_high;        // +18h
440            uint32_t reserved1;        // +1Ch
441            uint32_t reserved2;        // +20h
442            uint32_t IST1_low;         // +24h
443            uint32_t IST1_high;        // +28h
444            uint32_t IST2_low;         // +2Ch
445            uint32_t IST2_high;        // +30h
446            uint32_t IST3_low;         // +34h
447            uint32_t IST3_high;        // +38h
448            uint32_t IST4_low;         // +3Ch
449            uint32_t IST4_high;        // +40h
450            uint32_t IST5_low;         // +44h
451            uint32_t IST5_high;        // +48h
452            uint32_t IST6_low;         // +4Ch
453            uint32_t IST6_high;        // +50h
454            uint32_t IST7_low;         // +54h
455            uint32_t IST7_high;        // +58h
456            uint32_t reserved3;        // +5Ch
457            uint32_t reserved4;        // +60h
458            uint16_t reserved5;        // +64h
459            uint16_t IO_MapBase;       // +66h
460        } tss;
461
462        /** setting Interrupt Stack Table */
463        uint64_t IST_start = ISTVirtAddr + PageBytes;
464        tss.IST1_low  = (uint32_t)IST_start;
465        tss.IST1_high = (uint32_t)(IST_start >> 32);
466        tss.RSP0_low  = tss.IST1_low;
467        tss.RSP0_high = tss.IST1_high;
468        tss.RSP1_low  = tss.IST1_low;
469        tss.RSP1_high = tss.IST1_high;
470        tss.RSP2_low  = tss.IST1_low;
471        tss.RSP2_high = tss.IST1_high;
472        physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss));
473
474        /* Setting IDT gates */
475        GateDescriptorLow PFGateLow = 0;
476        PFGateLow.offsetHigh = (uint16_t)((uint32_t)PFHandlerVirtAddr >> 16);
477        PFGateLow.offsetLow = (uint16_t)PFHandlerVirtAddr;
478        PFGateLow.selector = (MiscReg)csLowPL;
479        PFGateLow.p = 1;
480        PFGateLow.dpl = 0;
481        PFGateLow.type = 0xe;      // gate interrupt type
482        PFGateLow.IST = 0;         // setting IST to 0 and using RSP0
483
484        GateDescriptorHigh PFGateHigh = 0;
485        PFGateHigh.offset = (uint32_t)(PFHandlerVirtAddr >> 32);
486
487        struct {
488            uint64_t low;
489            uint64_t high;
490        } PFGate = {PFGateLow, PFGateHigh};
491
492        physProxy.writeBlob(IDTPhysAddr + 0xE0,
493                            (uint8_t *)(&PFGate), sizeof(PFGate));
494
495        /** System call handler */
496        uint8_t syscallBlob[] = {
497            0x48,0xa3,0x00,0x60,0x00,
498            0x00,0x00,0xc9,0xff,0xff, // mov    %rax, (0xffffc90000005600)
499            0x48,0x0f,0x07,           // sysret
500        };
501
502        physProxy.writeBlob(syscallCodePhysAddr,
503                            syscallBlob, sizeof(syscallBlob));
504
505        /** Page fault handler */
506        uint8_t faultBlob[] = {
507            0x48,0xa3,0x00,0x61,0x00,
508            0x00,0x00,0xc9,0xff,0xff, // mov    %rax, (0xffffc90000005700)
509            0x48,0x83,0xc4,0x08,      // add    $0x8,%rsp # skip error
510            0x48,0xcf,                // iretq
511        };
512
513        physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob));
514
515        MultiLevelPageTable<PageTableOps> *pt =
516            dynamic_cast<MultiLevelPageTable<PageTableOps> *>(pTable);
517
518        /* Syscall handler */
519        pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false);
520        /* GDT */
521        pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false);
522        /* IDT */
523        pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false);
524        /* TSS */
525        pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false);
526        /* IST */
527        pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false);
528        /* PF handler */
529        pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false);
530        /* MMIO region for m5ops */
531        pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false);
532    } else {
533        for (int i = 0; i < contextIds.size(); i++) {
534            ThreadContext * tc = system->getThreadContext(contextIds[i]);
535
536            SegAttr dataAttr = 0;
537            dataAttr.dpl = 3;
538            dataAttr.unusable = 0;
539            dataAttr.defaultSize = 1;
540            dataAttr.longMode = 1;
541            dataAttr.avl = 0;
542            dataAttr.granularity = 1;
543            dataAttr.present = 1;
544            dataAttr.type = 3;
545            dataAttr.writable = 1;
546            dataAttr.readable = 1;
547            dataAttr.expandDown = 0;
548            dataAttr.system = 1;
549
550            //Initialize the segment registers.
551            for(int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
552                tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
553                tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
554                tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
555            }
556
557            SegAttr csAttr = 0;
558            csAttr.dpl = 3;
559            csAttr.unusable = 0;
560            csAttr.defaultSize = 0;
561            csAttr.longMode = 1;
562            csAttr.avl = 0;
563            csAttr.granularity = 1;
564            csAttr.present = 1;
565            csAttr.type = 10;
566            csAttr.writable = 0;
567            csAttr.readable = 1;
568            csAttr.expandDown = 0;
569            csAttr.system = 1;
570
571            tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
572
573            Efer efer = 0;
574            efer.sce = 1; // Enable system call extensions.
575            efer.lme = 1; // Enable long mode.
576            efer.lma = 1; // Activate long mode.
577            efer.nxe = 1; // Enable nx support.
578            efer.svme = 0; // Disable svm support for now. It isn't implemented.
579            efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
580            tc->setMiscReg(MISCREG_EFER, efer);
581
582            //Set up the registers that describe the operating mode.
583            CR0 cr0 = 0;
584            cr0.pg = 1; // Turn on paging.
585            cr0.cd = 0; // Don't disable caching.
586            cr0.nw = 0; // This is bit is defined to be ignored.
587            cr0.am = 0; // No alignment checking
588            cr0.wp = 0; // Supervisor mode can write read only pages
589            cr0.ne = 1;
590            cr0.et = 1; // This should always be 1
591            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
592                        // would be pointless.
593            cr0.em = 0; // Allow x87 instructions to execute natively.
594            cr0.mp = 1; // This doesn't really matter, but the manual suggests
595                        // setting it to one.
596            cr0.pe = 1; // We're definitely in protected mode.
597            tc->setMiscReg(MISCREG_CR0, cr0);
598
599            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
600        }
601    }
602}
603
604void
605I386LiveProcess::initState()
606{
607    X86LiveProcess::initState();
608
609    argsInit(sizeof(uint32_t), PageBytes);
610
611    /*
612     * Set up a GDT for this process. The whole GDT wouldn't really be for
613     * this process, but the only parts we care about are.
614     */
615    allocateMem(_gdtStart, _gdtSize);
616    uint64_t zero = 0;
617    assert(_gdtSize % sizeof(zero) == 0);
618    for (Addr gdtCurrent = _gdtStart;
619            gdtCurrent < _gdtStart + _gdtSize; gdtCurrent += sizeof(zero)) {
620        initVirtMem.write(gdtCurrent, zero);
621    }
622
623    // Set up the vsyscall page for this process.
624    allocateMem(vsyscallPage.base, vsyscallPage.size);
625    uint8_t vsyscallBlob[] = {
626        0x51,       // push %ecx
627        0x52,       // push %edp
628        0x55,       // push %ebp
629        0x89, 0xe5, // mov %esp, %ebp
630        0x0f, 0x34  // sysenter
631    };
632    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsyscallOffset,
633            vsyscallBlob, sizeof(vsyscallBlob));
634
635    uint8_t vsysexitBlob[] = {
636        0x5d,       // pop %ebp
637        0x5a,       // pop %edx
638        0x59,       // pop %ecx
639        0xc3        // ret
640    };
641    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsysexitOffset,
642            vsysexitBlob, sizeof(vsysexitBlob));
643
644    for (int i = 0; i < contextIds.size(); i++) {
645        ThreadContext * tc = system->getThreadContext(contextIds[i]);
646
647        SegAttr dataAttr = 0;
648        dataAttr.dpl = 3;
649        dataAttr.unusable = 0;
650        dataAttr.defaultSize = 1;
651        dataAttr.longMode = 0;
652        dataAttr.avl = 0;
653        dataAttr.granularity = 1;
654        dataAttr.present = 1;
655        dataAttr.type = 3;
656        dataAttr.writable = 1;
657        dataAttr.readable = 1;
658        dataAttr.expandDown = 0;
659        dataAttr.system = 1;
660
661        //Initialize the segment registers.
662        for(int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
663            tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
664            tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
665            tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
666            tc->setMiscRegNoEffect(MISCREG_SEG_SEL(seg), 0xB);
667            tc->setMiscRegNoEffect(MISCREG_SEG_LIMIT(seg), (uint32_t)(-1));
668        }
669
670        SegAttr csAttr = 0;
671        csAttr.dpl = 3;
672        csAttr.unusable = 0;
673        csAttr.defaultSize = 1;
674        csAttr.longMode = 0;
675        csAttr.avl = 0;
676        csAttr.granularity = 1;
677        csAttr.present = 1;
678        csAttr.type = 0xa;
679        csAttr.writable = 0;
680        csAttr.readable = 1;
681        csAttr.expandDown = 0;
682        csAttr.system = 1;
683
684        tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
685
686        tc->setMiscRegNoEffect(MISCREG_TSG_BASE, _gdtStart);
687        tc->setMiscRegNoEffect(MISCREG_TSG_EFF_BASE, _gdtStart);
688        tc->setMiscRegNoEffect(MISCREG_TSG_LIMIT, _gdtStart + _gdtSize - 1);
689
690        // Set the LDT selector to 0 to deactivate it.
691        tc->setMiscRegNoEffect(MISCREG_TSL, 0);
692
693        Efer efer = 0;
694        efer.sce = 1; // Enable system call extensions.
695        efer.lme = 1; // Enable long mode.
696        efer.lma = 0; // Deactivate long mode.
697        efer.nxe = 1; // Enable nx support.
698        efer.svme = 0; // Disable svm support for now. It isn't implemented.
699        efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
700        tc->setMiscReg(MISCREG_EFER, efer);
701
702        //Set up the registers that describe the operating mode.
703        CR0 cr0 = 0;
704        cr0.pg = 1; // Turn on paging.
705        cr0.cd = 0; // Don't disable caching.
706        cr0.nw = 0; // This is bit is defined to be ignored.
707        cr0.am = 0; // No alignment checking
708        cr0.wp = 0; // Supervisor mode can write read only pages
709        cr0.ne = 1;
710        cr0.et = 1; // This should always be 1
711        cr0.ts = 0; // We don't do task switching, so causing fp exceptions
712                    // would be pointless.
713        cr0.em = 0; // Allow x87 instructions to execute natively.
714        cr0.mp = 1; // This doesn't really matter, but the manual suggests
715                    // setting it to one.
716        cr0.pe = 1; // We're definitely in protected mode.
717        tc->setMiscReg(MISCREG_CR0, cr0);
718
719        tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
720    }
721}
722
723template<class IntType>
724void
725X86LiveProcess::argsInit(int pageSize,
726        std::vector<AuxVector<IntType> > extraAuxvs)
727{
728    int intSize = sizeof(IntType);
729
730    typedef AuxVector<IntType> auxv_t;
731    std::vector<auxv_t> auxv = extraAuxvs;
732
733    string filename;
734    if(argv.size() < 1)
735        filename = "";
736    else
737        filename = argv[0];
738
739    //We want 16 byte alignment
740    uint64_t align = 16;
741
742    // load object file into target memory
743    objFile->loadSections(initVirtMem);
744
745    enum X86CpuFeature {
746        X86_OnboardFPU = 1 << 0,
747        X86_VirtualModeExtensions = 1 << 1,
748        X86_DebuggingExtensions = 1 << 2,
749        X86_PageSizeExtensions = 1 << 3,
750
751        X86_TimeStampCounter = 1 << 4,
752        X86_ModelSpecificRegisters = 1 << 5,
753        X86_PhysicalAddressExtensions = 1 << 6,
754        X86_MachineCheckExtensions = 1 << 7,
755
756        X86_CMPXCHG8Instruction = 1 << 8,
757        X86_OnboardAPIC = 1 << 9,
758        X86_SYSENTER_SYSEXIT = 1 << 11,
759
760        X86_MemoryTypeRangeRegisters = 1 << 12,
761        X86_PageGlobalEnable = 1 << 13,
762        X86_MachineCheckArchitecture = 1 << 14,
763        X86_CMOVInstruction = 1 << 15,
764
765        X86_PageAttributeTable = 1 << 16,
766        X86_36BitPSEs = 1 << 17,
767        X86_ProcessorSerialNumber = 1 << 18,
768        X86_CLFLUSHInstruction = 1 << 19,
769
770        X86_DebugTraceStore = 1 << 21,
771        X86_ACPIViaMSR = 1 << 22,
772        X86_MultimediaExtensions = 1 << 23,
773
774        X86_FXSAVE_FXRSTOR = 1 << 24,
775        X86_StreamingSIMDExtensions = 1 << 25,
776        X86_StreamingSIMDExtensions2 = 1 << 26,
777        X86_CPUSelfSnoop = 1 << 27,
778
779        X86_HyperThreading = 1 << 28,
780        X86_AutomaticClockControl = 1 << 29,
781        X86_IA64Processor = 1 << 30
782    };
783
784    //Setup the auxilliary vectors. These will already have endian conversion.
785    //Auxilliary vectors are loaded only for elf formatted executables.
786    ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile);
787    if(elfObject)
788    {
789        uint64_t features =
790            X86_OnboardFPU |
791            X86_VirtualModeExtensions |
792            X86_DebuggingExtensions |
793            X86_PageSizeExtensions |
794            X86_TimeStampCounter |
795            X86_ModelSpecificRegisters |
796            X86_PhysicalAddressExtensions |
797            X86_MachineCheckExtensions |
798            X86_CMPXCHG8Instruction |
799            X86_OnboardAPIC |
800            X86_SYSENTER_SYSEXIT |
801            X86_MemoryTypeRangeRegisters |
802            X86_PageGlobalEnable |
803            X86_MachineCheckArchitecture |
804            X86_CMOVInstruction |
805            X86_PageAttributeTable |
806            X86_36BitPSEs |
807//            X86_ProcessorSerialNumber |
808            X86_CLFLUSHInstruction |
809//            X86_DebugTraceStore |
810//            X86_ACPIViaMSR |
811            X86_MultimediaExtensions |
812            X86_FXSAVE_FXRSTOR |
813            X86_StreamingSIMDExtensions |
814            X86_StreamingSIMDExtensions2 |
815//            X86_CPUSelfSnoop |
816//            X86_HyperThreading |
817//            X86_AutomaticClockControl |
818//            X86_IA64Processor |
819            0;
820
821        //Bits which describe the system hardware capabilities
822        //XXX Figure out what these should be
823        auxv.push_back(auxv_t(M5_AT_HWCAP, features));
824        //The system page size
825        auxv.push_back(auxv_t(M5_AT_PAGESZ, X86ISA::PageBytes));
826        //Frequency at which times() increments
827        //Defined to be 100 in the kernel source.
828        auxv.push_back(auxv_t(M5_AT_CLKTCK, 100));
829        // For statically linked executables, this is the virtual address of the
830        // program header tables if they appear in the executable image
831        auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable()));
832        // This is the size of a program header entry from the elf file.
833        auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize()));
834        // This is the number of program headers from the original elf file.
835        auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount()));
836        //This is the address of the elf "interpreter", It should be set
837        //to 0 for regular executables. It should be something else
838        //(not sure what) for dynamic libraries.
839        auxv.push_back(auxv_t(M5_AT_BASE, 0));
840
841        //XXX Figure out what this should be.
842        auxv.push_back(auxv_t(M5_AT_FLAGS, 0));
843        //The entry point to the program
844        auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint()));
845        //Different user and group IDs
846        auxv.push_back(auxv_t(M5_AT_UID, uid()));
847        auxv.push_back(auxv_t(M5_AT_EUID, euid()));
848        auxv.push_back(auxv_t(M5_AT_GID, gid()));
849        auxv.push_back(auxv_t(M5_AT_EGID, egid()));
850        //Whether to enable "secure mode" in the executable
851        auxv.push_back(auxv_t(M5_AT_SECURE, 0));
852        //The address of 16 "random" bytes.
853        auxv.push_back(auxv_t(M5_AT_RANDOM, 0));
854        //The name of the program
855        auxv.push_back(auxv_t(M5_AT_EXECFN, 0));
856        //The platform string
857        auxv.push_back(auxv_t(M5_AT_PLATFORM, 0));
858    }
859
860    //Figure out how big the initial stack needs to be
861
862    // A sentry NULL void pointer at the top of the stack.
863    int sentry_size = intSize;
864
865    //This is the name of the file which is present on the initial stack
866    //It's purpose is to let the user space linker examine the original file.
867    int file_name_size = filename.size() + 1;
868
869    const int numRandomBytes = 16;
870    int aux_data_size = numRandomBytes;
871
872    string platform = "x86_64";
873    aux_data_size += platform.size() + 1;
874
875    int env_data_size = 0;
876    for (int i = 0; i < envp.size(); ++i) {
877        env_data_size += envp[i].size() + 1;
878    }
879    int arg_data_size = 0;
880    for (int i = 0; i < argv.size(); ++i) {
881        arg_data_size += argv[i].size() + 1;
882    }
883
884    //The info_block needs to be padded so it's size is a multiple of the
885    //alignment mask. Also, it appears that there needs to be at least some
886    //padding, so if the size is already a multiple, we need to increase it
887    //anyway.
888    int base_info_block_size =
889        sentry_size + file_name_size + env_data_size + arg_data_size;
890
891    int info_block_size = roundUp(base_info_block_size, align);
892
893    int info_block_padding = info_block_size - base_info_block_size;
894
895    //Each auxilliary vector is two 8 byte words
896    int aux_array_size = intSize * 2 * (auxv.size() + 1);
897
898    int envp_array_size = intSize * (envp.size() + 1);
899    int argv_array_size = intSize * (argv.size() + 1);
900
901    int argc_size = intSize;
902
903    //Figure out the size of the contents of the actual initial frame
904    int frame_size =
905        aux_array_size +
906        envp_array_size +
907        argv_array_size +
908        argc_size;
909
910    //There needs to be padding after the auxiliary vector data so that the
911    //very bottom of the stack is aligned properly.
912    int partial_size = frame_size + aux_data_size;
913    int aligned_partial_size = roundUp(partial_size, align);
914    int aux_padding = aligned_partial_size - partial_size;
915
916    int space_needed =
917        info_block_size +
918        aux_data_size +
919        aux_padding +
920        frame_size;
921
922    stack_min = stack_base - space_needed;
923    stack_min = roundDown(stack_min, align);
924    stack_size = roundUp(stack_base - stack_min, pageSize);
925
926    // map memory
927    Addr stack_end = roundDown(stack_base - stack_size, pageSize);
928
929    DPRINTF(Stack, "Mapping the stack: 0x%x %dB\n", stack_end, stack_size);
930    allocateMem(stack_end, stack_size);
931
932    // map out initial stack contents
933    IntType sentry_base = stack_base - sentry_size;
934    IntType file_name_base = sentry_base - file_name_size;
935    IntType env_data_base = file_name_base - env_data_size;
936    IntType arg_data_base = env_data_base - arg_data_size;
937    IntType aux_data_base = arg_data_base - info_block_padding - aux_data_size;
938    IntType auxv_array_base = aux_data_base - aux_array_size - aux_padding;
939    IntType envp_array_base = auxv_array_base - envp_array_size;
940    IntType argv_array_base = envp_array_base - argv_array_size;
941    IntType argc_base = argv_array_base - argc_size;
942
943    DPRINTF(Stack, "The addresses of items on the initial stack:\n");
944    DPRINTF(Stack, "0x%x - file name\n", file_name_base);
945    DPRINTF(Stack, "0x%x - env data\n", env_data_base);
946    DPRINTF(Stack, "0x%x - arg data\n", arg_data_base);
947    DPRINTF(Stack, "0x%x - aux data\n", aux_data_base);
948    DPRINTF(Stack, "0x%x - auxv array\n", auxv_array_base);
949    DPRINTF(Stack, "0x%x - envp array\n", envp_array_base);
950    DPRINTF(Stack, "0x%x - argv array\n", argv_array_base);
951    DPRINTF(Stack, "0x%x - argc \n", argc_base);
952    DPRINTF(Stack, "0x%x - stack min\n", stack_min);
953
954    // write contents to stack
955
956    // figure out argc
957    IntType argc = argv.size();
958    IntType guestArgc = X86ISA::htog(argc);
959
960    //Write out the sentry void *
961    IntType sentry_NULL = 0;
962    initVirtMem.writeBlob(sentry_base,
963            (uint8_t*)&sentry_NULL, sentry_size);
964
965    //Write the file name
966    initVirtMem.writeString(file_name_base, filename.c_str());
967
968    //Fix up the aux vectors which point to data
969    assert(auxv[auxv.size() - 3].a_type == M5_AT_RANDOM);
970    auxv[auxv.size() - 3].a_val = aux_data_base;
971    assert(auxv[auxv.size() - 2].a_type == M5_AT_EXECFN);
972    auxv[auxv.size() - 2].a_val = argv_array_base;
973    assert(auxv[auxv.size() - 1].a_type == M5_AT_PLATFORM);
974    auxv[auxv.size() - 1].a_val = aux_data_base + numRandomBytes;
975
976    //Copy the aux stuff
977    for(int x = 0; x < auxv.size(); x++)
978    {
979        initVirtMem.writeBlob(auxv_array_base + x * 2 * intSize,
980                (uint8_t*)&(auxv[x].a_type), intSize);
981        initVirtMem.writeBlob(auxv_array_base + (x * 2 + 1) * intSize,
982                (uint8_t*)&(auxv[x].a_val), intSize);
983    }
984    //Write out the terminating zeroed auxilliary vector
985    const uint64_t zero = 0;
986    initVirtMem.writeBlob(auxv_array_base + 2 * intSize * auxv.size(),
987            (uint8_t*)&zero, 2 * intSize);
988
989    initVirtMem.writeString(aux_data_base, platform.c_str());
990
991    copyStringArray(envp, envp_array_base, env_data_base, initVirtMem);
992    copyStringArray(argv, argv_array_base, arg_data_base, initVirtMem);
993
994    initVirtMem.writeBlob(argc_base, (uint8_t*)&guestArgc, intSize);
995
996    ThreadContext *tc = system->getThreadContext(contextIds[0]);
997    //Set the stack pointer register
998    tc->setIntReg(StackPointerReg, stack_min);
999
1000    // There doesn't need to be any segment base added in since we're dealing
1001    // with the flat segmentation model.
1002    tc->pcState(objFile->entryPoint());
1003
1004    //Align the "stack_min" to a page boundary.
1005    stack_min = roundDown(stack_min, pageSize);
1006
1007//    num_processes++;
1008}
1009
1010void
1011X86_64LiveProcess::argsInit(int intSize, int pageSize)
1012{
1013    std::vector<AuxVector<uint64_t> > extraAuxvs;
1014    extraAuxvs.push_back(AuxVector<uint64_t>(M5_AT_SYSINFO_EHDR,
1015                vsyscallPage.base));
1016    X86LiveProcess::argsInit<uint64_t>(pageSize, extraAuxvs);
1017}
1018
1019void
1020I386LiveProcess::argsInit(int intSize, int pageSize)
1021{
1022    std::vector<AuxVector<uint32_t> > extraAuxvs;
1023    //Tell the binary where the vsyscall part of the vsyscall page is.
1024    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO,
1025                vsyscallPage.base + vsyscallPage.vsyscallOffset));
1026    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO_EHDR,
1027                vsyscallPage.base));
1028    X86LiveProcess::argsInit<uint32_t>(pageSize, extraAuxvs);
1029}
1030
1031void
1032X86LiveProcess::setSyscallReturn(ThreadContext *tc, SyscallReturn retval)
1033{
1034    tc->setIntReg(INTREG_RAX, retval.encodedValue());
1035}
1036
1037X86ISA::IntReg
1038X86_64LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1039{
1040    assert(i < NumArgumentRegs);
1041    return tc->readIntReg(ArgumentReg[i++]);
1042}
1043
1044void
1045X86_64LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1046{
1047    assert(i < NumArgumentRegs);
1048    return tc->setIntReg(ArgumentReg[i], val);
1049}
1050
1051X86ISA::IntReg
1052I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1053{
1054    assert(i < NumArgumentRegs32);
1055    return tc->readIntReg(ArgumentReg32[i++]);
1056}
1057
1058X86ISA::IntReg
1059I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width)
1060{
1061    assert(width == 32 || width == 64);
1062    assert(i < NumArgumentRegs);
1063    uint64_t retVal = tc->readIntReg(ArgumentReg32[i++]) & mask(32);
1064    if (width == 64)
1065        retVal |= ((uint64_t)tc->readIntReg(ArgumentReg[i++]) << 32);
1066    return retVal;
1067}
1068
1069void
1070I386LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1071{
1072    assert(i < NumArgumentRegs);
1073    return tc->setIntReg(ArgumentReg[i], val);
1074}
1075