process.cc revision 11385:dbbf54058f6f
1/*
2 * Copyright (c) 2014 Advanced Micro Devices, Inc.
3 * Copyright (c) 2007 The Hewlett-Packard Development Company
4 * All rights reserved.
5 *
6 * The license below extends only to copyright in the software and shall
7 * not be construed as granting a license to any other intellectual
8 * property including but not limited to intellectual property relating
9 * to a hardware implementation of the functionality of the software
10 * licensed hereunder.  You may use the software subject to the license
11 * terms below provided that you ensure that this notice is replicated
12 * unmodified and in its entirety in all distributions of the software,
13 * modified or unmodified, in source code or in binary form.
14 *
15 * Copyright (c) 2003-2006 The Regents of The University of Michigan
16 * All rights reserved.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are
20 * met: redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer;
22 * redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution;
25 * neither the name of the copyright holders nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * Authors: Gabe Black
42 *          Ali Saidi
43 */
44
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/regs/segment.hh"
47#include "arch/x86/isa_traits.hh"
48#include "arch/x86/process.hh"
49#include "arch/x86/system.hh"
50#include "arch/x86/types.hh"
51#include "base/loader/elf_object.hh"
52#include "base/loader/object_file.hh"
53#include "base/misc.hh"
54#include "base/trace.hh"
55#include "cpu/thread_context.hh"
56#include "debug/Stack.hh"
57#include "mem/multi_level_page_table.hh"
58#include "mem/page_table.hh"
59#include "sim/process_impl.hh"
60#include "sim/syscall_emul.hh"
61#include "sim/system.hh"
62
63using namespace std;
64using namespace X86ISA;
65
66static const int ArgumentReg[] = {
67    INTREG_RDI,
68    INTREG_RSI,
69    INTREG_RDX,
70    //This argument register is r10 for syscalls and rcx for C.
71    INTREG_R10W,
72    //INTREG_RCX,
73    INTREG_R8W,
74    INTREG_R9W
75};
76static const int NumArgumentRegs = sizeof(ArgumentReg) / sizeof(const int);
77static const int ArgumentReg32[] = {
78    INTREG_EBX,
79    INTREG_ECX,
80    INTREG_EDX,
81    INTREG_ESI,
82    INTREG_EDI,
83    INTREG_EBP
84};
85static const int NumArgumentRegs32 = sizeof(ArgumentReg) / sizeof(const int);
86
87X86LiveProcess::X86LiveProcess(LiveProcessParams * params, ObjectFile *objFile,
88        SyscallDesc *_syscallDescs, int _numSyscallDescs) :
89    LiveProcess(params, objFile), syscallDescs(_syscallDescs),
90    numSyscallDescs(_numSyscallDescs)
91{
92    brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize();
93    brk_point = roundUp(brk_point, PageBytes);
94}
95
96X86_64LiveProcess::X86_64LiveProcess(LiveProcessParams *params,
97        ObjectFile *objFile, SyscallDesc *_syscallDescs,
98        int _numSyscallDescs) :
99    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
100{
101
102    vsyscallPage.base = 0xffffffffff600000ULL;
103    vsyscallPage.size = PageBytes;
104    vsyscallPage.vtimeOffset = 0x400;
105    vsyscallPage.vgettimeofdayOffset = 0x0;
106
107    // Set up stack. On X86_64 Linux, stack goes from the top of memory
108    // downward, less the hole for the kernel address space plus one page
109    // for undertermined purposes.
110    stack_base = (Addr)0x7FFFFFFFF000ULL;
111
112    // Set pointer for next thread stack.  Reserve 8M for main stack.
113    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
114
115    // Set up region for mmaps. This was determined empirically and may not
116    // always be correct.
117    mmap_start = mmap_end = (Addr)0x2aaaaaaab000ULL;
118}
119
120void
121I386LiveProcess::syscall(int64_t callnum, ThreadContext *tc)
122{
123    TheISA::PCState pc = tc->pcState();
124    Addr eip = pc.pc();
125    if (eip >= vsyscallPage.base &&
126            eip < vsyscallPage.base + vsyscallPage.size) {
127        pc.npc(vsyscallPage.base + vsyscallPage.vsysexitOffset);
128        tc->pcState(pc);
129    }
130    X86LiveProcess::syscall(callnum, tc);
131}
132
133
134I386LiveProcess::I386LiveProcess(LiveProcessParams *params,
135        ObjectFile *objFile, SyscallDesc *_syscallDescs,
136        int _numSyscallDescs) :
137    X86LiveProcess(params, objFile, _syscallDescs, _numSyscallDescs)
138{
139    _gdtStart = ULL(0xffffd000);
140    _gdtSize = PageBytes;
141
142    vsyscallPage.base = 0xffffe000ULL;
143    vsyscallPage.size = PageBytes;
144    vsyscallPage.vsyscallOffset = 0x400;
145    vsyscallPage.vsysexitOffset = 0x410;
146
147    stack_base = _gdtStart;
148
149    // Set pointer for next thread stack.  Reserve 8M for main stack.
150    next_thread_stack_base = stack_base - (8 * 1024 * 1024);
151
152    // Set up region for mmaps. This was determined empirically and may not
153    // always be correct.
154    mmap_start = mmap_end = (Addr)0xf7ffe000ULL;
155}
156
157SyscallDesc*
158X86LiveProcess::getDesc(int callnum)
159{
160    if (callnum < 0 || callnum >= numSyscallDescs)
161        return NULL;
162    return &syscallDescs[callnum];
163}
164
165void
166X86_64LiveProcess::initState()
167{
168    X86LiveProcess::initState();
169
170    argsInit(sizeof(uint64_t), PageBytes);
171
172       // Set up the vsyscall page for this process.
173    allocateMem(vsyscallPage.base, vsyscallPage.size);
174    uint8_t vtimeBlob[] = {
175        0x48,0xc7,0xc0,0xc9,0x00,0x00,0x00,    // mov    $0xc9,%rax
176        0x0f,0x05,                             // syscall
177        0xc3                                   // retq
178    };
179    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vtimeOffset,
180            vtimeBlob, sizeof(vtimeBlob));
181
182    uint8_t vgettimeofdayBlob[] = {
183        0x48,0xc7,0xc0,0x60,0x00,0x00,0x00,    // mov    $0x60,%rax
184        0x0f,0x05,                             // syscall
185        0xc3                                   // retq
186    };
187    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset,
188            vgettimeofdayBlob, sizeof(vgettimeofdayBlob));
189
190    if (kvmInSE) {
191        PortProxy physProxy = system->physProxy;
192
193        /*
194         * Set up the gdt.
195         */
196        uint8_t numGDTEntries = 0;
197        uint64_t nullDescriptor = 0;
198        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
199                            (uint8_t *)(&nullDescriptor), 8);
200        numGDTEntries++;
201
202        SegDescriptor initDesc = 0;
203        initDesc.type.codeOrData = 0; // code or data type
204        initDesc.type.c = 0;          // conforming
205        initDesc.type.r = 1;          // readable
206        initDesc.dpl = 0;             // privilege
207        initDesc.p = 1;               // present
208        initDesc.l = 1;               // longmode - 64 bit
209        initDesc.d = 0;               // operand size
210        initDesc.g = 1;               // granularity
211        initDesc.s = 1;               // system segment
212        initDesc.limitHigh = 0xFFFF;
213        initDesc.limitLow = 0xF;
214        initDesc.baseHigh = 0x0;
215        initDesc.baseLow = 0x0;
216
217        //64 bit code segment
218        SegDescriptor csLowPLDesc = initDesc;
219        csLowPLDesc.type.codeOrData = 1;
220        csLowPLDesc.dpl = 0;
221        uint64_t csLowPLDescVal = csLowPLDesc;
222        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
223                            (uint8_t *)(&csLowPLDescVal), 8);
224
225        numGDTEntries++;
226
227        SegSelector csLowPL = 0;
228        csLowPL.si = numGDTEntries - 1;
229        csLowPL.rpl = 0;
230
231        //64 bit data segment
232        SegDescriptor dsLowPLDesc = initDesc;
233        dsLowPLDesc.type.codeOrData = 0;
234        dsLowPLDesc.dpl = 0;
235        uint64_t dsLowPLDescVal = dsLowPLDesc;
236        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
237                            (uint8_t *)(&dsLowPLDescVal), 8);
238
239        numGDTEntries++;
240
241        SegSelector dsLowPL = 0;
242        dsLowPL.si = numGDTEntries - 1;
243        dsLowPL.rpl = 0;
244
245        //64 bit data segment
246        SegDescriptor dsDesc = initDesc;
247        dsDesc.type.codeOrData = 0;
248        dsDesc.dpl = 3;
249        uint64_t dsDescVal = dsDesc;
250        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
251                            (uint8_t *)(&dsDescVal), 8);
252
253        numGDTEntries++;
254
255        SegSelector ds = 0;
256        ds.si = numGDTEntries - 1;
257        ds.rpl = 3;
258
259        //64 bit code segment
260        SegDescriptor csDesc = initDesc;
261        csDesc.type.codeOrData = 1;
262        csDesc.dpl = 3;
263        uint64_t csDescVal = csDesc;
264        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
265                            (uint8_t *)(&csDescVal), 8);
266
267        numGDTEntries++;
268
269        SegSelector cs = 0;
270        cs.si = numGDTEntries - 1;
271        cs.rpl = 3;
272
273        SegSelector scall = 0;
274        scall.si = csLowPL.si;
275        scall.rpl = 0;
276
277        SegSelector sret = 0;
278        sret.si = dsLowPL.si;
279        sret.rpl = 3;
280
281        /* In long mode the TSS has been extended to 16 Bytes */
282        TSSlow TSSDescLow = 0;
283        TSSDescLow.type = 0xB;
284        TSSDescLow.dpl = 0; // Privelege level 0
285        TSSDescLow.p = 1; // Present
286        TSSDescLow.g = 1; // Page granularity
287        TSSDescLow.limitHigh = 0xF;
288        TSSDescLow.limitLow = 0xFFFF;
289        TSSDescLow.baseLow = bits(TSSVirtAddr, 23, 0);
290        TSSDescLow.baseHigh = bits(TSSVirtAddr, 31, 24);
291
292        TSShigh TSSDescHigh = 0;
293        TSSDescHigh.base = bits(TSSVirtAddr, 63, 32);
294
295        struct TSSDesc {
296            uint64_t low;
297            uint64_t high;
298        } tssDescVal = {TSSDescLow, TSSDescHigh};
299
300        physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8,
301                            (uint8_t *)(&tssDescVal), sizeof(tssDescVal));
302
303        numGDTEntries++;
304
305        SegSelector tssSel = 0;
306        tssSel.si = numGDTEntries - 1;
307
308        uint64_t tss_base_addr = (TSSDescHigh.base << 32) |
309                                 (TSSDescLow.baseHigh << 24) |
310                                  TSSDescLow.baseLow;
311        uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16);
312
313        SegAttr tss_attr = 0;
314
315        tss_attr.type = TSSDescLow.type;
316        tss_attr.dpl = TSSDescLow.dpl;
317        tss_attr.present = TSSDescLow.p;
318        tss_attr.granularity = TSSDescLow.g;
319        tss_attr.unusable = 0;
320
321        for (int i = 0; i < contextIds.size(); i++) {
322            ThreadContext * tc = system->getThreadContext(contextIds[i]);
323
324            tc->setMiscReg(MISCREG_CS, cs);
325            tc->setMiscReg(MISCREG_DS, ds);
326            tc->setMiscReg(MISCREG_ES, ds);
327            tc->setMiscReg(MISCREG_FS, ds);
328            tc->setMiscReg(MISCREG_GS, ds);
329            tc->setMiscReg(MISCREG_SS, ds);
330
331            // LDT
332            tc->setMiscReg(MISCREG_TSL, 0);
333            SegAttr tslAttr = 0;
334            tslAttr.present = 1;
335            tslAttr.type = 2;
336            tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr);
337
338            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
339            tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1);
340
341            tc->setMiscReg(MISCREG_TR, tssSel);
342            tc->setMiscReg(MISCREG_TR_BASE, tss_base_addr);
343            tc->setMiscReg(MISCREG_TR_EFF_BASE, 0);
344            tc->setMiscReg(MISCREG_TR_LIMIT, tss_limit);
345            tc->setMiscReg(MISCREG_TR_ATTR, tss_attr);
346
347            //Start using longmode segments.
348            installSegDesc(tc, SEGMENT_REG_CS, csDesc, true);
349            installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true);
350            installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true);
351            installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true);
352            installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true);
353            installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true);
354
355            Efer efer = 0;
356            efer.sce = 1; // Enable system call extensions.
357            efer.lme = 1; // Enable long mode.
358            efer.lma = 1; // Activate long mode.
359            efer.nxe = 0; // Enable nx support.
360            efer.svme = 1; // Enable svm support for now.
361            efer.ffxsr = 0; // Turn on fast fxsave and fxrstor.
362            tc->setMiscReg(MISCREG_EFER, efer);
363
364            //Set up the registers that describe the operating mode.
365            CR0 cr0 = 0;
366            cr0.pg = 1; // Turn on paging.
367            cr0.cd = 0; // Don't disable caching.
368            cr0.nw = 0; // This is bit is defined to be ignored.
369            cr0.am = 1; // No alignment checking
370            cr0.wp = 1; // Supervisor mode can write read only pages
371            cr0.ne = 1;
372            cr0.et = 1; // This should always be 1
373            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
374                        // would be pointless.
375            cr0.em = 0; // Allow x87 instructions to execute natively.
376            cr0.mp = 1; // This doesn't really matter, but the manual suggests
377                        // setting it to one.
378            cr0.pe = 1; // We're definitely in protected mode.
379            tc->setMiscReg(MISCREG_CR0, cr0);
380
381            CR0 cr2 = 0;
382            tc->setMiscReg(MISCREG_CR2, cr2);
383
384            CR3 cr3 = pageTablePhysAddr;
385            tc->setMiscReg(MISCREG_CR3, cr3);
386
387            CR4 cr4 = 0;
388            //Turn on pae.
389            cr4.osxsave = 1; // Enable XSAVE and Proc Extended States
390            cr4.osxmmexcpt = 1; // Operating System Unmasked Exception
391            cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support
392            cr4.pce = 0; // Performance-Monitoring Counter Enable
393            cr4.pge = 0; // Page-Global Enable
394            cr4.mce = 0; // Machine Check Enable
395            cr4.pae = 1; // Physical-Address Extension
396            cr4.pse = 0; // Page Size Extensions
397            cr4.de = 0; // Debugging Extensions
398            cr4.tsd = 0; // Time Stamp Disable
399            cr4.pvi = 0; // Protected-Mode Virtual Interrupts
400            cr4.vme = 0; // Virtual-8086 Mode Extensions
401
402            tc->setMiscReg(MISCREG_CR4, cr4);
403
404            CR4 cr8 = 0;
405            tc->setMiscReg(MISCREG_CR8, cr8);
406
407            const Addr PageMapLevel4 = pageTablePhysAddr;
408            //Point to the page tables.
409            tc->setMiscReg(MISCREG_CR3, PageMapLevel4);
410
411            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
412
413            tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900);
414
415            tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr);
416            tc->setMiscReg(MISCREG_TSG_LIMIT, 0xffff);
417
418            tc->setMiscReg(MISCREG_IDTR_BASE, IDTVirtAddr);
419            tc->setMiscReg(MISCREG_IDTR_LIMIT, 0xffff);
420
421            /* enabling syscall and sysret */
422            MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32);
423            tc->setMiscReg(MISCREG_STAR, star);
424            MiscReg lstar = (MiscReg)syscallCodeVirtAddr;
425            tc->setMiscReg(MISCREG_LSTAR, lstar);
426            MiscReg sfmask = (1 << 8) | (1 << 10); // TF | DF
427            tc->setMiscReg(MISCREG_SF_MASK, sfmask);
428        }
429
430        /* Set up the content of the TSS and write it to physical memory. */
431
432        struct {
433            uint32_t reserved0;        // +00h
434            uint32_t RSP0_low;         // +04h
435            uint32_t RSP0_high;        // +08h
436            uint32_t RSP1_low;         // +0Ch
437            uint32_t RSP1_high;        // +10h
438            uint32_t RSP2_low;         // +14h
439            uint32_t RSP2_high;        // +18h
440            uint32_t reserved1;        // +1Ch
441            uint32_t reserved2;        // +20h
442            uint32_t IST1_low;         // +24h
443            uint32_t IST1_high;        // +28h
444            uint32_t IST2_low;         // +2Ch
445            uint32_t IST2_high;        // +30h
446            uint32_t IST3_low;         // +34h
447            uint32_t IST3_high;        // +38h
448            uint32_t IST4_low;         // +3Ch
449            uint32_t IST4_high;        // +40h
450            uint32_t IST5_low;         // +44h
451            uint32_t IST5_high;        // +48h
452            uint32_t IST6_low;         // +4Ch
453            uint32_t IST6_high;        // +50h
454            uint32_t IST7_low;         // +54h
455            uint32_t IST7_high;        // +58h
456            uint32_t reserved3;        // +5Ch
457            uint32_t reserved4;        // +60h
458            uint16_t reserved5;        // +64h
459            uint16_t IO_MapBase;       // +66h
460        } tss;
461
462        /** setting Interrupt Stack Table */
463        uint64_t IST_start = ISTVirtAddr + PageBytes;
464        tss.IST1_low  = IST_start;
465        tss.IST1_high = IST_start >> 32;
466        tss.RSP0_low  = tss.IST1_low;
467        tss.RSP0_high = tss.IST1_high;
468        tss.RSP1_low  = tss.IST1_low;
469        tss.RSP1_high = tss.IST1_high;
470        tss.RSP2_low  = tss.IST1_low;
471        tss.RSP2_high = tss.IST1_high;
472        physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss));
473
474        /* Setting IDT gates */
475        GateDescriptorLow PFGateLow = 0;
476        PFGateLow.offsetHigh = bits(PFHandlerVirtAddr, 31, 16);
477        PFGateLow.offsetLow = bits(PFHandlerVirtAddr, 15, 0);
478        PFGateLow.selector = csLowPL;
479        PFGateLow.p = 1;
480        PFGateLow.dpl = 0;
481        PFGateLow.type = 0xe;      // gate interrupt type
482        PFGateLow.IST = 0;         // setting IST to 0 and using RSP0
483
484        GateDescriptorHigh PFGateHigh = 0;
485        PFGateHigh.offset = bits(PFHandlerVirtAddr, 63, 32);
486
487        struct {
488            uint64_t low;
489            uint64_t high;
490        } PFGate = {PFGateLow, PFGateHigh};
491
492        physProxy.writeBlob(IDTPhysAddr + 0xE0,
493                            (uint8_t *)(&PFGate), sizeof(PFGate));
494
495        /* System call handler */
496        uint8_t syscallBlob[] = {
497            // mov    %rax, (0xffffc90000005600)
498            0x48, 0xa3, 0x00, 0x60, 0x00,
499            0x00, 0x00, 0xc9, 0xff, 0xff,
500            // sysret
501            0x48, 0x0f, 0x07
502        };
503
504        physProxy.writeBlob(syscallCodePhysAddr,
505                            syscallBlob, sizeof(syscallBlob));
506
507        /** Page fault handler */
508        uint8_t faultBlob[] = {
509            // mov    %rax, (0xffffc90000005700)
510            0x48, 0xa3, 0x00, 0x61, 0x00,
511            0x00, 0x00, 0xc9, 0xff, 0xff,
512            // add    $0x8, %rsp # skip error
513            0x48, 0x83, 0xc4, 0x08,
514            // iretq
515            0x48, 0xcf
516        };
517
518        physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob));
519
520        MultiLevelPageTable<PageTableOps> *pt =
521            dynamic_cast<MultiLevelPageTable<PageTableOps> *>(pTable);
522
523        /* Syscall handler */
524        pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false);
525        /* GDT */
526        pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false);
527        /* IDT */
528        pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false);
529        /* TSS */
530        pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false);
531        /* IST */
532        pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false);
533        /* PF handler */
534        pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false);
535        /* MMIO region for m5ops */
536        pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false);
537    } else {
538        for (int i = 0; i < contextIds.size(); i++) {
539            ThreadContext * tc = system->getThreadContext(contextIds[i]);
540
541            SegAttr dataAttr = 0;
542            dataAttr.dpl = 3;
543            dataAttr.unusable = 0;
544            dataAttr.defaultSize = 1;
545            dataAttr.longMode = 1;
546            dataAttr.avl = 0;
547            dataAttr.granularity = 1;
548            dataAttr.present = 1;
549            dataAttr.type = 3;
550            dataAttr.writable = 1;
551            dataAttr.readable = 1;
552            dataAttr.expandDown = 0;
553            dataAttr.system = 1;
554
555            //Initialize the segment registers.
556            for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
557                tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
558                tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
559                tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
560            }
561
562            SegAttr csAttr = 0;
563            csAttr.dpl = 3;
564            csAttr.unusable = 0;
565            csAttr.defaultSize = 0;
566            csAttr.longMode = 1;
567            csAttr.avl = 0;
568            csAttr.granularity = 1;
569            csAttr.present = 1;
570            csAttr.type = 10;
571            csAttr.writable = 0;
572            csAttr.readable = 1;
573            csAttr.expandDown = 0;
574            csAttr.system = 1;
575
576            tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
577
578            Efer efer = 0;
579            efer.sce = 1; // Enable system call extensions.
580            efer.lme = 1; // Enable long mode.
581            efer.lma = 1; // Activate long mode.
582            efer.nxe = 1; // Enable nx support.
583            efer.svme = 0; // Disable svm support for now. It isn't implemented.
584            efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
585            tc->setMiscReg(MISCREG_EFER, efer);
586
587            //Set up the registers that describe the operating mode.
588            CR0 cr0 = 0;
589            cr0.pg = 1; // Turn on paging.
590            cr0.cd = 0; // Don't disable caching.
591            cr0.nw = 0; // This is bit is defined to be ignored.
592            cr0.am = 0; // No alignment checking
593            cr0.wp = 0; // Supervisor mode can write read only pages
594            cr0.ne = 1;
595            cr0.et = 1; // This should always be 1
596            cr0.ts = 0; // We don't do task switching, so causing fp exceptions
597                        // would be pointless.
598            cr0.em = 0; // Allow x87 instructions to execute natively.
599            cr0.mp = 1; // This doesn't really matter, but the manual suggests
600                        // setting it to one.
601            cr0.pe = 1; // We're definitely in protected mode.
602            tc->setMiscReg(MISCREG_CR0, cr0);
603
604            tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
605        }
606    }
607}
608
609void
610I386LiveProcess::initState()
611{
612    X86LiveProcess::initState();
613
614    argsInit(sizeof(uint32_t), PageBytes);
615
616    /*
617     * Set up a GDT for this process. The whole GDT wouldn't really be for
618     * this process, but the only parts we care about are.
619     */
620    allocateMem(_gdtStart, _gdtSize);
621    uint64_t zero = 0;
622    assert(_gdtSize % sizeof(zero) == 0);
623    for (Addr gdtCurrent = _gdtStart;
624            gdtCurrent < _gdtStart + _gdtSize; gdtCurrent += sizeof(zero)) {
625        initVirtMem.write(gdtCurrent, zero);
626    }
627
628    // Set up the vsyscall page for this process.
629    allocateMem(vsyscallPage.base, vsyscallPage.size);
630    uint8_t vsyscallBlob[] = {
631        0x51,       // push %ecx
632        0x52,       // push %edp
633        0x55,       // push %ebp
634        0x89, 0xe5, // mov %esp, %ebp
635        0x0f, 0x34  // sysenter
636    };
637    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsyscallOffset,
638            vsyscallBlob, sizeof(vsyscallBlob));
639
640    uint8_t vsysexitBlob[] = {
641        0x5d,       // pop %ebp
642        0x5a,       // pop %edx
643        0x59,       // pop %ecx
644        0xc3        // ret
645    };
646    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsysexitOffset,
647            vsysexitBlob, sizeof(vsysexitBlob));
648
649    for (int i = 0; i < contextIds.size(); i++) {
650        ThreadContext * tc = system->getThreadContext(contextIds[i]);
651
652        SegAttr dataAttr = 0;
653        dataAttr.dpl = 3;
654        dataAttr.unusable = 0;
655        dataAttr.defaultSize = 1;
656        dataAttr.longMode = 0;
657        dataAttr.avl = 0;
658        dataAttr.granularity = 1;
659        dataAttr.present = 1;
660        dataAttr.type = 3;
661        dataAttr.writable = 1;
662        dataAttr.readable = 1;
663        dataAttr.expandDown = 0;
664        dataAttr.system = 1;
665
666        //Initialize the segment registers.
667        for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
668            tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
669            tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
670            tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
671            tc->setMiscRegNoEffect(MISCREG_SEG_SEL(seg), 0xB);
672            tc->setMiscRegNoEffect(MISCREG_SEG_LIMIT(seg), (uint32_t)(-1));
673        }
674
675        SegAttr csAttr = 0;
676        csAttr.dpl = 3;
677        csAttr.unusable = 0;
678        csAttr.defaultSize = 1;
679        csAttr.longMode = 0;
680        csAttr.avl = 0;
681        csAttr.granularity = 1;
682        csAttr.present = 1;
683        csAttr.type = 0xa;
684        csAttr.writable = 0;
685        csAttr.readable = 1;
686        csAttr.expandDown = 0;
687        csAttr.system = 1;
688
689        tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);
690
691        tc->setMiscRegNoEffect(MISCREG_TSG_BASE, _gdtStart);
692        tc->setMiscRegNoEffect(MISCREG_TSG_EFF_BASE, _gdtStart);
693        tc->setMiscRegNoEffect(MISCREG_TSG_LIMIT, _gdtStart + _gdtSize - 1);
694
695        // Set the LDT selector to 0 to deactivate it.
696        tc->setMiscRegNoEffect(MISCREG_TSL, 0);
697
698        Efer efer = 0;
699        efer.sce = 1; // Enable system call extensions.
700        efer.lme = 1; // Enable long mode.
701        efer.lma = 0; // Deactivate long mode.
702        efer.nxe = 1; // Enable nx support.
703        efer.svme = 0; // Disable svm support for now. It isn't implemented.
704        efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
705        tc->setMiscReg(MISCREG_EFER, efer);
706
707        //Set up the registers that describe the operating mode.
708        CR0 cr0 = 0;
709        cr0.pg = 1; // Turn on paging.
710        cr0.cd = 0; // Don't disable caching.
711        cr0.nw = 0; // This is bit is defined to be ignored.
712        cr0.am = 0; // No alignment checking
713        cr0.wp = 0; // Supervisor mode can write read only pages
714        cr0.ne = 1;
715        cr0.et = 1; // This should always be 1
716        cr0.ts = 0; // We don't do task switching, so causing fp exceptions
717                    // would be pointless.
718        cr0.em = 0; // Allow x87 instructions to execute natively.
719        cr0.mp = 1; // This doesn't really matter, but the manual suggests
720                    // setting it to one.
721        cr0.pe = 1; // We're definitely in protected mode.
722        tc->setMiscReg(MISCREG_CR0, cr0);
723
724        tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
725    }
726}
727
728template<class IntType>
729void
730X86LiveProcess::argsInit(int pageSize,
731        std::vector<AuxVector<IntType> > extraAuxvs)
732{
733    int intSize = sizeof(IntType);
734
735    typedef AuxVector<IntType> auxv_t;
736    std::vector<auxv_t> auxv = extraAuxvs;
737
738    string filename;
739    if (argv.size() < 1)
740        filename = "";
741    else
742        filename = argv[0];
743
744    //We want 16 byte alignment
745    uint64_t align = 16;
746
747    // load object file into target memory
748    objFile->loadSections(initVirtMem);
749
750    enum X86CpuFeature {
751        X86_OnboardFPU = 1 << 0,
752        X86_VirtualModeExtensions = 1 << 1,
753        X86_DebuggingExtensions = 1 << 2,
754        X86_PageSizeExtensions = 1 << 3,
755
756        X86_TimeStampCounter = 1 << 4,
757        X86_ModelSpecificRegisters = 1 << 5,
758        X86_PhysicalAddressExtensions = 1 << 6,
759        X86_MachineCheckExtensions = 1 << 7,
760
761        X86_CMPXCHG8Instruction = 1 << 8,
762        X86_OnboardAPIC = 1 << 9,
763        X86_SYSENTER_SYSEXIT = 1 << 11,
764
765        X86_MemoryTypeRangeRegisters = 1 << 12,
766        X86_PageGlobalEnable = 1 << 13,
767        X86_MachineCheckArchitecture = 1 << 14,
768        X86_CMOVInstruction = 1 << 15,
769
770        X86_PageAttributeTable = 1 << 16,
771        X86_36BitPSEs = 1 << 17,
772        X86_ProcessorSerialNumber = 1 << 18,
773        X86_CLFLUSHInstruction = 1 << 19,
774
775        X86_DebugTraceStore = 1 << 21,
776        X86_ACPIViaMSR = 1 << 22,
777        X86_MultimediaExtensions = 1 << 23,
778
779        X86_FXSAVE_FXRSTOR = 1 << 24,
780        X86_StreamingSIMDExtensions = 1 << 25,
781        X86_StreamingSIMDExtensions2 = 1 << 26,
782        X86_CPUSelfSnoop = 1 << 27,
783
784        X86_HyperThreading = 1 << 28,
785        X86_AutomaticClockControl = 1 << 29,
786        X86_IA64Processor = 1 << 30
787    };
788
789    // Setup the auxilliary vectors. These will already have endian conversion.
790    // Auxilliary vectors are loaded only for elf formatted executables.
791    ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile);
792    if (elfObject) {
793        uint64_t features =
794            X86_OnboardFPU |
795            X86_VirtualModeExtensions |
796            X86_DebuggingExtensions |
797            X86_PageSizeExtensions |
798            X86_TimeStampCounter |
799            X86_ModelSpecificRegisters |
800            X86_PhysicalAddressExtensions |
801            X86_MachineCheckExtensions |
802            X86_CMPXCHG8Instruction |
803            X86_OnboardAPIC |
804            X86_SYSENTER_SYSEXIT |
805            X86_MemoryTypeRangeRegisters |
806            X86_PageGlobalEnable |
807            X86_MachineCheckArchitecture |
808            X86_CMOVInstruction |
809            X86_PageAttributeTable |
810            X86_36BitPSEs |
811//            X86_ProcessorSerialNumber |
812            X86_CLFLUSHInstruction |
813//            X86_DebugTraceStore |
814//            X86_ACPIViaMSR |
815            X86_MultimediaExtensions |
816            X86_FXSAVE_FXRSTOR |
817            X86_StreamingSIMDExtensions |
818            X86_StreamingSIMDExtensions2 |
819//            X86_CPUSelfSnoop |
820//            X86_HyperThreading |
821//            X86_AutomaticClockControl |
822//            X86_IA64Processor |
823            0;
824
825        //Bits which describe the system hardware capabilities
826        //XXX Figure out what these should be
827        auxv.push_back(auxv_t(M5_AT_HWCAP, features));
828        //The system page size
829        auxv.push_back(auxv_t(M5_AT_PAGESZ, X86ISA::PageBytes));
830        //Frequency at which times() increments
831        //Defined to be 100 in the kernel source.
832        auxv.push_back(auxv_t(M5_AT_CLKTCK, 100));
833        // For statically linked executables, this is the virtual address of the
834        // program header tables if they appear in the executable image
835        auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable()));
836        // This is the size of a program header entry from the elf file.
837        auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize()));
838        // This is the number of program headers from the original elf file.
839        auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount()));
840        //This is the address of the elf "interpreter", It should be set
841        //to 0 for regular executables. It should be something else
842        //(not sure what) for dynamic libraries.
843        auxv.push_back(auxv_t(M5_AT_BASE, 0));
844
845        //XXX Figure out what this should be.
846        auxv.push_back(auxv_t(M5_AT_FLAGS, 0));
847        //The entry point to the program
848        auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint()));
849        //Different user and group IDs
850        auxv.push_back(auxv_t(M5_AT_UID, uid()));
851        auxv.push_back(auxv_t(M5_AT_EUID, euid()));
852        auxv.push_back(auxv_t(M5_AT_GID, gid()));
853        auxv.push_back(auxv_t(M5_AT_EGID, egid()));
854        //Whether to enable "secure mode" in the executable
855        auxv.push_back(auxv_t(M5_AT_SECURE, 0));
856        //The address of 16 "random" bytes.
857        auxv.push_back(auxv_t(M5_AT_RANDOM, 0));
858        //The name of the program
859        auxv.push_back(auxv_t(M5_AT_EXECFN, 0));
860        //The platform string
861        auxv.push_back(auxv_t(M5_AT_PLATFORM, 0));
862    }
863
864    //Figure out how big the initial stack needs to be
865
866    // A sentry NULL void pointer at the top of the stack.
867    int sentry_size = intSize;
868
869    //This is the name of the file which is present on the initial stack
870    //It's purpose is to let the user space linker examine the original file.
871    int file_name_size = filename.size() + 1;
872
873    const int numRandomBytes = 16;
874    int aux_data_size = numRandomBytes;
875
876    string platform = "x86_64";
877    aux_data_size += platform.size() + 1;
878
879    int env_data_size = 0;
880    for (int i = 0; i < envp.size(); ++i)
881        env_data_size += envp[i].size() + 1;
882    int arg_data_size = 0;
883    for (int i = 0; i < argv.size(); ++i)
884        arg_data_size += argv[i].size() + 1;
885
886    //The info_block needs to be padded so it's size is a multiple of the
887    //alignment mask. Also, it appears that there needs to be at least some
888    //padding, so if the size is already a multiple, we need to increase it
889    //anyway.
890    int base_info_block_size =
891        sentry_size + file_name_size + env_data_size + arg_data_size;
892
893    int info_block_size = roundUp(base_info_block_size, align);
894
895    int info_block_padding = info_block_size - base_info_block_size;
896
897    //Each auxilliary vector is two 8 byte words
898    int aux_array_size = intSize * 2 * (auxv.size() + 1);
899
900    int envp_array_size = intSize * (envp.size() + 1);
901    int argv_array_size = intSize * (argv.size() + 1);
902
903    int argc_size = intSize;
904
905    //Figure out the size of the contents of the actual initial frame
906    int frame_size =
907        aux_array_size +
908        envp_array_size +
909        argv_array_size +
910        argc_size;
911
912    //There needs to be padding after the auxiliary vector data so that the
913    //very bottom of the stack is aligned properly.
914    int partial_size = frame_size + aux_data_size;
915    int aligned_partial_size = roundUp(partial_size, align);
916    int aux_padding = aligned_partial_size - partial_size;
917
918    int space_needed =
919        info_block_size +
920        aux_data_size +
921        aux_padding +
922        frame_size;
923
924    stack_min = stack_base - space_needed;
925    stack_min = roundDown(stack_min, align);
926    stack_size = roundUp(stack_base - stack_min, pageSize);
927
928    // map memory
929    Addr stack_end = roundDown(stack_base - stack_size, pageSize);
930
931    DPRINTF(Stack, "Mapping the stack: 0x%x %dB\n", stack_end, stack_size);
932    allocateMem(stack_end, stack_size);
933
934    // map out initial stack contents
935    IntType sentry_base = stack_base - sentry_size;
936    IntType file_name_base = sentry_base - file_name_size;
937    IntType env_data_base = file_name_base - env_data_size;
938    IntType arg_data_base = env_data_base - arg_data_size;
939    IntType aux_data_base = arg_data_base - info_block_padding - aux_data_size;
940    IntType auxv_array_base = aux_data_base - aux_array_size - aux_padding;
941    IntType envp_array_base = auxv_array_base - envp_array_size;
942    IntType argv_array_base = envp_array_base - argv_array_size;
943    IntType argc_base = argv_array_base - argc_size;
944
945    DPRINTF(Stack, "The addresses of items on the initial stack:\n");
946    DPRINTF(Stack, "0x%x - file name\n", file_name_base);
947    DPRINTF(Stack, "0x%x - env data\n", env_data_base);
948    DPRINTF(Stack, "0x%x - arg data\n", arg_data_base);
949    DPRINTF(Stack, "0x%x - aux data\n", aux_data_base);
950    DPRINTF(Stack, "0x%x - auxv array\n", auxv_array_base);
951    DPRINTF(Stack, "0x%x - envp array\n", envp_array_base);
952    DPRINTF(Stack, "0x%x - argv array\n", argv_array_base);
953    DPRINTF(Stack, "0x%x - argc \n", argc_base);
954    DPRINTF(Stack, "0x%x - stack min\n", stack_min);
955
956    // write contents to stack
957
958    // figure out argc
959    IntType argc = argv.size();
960    IntType guestArgc = X86ISA::htog(argc);
961
962    //Write out the sentry void *
963    IntType sentry_NULL = 0;
964    initVirtMem.writeBlob(sentry_base,
965            (uint8_t*)&sentry_NULL, sentry_size);
966
967    //Write the file name
968    initVirtMem.writeString(file_name_base, filename.c_str());
969
970    //Fix up the aux vectors which point to data
971    assert(auxv[auxv.size() - 3].a_type == M5_AT_RANDOM);
972    auxv[auxv.size() - 3].a_val = aux_data_base;
973    assert(auxv[auxv.size() - 2].a_type == M5_AT_EXECFN);
974    auxv[auxv.size() - 2].a_val = argv_array_base;
975    assert(auxv[auxv.size() - 1].a_type == M5_AT_PLATFORM);
976    auxv[auxv.size() - 1].a_val = aux_data_base + numRandomBytes;
977
978    //Copy the aux stuff
979    for (int x = 0; x < auxv.size(); x++) {
980        initVirtMem.writeBlob(auxv_array_base + x * 2 * intSize,
981                (uint8_t*)&(auxv[x].a_type), intSize);
982        initVirtMem.writeBlob(auxv_array_base + (x * 2 + 1) * intSize,
983                (uint8_t*)&(auxv[x].a_val), intSize);
984    }
985    //Write out the terminating zeroed auxilliary vector
986    const uint64_t zero = 0;
987    initVirtMem.writeBlob(auxv_array_base + auxv.size() * 2 * intSize,
988                          (uint8_t*)&zero, intSize);
989    initVirtMem.writeBlob(auxv_array_base + (auxv.size() * 2 + 1) * intSize,
990                          (uint8_t*)&zero, intSize);
991
992    initVirtMem.writeString(aux_data_base, platform.c_str());
993
994    copyStringArray(envp, envp_array_base, env_data_base, initVirtMem);
995    copyStringArray(argv, argv_array_base, arg_data_base, initVirtMem);
996
997    initVirtMem.writeBlob(argc_base, (uint8_t*)&guestArgc, intSize);
998
999    ThreadContext *tc = system->getThreadContext(contextIds[0]);
1000    //Set the stack pointer register
1001    tc->setIntReg(StackPointerReg, stack_min);
1002
1003    // There doesn't need to be any segment base added in since we're dealing
1004    // with the flat segmentation model.
1005    tc->pcState(objFile->entryPoint());
1006
1007    //Align the "stack_min" to a page boundary.
1008    stack_min = roundDown(stack_min, pageSize);
1009
1010//    num_processes++;
1011}
1012
1013void
1014X86_64LiveProcess::argsInit(int intSize, int pageSize)
1015{
1016    std::vector<AuxVector<uint64_t> > extraAuxvs;
1017    extraAuxvs.push_back(AuxVector<uint64_t>(M5_AT_SYSINFO_EHDR,
1018                vsyscallPage.base));
1019    X86LiveProcess::argsInit<uint64_t>(pageSize, extraAuxvs);
1020}
1021
1022void
1023I386LiveProcess::argsInit(int intSize, int pageSize)
1024{
1025    std::vector<AuxVector<uint32_t> > extraAuxvs;
1026    //Tell the binary where the vsyscall part of the vsyscall page is.
1027    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO,
1028                vsyscallPage.base + vsyscallPage.vsyscallOffset));
1029    extraAuxvs.push_back(AuxVector<uint32_t>(M5_AT_SYSINFO_EHDR,
1030                vsyscallPage.base));
1031    X86LiveProcess::argsInit<uint32_t>(pageSize, extraAuxvs);
1032}
1033
1034void
1035X86LiveProcess::setSyscallReturn(ThreadContext *tc, SyscallReturn retval)
1036{
1037    tc->setIntReg(INTREG_RAX, retval.encodedValue());
1038}
1039
1040X86ISA::IntReg
1041X86_64LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1042{
1043    assert(i < NumArgumentRegs);
1044    return tc->readIntReg(ArgumentReg[i++]);
1045}
1046
1047void
1048X86_64LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1049{
1050    assert(i < NumArgumentRegs);
1051    return tc->setIntReg(ArgumentReg[i], val);
1052}
1053
1054X86ISA::IntReg
1055I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i)
1056{
1057    assert(i < NumArgumentRegs32);
1058    return tc->readIntReg(ArgumentReg32[i++]);
1059}
1060
1061X86ISA::IntReg
1062I386LiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width)
1063{
1064    assert(width == 32 || width == 64);
1065    assert(i < NumArgumentRegs);
1066    uint64_t retVal = tc->readIntReg(ArgumentReg32[i++]) & mask(32);
1067    if (width == 64)
1068        retVal |= ((uint64_t)tc->readIntReg(ArgumentReg[i++]) << 32);
1069    return retVal;
1070}
1071
1072void
1073I386LiveProcess::setSyscallArg(ThreadContext *tc, int i, X86ISA::IntReg val)
1074{
1075    assert(i < NumArgumentRegs);
1076    return tc->setIntReg(ArgumentReg[i], val);
1077}
1078