// system.cc revision 7901
/*
 * Copyright (c) 2007 The Hewlett-Packard Development Company
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabe Black
 */

#include "arch/x86/bios/smbios.hh"
#include "arch/x86/bios/intelmp.hh"
#include "arch/x86/isa_traits.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/system.hh"
#include "arch/vtophys.hh"
#include "base/intmath.hh"
#include "base/loader/object_file.hh"
#include "base/loader/symtab.hh"
#include "base/trace.hh"
#include "cpu/thread_context.hh"
#include "mem/physical.hh"
#include "params/X86System.hh"
#include "sim/byteswap.hh"


using namespace LittleEndianGuest;
using namespace X86ISA;

59X86System::X86System(Params *p) :
60    System(p), smbiosTable(p->smbios_table),
61    mpFloatingPointer(p->intel_mp_pointer),
62    mpConfigTable(p->intel_mp_table),
63    rsdp(p->acpi_description_table_pointer)
64{
65    if (kernel->getArch() == ObjectFile::I386)
66        fatal("Loading a 32 bit x86 kernel is not supported.\n");
67}
68
69static void
70installSegDesc(ThreadContext *tc, SegmentRegIndex seg,
71        SegDescriptor desc, bool longmode)
72{
73    uint64_t base = desc.baseLow + (desc.baseHigh << 24);
74    bool honorBase = !longmode || seg == SEGMENT_REG_FS ||
75                                  seg == SEGMENT_REG_GS ||
76                                  seg == SEGMENT_REG_TSL ||
77                                  seg == SYS_SEGMENT_REG_TR;
78    uint64_t limit = desc.limitLow | (desc.limitHigh << 16);
79
80    SegAttr attr = 0;
81
82    attr.dpl = desc.dpl;
83    attr.unusable = 0;
84    attr.defaultSize = desc.d;
85    attr.longMode = desc.l;
86    attr.avl = desc.avl;
87    attr.granularity = desc.g;
88    attr.present = desc.p;
89    attr.system = desc.s;
90    attr.type = desc.type;
91    if (desc.s) {
92        if (desc.type.codeOrData) {
93            // Code segment
94            attr.expandDown = 0;
95            attr.readable = desc.type.r;
96            attr.writable = 0;
97        } else {
98            // Data segment
99            attr.expandDown = desc.type.e;
100            attr.readable = 1;
101            attr.writable = desc.type.w;
102        }
103    } else {
104        attr.readable = 1;
105        attr.writable = 1;
106        attr.expandDown = 0;
107    }
108
109    tc->setMiscReg(MISCREG_SEG_BASE(seg), base);
110    tc->setMiscReg(MISCREG_SEG_EFF_BASE(seg), honorBase ? base : 0);
111    tc->setMiscReg(MISCREG_SEG_LIMIT(seg), limit);
112    tc->setMiscReg(MISCREG_SEG_ATTR(seg), (MiscReg)attr);
113}
114
115void
116X86System::initState()
117{
118    System::initState();
119
120    ThreadContext *tc = threadContexts[0];
121    // This is the boot strap processor (BSP). Initialize it to look like
122    // the boot loader has just turned control over to the 64 bit OS. We
123    // won't actually set up real mode or legacy protected mode descriptor
124    // tables because we aren't executing any code that would require
125    // them. We do, however toggle the control bits in the correct order
126    // while allowing consistency checks and the underlying mechansims
127    // just to be safe.
128
129    const int NumPDTs = 4;
130
131    const Addr PageMapLevel4 = 0x70000;
132    const Addr PageDirPtrTable = 0x71000;
133    const Addr PageDirTable[NumPDTs] =
134        {0x72000, 0x73000, 0x74000, 0x75000};
135    const Addr GDTBase = 0x76000;
136
137    const int PML4Bits = 9;
138    const int PDPTBits = 9;
139    const int PDTBits = 9;
140
141    // Get a port to write the page tables and descriptor tables.
142    FunctionalPort * physPort = tc->getPhysPort();
143
144    /*
145     * Set up the gdt.
146     */
147    uint8_t numGDTEntries = 0;
148    // Place holder at selector 0
149    uint64_t nullDescriptor = 0;
150    physPort->writeBlob(GDTBase + numGDTEntries * 8,
151            (uint8_t *)(&nullDescriptor), 8);
152    numGDTEntries++;
153
154    //64 bit code segment
155    SegDescriptor csDesc = 0;
156    csDesc.type.codeOrData = 1;
157    csDesc.type.c = 0; // Not conforming
158    csDesc.type.r = 1; // Readable
159    csDesc.dpl = 0; // Privelege level 0
160    csDesc.p = 1; // Present
161    csDesc.l = 1; // 64 bit
162    csDesc.d = 0; // default operand size
163    csDesc.g = 1; // Page granularity
164    csDesc.s = 1; // Not a system segment
165    csDesc.limitHigh = 0xF;
166    csDesc.limitLow = 0xFF;
167    //Because we're dealing with a pointer and I don't think it's
168    //guaranteed that there isn't anything in a nonvirtual class between
169    //it's beginning in memory and it's actual data, we'll use an
170    //intermediary.
171    uint64_t csDescVal = csDesc;
172    physPort->writeBlob(GDTBase + numGDTEntries * 8,
173            (uint8_t *)(&csDescVal), 8);
174
175    numGDTEntries++;
176
177    SegSelector cs = 0;
178    cs.si = numGDTEntries - 1;
179
180    tc->setMiscReg(MISCREG_CS, (MiscReg)cs);
181
182    //32 bit data segment
183    SegDescriptor dsDesc = 0;
184    dsDesc.type.codeOrData = 0;
185    dsDesc.type.e = 0; // Not expand down
186    dsDesc.type.w = 1; // Writable
187    dsDesc.dpl = 0; // Privelege level 0
188    dsDesc.p = 1; // Present
189    dsDesc.d = 1; // default operand size
190    dsDesc.g = 1; // Page granularity
191    dsDesc.s = 1; // Not a system segment
192    dsDesc.limitHigh = 0xF;
193    dsDesc.limitLow = 0xFF;
194    uint64_t dsDescVal = dsDesc;
195    physPort->writeBlob(GDTBase + numGDTEntries * 8,
196            (uint8_t *)(&dsDescVal), 8);
197
198    numGDTEntries++;
199
200    SegSelector ds = 0;
201    ds.si = numGDTEntries - 1;
202
203    tc->setMiscReg(MISCREG_DS, (MiscReg)ds);
204    tc->setMiscReg(MISCREG_ES, (MiscReg)ds);
205    tc->setMiscReg(MISCREG_FS, (MiscReg)ds);
206    tc->setMiscReg(MISCREG_GS, (MiscReg)ds);
207    tc->setMiscReg(MISCREG_SS, (MiscReg)ds);
208
209    tc->setMiscReg(MISCREG_TSL, 0);
210    tc->setMiscReg(MISCREG_TSG_BASE, GDTBase);
211    tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1);
212
213    SegDescriptor tssDesc = 0;
214    tssDesc.type = 0xB;
215    tssDesc.dpl = 0; // Privelege level 0
216    tssDesc.p = 1; // Present
217    tssDesc.d = 1; // default operand size
218    tssDesc.g = 1; // Page granularity
219    tssDesc.s = 1; // Not a system segment
220    tssDesc.limitHigh = 0xF;
221    tssDesc.limitLow = 0xFF;
222    uint64_t tssDescVal = tssDesc;
223    physPort->writeBlob(GDTBase + numGDTEntries * 8,
224            (uint8_t *)(&tssDescVal), 8);
225
226    numGDTEntries++;
227
228    SegSelector tss = 0;
229    tss.si = numGDTEntries - 1;
230
231    tc->setMiscReg(MISCREG_TR, (MiscReg)tss);
232    installSegDesc(tc, SYS_SEGMENT_REG_TR, tssDesc, true);
233
234    /*
235     * Identity map the first 4GB of memory. In order to map this region
236     * of memory in long mode, there needs to be one actual page map level
237     * 4 entry which points to one page directory pointer table which
238     * points to 4 different page directory tables which are full of two
239     * megabyte pages. All of the other entries in valid tables are set
240     * to indicate that they don't pertain to anything valid and will
241     * cause a fault if used.
242     */
243
244    // Put valid values in all of the various table entries which indicate
245    // that those entries don't point to further tables or pages. Then
246    // set the values of those entries which are needed.
247
248    // Page Map Level 4
249
250    // read/write, user, not present
251    uint64_t pml4e = X86ISA::htog(0x6);
252    for (int offset = 0; offset < (1 << PML4Bits) * 8; offset += 8) {
253        physPort->writeBlob(PageMapLevel4 + offset, (uint8_t *)(&pml4e), 8);
254    }
255    // Point to the only PDPT
256    pml4e = X86ISA::htog(0x7 | PageDirPtrTable);
257    physPort->writeBlob(PageMapLevel4, (uint8_t *)(&pml4e), 8);
258
259    // Page Directory Pointer Table
260
261    // read/write, user, not present
262    uint64_t pdpe = X86ISA::htog(0x6);
263    for (int offset = 0; offset < (1 << PDPTBits) * 8; offset += 8) {
264        physPort->writeBlob(PageDirPtrTable + offset,
265                (uint8_t *)(&pdpe), 8);
266    }
267    // Point to the PDTs
268    for (int table = 0; table < NumPDTs; table++) {
269        pdpe = X86ISA::htog(0x7 | PageDirTable[table]);
270        physPort->writeBlob(PageDirPtrTable + table * 8,
271                (uint8_t *)(&pdpe), 8);
272    }
273
274    // Page Directory Tables
275
276    Addr base = 0;
277    const Addr pageSize = 2 << 20;
278    for (int table = 0; table < NumPDTs; table++) {
279        for (int offset = 0; offset < (1 << PDTBits) * 8; offset += 8) {
280            // read/write, user, present, 4MB
281            uint64_t pdte = X86ISA::htog(0x87 | base);
282            physPort->writeBlob(PageDirTable[table] + offset,
283                    (uint8_t *)(&pdte), 8);
284            base += pageSize;
285        }
286    }
287
288    /*
289     * Transition from real mode all the way up to Long mode
290     */
291    CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
292    //Turn off paging.
293    cr0.pg = 0;
294    tc->setMiscReg(MISCREG_CR0, cr0);
295    //Turn on protected mode.
296    cr0.pe = 1;
297    tc->setMiscReg(MISCREG_CR0, cr0);
298
299    CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
300    //Turn on pae.
301    cr4.pae = 1;
302    tc->setMiscReg(MISCREG_CR4, cr4);
303
304    //Point to the page tables.
305    tc->setMiscReg(MISCREG_CR3, PageMapLevel4);
306
307    Efer efer = tc->readMiscRegNoEffect(MISCREG_EFER);
308    //Enable long mode.
309    efer.lme = 1;
310    tc->setMiscReg(MISCREG_EFER, efer);
311
312    //Start using longmode segments.
313    installSegDesc(tc, SEGMENT_REG_CS, csDesc, true);
314    installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true);
315    installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true);
316    installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true);
317    installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true);
318    installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true);
319
320    //Activate long mode.
321    cr0.pg = 1;
322    tc->setMiscReg(MISCREG_CR0, cr0);
323
324    tc->pcState(tc->getSystemPtr()->kernelEntry);
325
326    // We should now be in long mode. Yay!
327
328    Addr ebdaPos = 0xF0000;
329    Addr fixed, table;
330
331    //Write out the SMBios/DMI table
332    writeOutSMBiosTable(ebdaPos, fixed, table);
333    ebdaPos += (fixed + table);
334    ebdaPos = roundUp(ebdaPos, 16);
335
336    //Write out the Intel MP Specification configuration table
337    writeOutMPTable(ebdaPos, fixed, table);
338    ebdaPos += (fixed + table);
339}
340
341void
342X86System::writeOutSMBiosTable(Addr header,
343        Addr &headerSize, Addr &structSize, Addr table)
344{
345    // Get a port to write the table and header to memory.
346    FunctionalPort * physPort = threadContexts[0]->getPhysPort();
347
348    // If the table location isn't specified, just put it after the header.
349    // The header size as of the 2.5 SMBios specification is 0x1F bytes
350    if (!table)
351        table = header + 0x1F;
352    smbiosTable->setTableAddr(table);
353
354    smbiosTable->writeOut(physPort, header, headerSize, structSize);
355
356    // Do some bounds checking to make sure we at least didn't step on
357    // ourselves.
358    assert(header > table || header + headerSize <= table);
359    assert(table > header || table + structSize <= header);
360}
361
362void
363X86System::writeOutMPTable(Addr fp,
364        Addr &fpSize, Addr &tableSize, Addr table)
365{
366    // Get a port to write the table and header to memory.
367    FunctionalPort * physPort = threadContexts[0]->getPhysPort();
368
369    // If the table location isn't specified and it exists, just put
370    // it after the floating pointer. The fp size as of the 1.4 Intel MP
371    // specification is 0x10 bytes.
372    if (mpConfigTable) {
373        if (!table)
374            table = fp + 0x10;
375        mpFloatingPointer->setTableAddr(table);
376    }
377
378    fpSize = mpFloatingPointer->writeOut(physPort, fp);
379    if (mpConfigTable)
380        tableSize = mpConfigTable->writeOut(physPort, table);
381    else
382        tableSize = 0;
383
384    // Do some bounds checking to make sure we at least didn't step on
385    // ourselves and the fp structure was the size we thought it was.
386    assert(fp > table || fp + fpSize <= table);
387    assert(table > fp || table + tableSize <= fp);
388    assert(fpSize == 0x10);
389}
390
391
392X86System::~X86System()
393{
394    delete smbiosTable;
395}
396
397void
398X86System::serialize(std::ostream &os)
399{
400    System::serialize(os);
401}
402
403
404void
405X86System::unserialize(Checkpoint *cp, const std::string &section)
406{
407    System::unserialize(cp,section);
408}
409
410X86System *
411X86SystemParams::create()
412{
413    return new X86System(this);
414}
415