176} 177 178SyscallDesc* 179X86Process::getDesc(int callnum) 180{ 181 if (callnum < 0 || callnum >= numSyscallDescs) 182 return NULL; 183 return &syscallDescs[callnum]; 184} 185 186void 187X86_64Process::initState() 188{ 189 X86Process::initState(); 190 191 argsInit(PageBytes); 192 193 // Set up the vsyscall page for this process. 194 allocateMem(vsyscallPage.base, vsyscallPage.size); 195 uint8_t vtimeBlob[] = { 196 0x48,0xc7,0xc0,0xc9,0x00,0x00,0x00, // mov $0xc9,%rax 197 0x0f,0x05, // syscall 198 0xc3 // retq 199 }; 200 initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vtimeOffset, 201 vtimeBlob, sizeof(vtimeBlob)); 202 203 uint8_t vgettimeofdayBlob[] = { 204 0x48,0xc7,0xc0,0x60,0x00,0x00,0x00, // mov $0x60,%rax 205 0x0f,0x05, // syscall 206 0xc3 // retq 207 }; 208 initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset, 209 vgettimeofdayBlob, sizeof(vgettimeofdayBlob)); 210 211 if (kvmInSE) { 212 PortProxy physProxy = system->physProxy; 213 214 /* 215 * Set up the gdt. 
216 */ 217 uint8_t numGDTEntries = 0; 218 uint64_t nullDescriptor = 0; 219 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 220 (uint8_t *)(&nullDescriptor), 8); 221 numGDTEntries++; 222 223 SegDescriptor initDesc = 0; 224 initDesc.type.codeOrData = 0; // code or data type 225 initDesc.type.c = 0; // conforming 226 initDesc.type.r = 1; // readable 227 initDesc.dpl = 0; // privilege 228 initDesc.p = 1; // present 229 initDesc.l = 1; // longmode - 64 bit 230 initDesc.d = 0; // operand size 231 initDesc.g = 1; // granularity 232 initDesc.s = 1; // system segment 233 initDesc.limitHigh = 0xFFFF; 234 initDesc.limitLow = 0xF; 235 initDesc.baseHigh = 0x0; 236 initDesc.baseLow = 0x0; 237 238 //64 bit code segment 239 SegDescriptor csLowPLDesc = initDesc; 240 csLowPLDesc.type.codeOrData = 1; 241 csLowPLDesc.dpl = 0; 242 uint64_t csLowPLDescVal = csLowPLDesc; 243 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 244 (uint8_t *)(&csLowPLDescVal), 8); 245 246 numGDTEntries++; 247 248 SegSelector csLowPL = 0; 249 csLowPL.si = numGDTEntries - 1; 250 csLowPL.rpl = 0; 251 252 //64 bit data segment 253 SegDescriptor dsLowPLDesc = initDesc; 254 dsLowPLDesc.type.codeOrData = 0; 255 dsLowPLDesc.dpl = 0; 256 uint64_t dsLowPLDescVal = dsLowPLDesc; 257 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 258 (uint8_t *)(&dsLowPLDescVal), 8); 259 260 numGDTEntries++; 261 262 SegSelector dsLowPL = 0; 263 dsLowPL.si = numGDTEntries - 1; 264 dsLowPL.rpl = 0; 265 266 //64 bit data segment 267 SegDescriptor dsDesc = initDesc; 268 dsDesc.type.codeOrData = 0; 269 dsDesc.dpl = 3; 270 uint64_t dsDescVal = dsDesc; 271 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 272 (uint8_t *)(&dsDescVal), 8); 273 274 numGDTEntries++; 275 276 SegSelector ds = 0; 277 ds.si = numGDTEntries - 1; 278 ds.rpl = 3; 279 280 //64 bit code segment 281 SegDescriptor csDesc = initDesc; 282 csDesc.type.codeOrData = 1; 283 csDesc.dpl = 3; 284 uint64_t csDescVal = csDesc; 285 physProxy.writeBlob(GDTPhysAddr + 
numGDTEntries * 8, 286 (uint8_t *)(&csDescVal), 8); 287 288 numGDTEntries++; 289 290 SegSelector cs = 0; 291 cs.si = numGDTEntries - 1; 292 cs.rpl = 3; 293 294 SegSelector scall = 0; 295 scall.si = csLowPL.si; 296 scall.rpl = 0; 297 298 SegSelector sret = 0; 299 sret.si = dsLowPL.si; 300 sret.rpl = 3; 301 302 /* In long mode the TSS has been extended to 16 Bytes */ 303 TSSlow TSSDescLow = 0; 304 TSSDescLow.type = 0xB; 305 TSSDescLow.dpl = 0; // Privelege level 0 306 TSSDescLow.p = 1; // Present 307 TSSDescLow.g = 1; // Page granularity 308 TSSDescLow.limitHigh = 0xF; 309 TSSDescLow.limitLow = 0xFFFF; 310 TSSDescLow.baseLow = bits(TSSVirtAddr, 23, 0); 311 TSSDescLow.baseHigh = bits(TSSVirtAddr, 31, 24); 312 313 TSShigh TSSDescHigh = 0; 314 TSSDescHigh.base = bits(TSSVirtAddr, 63, 32); 315 316 struct TSSDesc { 317 uint64_t low; 318 uint64_t high; 319 } tssDescVal = {TSSDescLow, TSSDescHigh}; 320 321 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 322 (uint8_t *)(&tssDescVal), sizeof(tssDescVal)); 323 324 numGDTEntries++; 325 326 SegSelector tssSel = 0; 327 tssSel.si = numGDTEntries - 1; 328 329 uint64_t tss_base_addr = (TSSDescHigh.base << 32) | 330 (TSSDescLow.baseHigh << 24) | 331 TSSDescLow.baseLow; 332 uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16); 333 334 SegAttr tss_attr = 0; 335 336 tss_attr.type = TSSDescLow.type; 337 tss_attr.dpl = TSSDescLow.dpl; 338 tss_attr.present = TSSDescLow.p; 339 tss_attr.granularity = TSSDescLow.g; 340 tss_attr.unusable = 0; 341 342 for (int i = 0; i < contextIds.size(); i++) { 343 ThreadContext * tc = system->getThreadContext(contextIds[i]); 344 345 tc->setMiscReg(MISCREG_CS, cs); 346 tc->setMiscReg(MISCREG_DS, ds); 347 tc->setMiscReg(MISCREG_ES, ds); 348 tc->setMiscReg(MISCREG_FS, ds); 349 tc->setMiscReg(MISCREG_GS, ds); 350 tc->setMiscReg(MISCREG_SS, ds); 351 352 // LDT 353 tc->setMiscReg(MISCREG_TSL, 0); 354 SegAttr tslAttr = 0; 355 tslAttr.present = 1; 356 tslAttr.type = 2; 357 
tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr); 358 359 tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr); 360 tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1); 361 362 tc->setMiscReg(MISCREG_TR, tssSel); 363 tc->setMiscReg(MISCREG_TR_BASE, tss_base_addr); 364 tc->setMiscReg(MISCREG_TR_EFF_BASE, 0); 365 tc->setMiscReg(MISCREG_TR_LIMIT, tss_limit); 366 tc->setMiscReg(MISCREG_TR_ATTR, tss_attr); 367 368 //Start using longmode segments. 369 installSegDesc(tc, SEGMENT_REG_CS, csDesc, true); 370 installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true); 371 installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true); 372 installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true); 373 installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true); 374 installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true); 375 376 Efer efer = 0; 377 efer.sce = 1; // Enable system call extensions. 378 efer.lme = 1; // Enable long mode. 379 efer.lma = 1; // Activate long mode. 380 efer.nxe = 0; // Enable nx support. 381 efer.svme = 1; // Enable svm support for now. 382 efer.ffxsr = 0; // Turn on fast fxsave and fxrstor. 383 tc->setMiscReg(MISCREG_EFER, efer); 384 385 //Set up the registers that describe the operating mode. 386 CR0 cr0 = 0; 387 cr0.pg = 1; // Turn on paging. 388 cr0.cd = 0; // Don't disable caching. 389 cr0.nw = 0; // This is bit is defined to be ignored. 390 cr0.am = 1; // No alignment checking 391 cr0.wp = 1; // Supervisor mode can write read only pages 392 cr0.ne = 1; 393 cr0.et = 1; // This should always be 1 394 cr0.ts = 0; // We don't do task switching, so causing fp exceptions 395 // would be pointless. 396 cr0.em = 0; // Allow x87 instructions to execute natively. 397 cr0.mp = 1; // This doesn't really matter, but the manual suggests 398 // setting it to one. 399 cr0.pe = 1; // We're definitely in protected mode. 
400 tc->setMiscReg(MISCREG_CR0, cr0); 401 402 CR0 cr2 = 0; 403 tc->setMiscReg(MISCREG_CR2, cr2); 404 405 CR3 cr3 = pageTablePhysAddr; 406 tc->setMiscReg(MISCREG_CR3, cr3); 407 408 CR4 cr4 = 0; 409 //Turn on pae. 410 cr4.osxsave = 1; // Enable XSAVE and Proc Extended States 411 cr4.osxmmexcpt = 1; // Operating System Unmasked Exception 412 cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support 413 cr4.pce = 0; // Performance-Monitoring Counter Enable 414 cr4.pge = 0; // Page-Global Enable 415 cr4.mce = 0; // Machine Check Enable 416 cr4.pae = 1; // Physical-Address Extension 417 cr4.pse = 0; // Page Size Extensions 418 cr4.de = 0; // Debugging Extensions 419 cr4.tsd = 0; // Time Stamp Disable 420 cr4.pvi = 0; // Protected-Mode Virtual Interrupts 421 cr4.vme = 0; // Virtual-8086 Mode Extensions 422 423 tc->setMiscReg(MISCREG_CR4, cr4); 424 425 CR4 cr8 = 0; 426 tc->setMiscReg(MISCREG_CR8, cr8); 427 428 const Addr PageMapLevel4 = pageTablePhysAddr; 429 //Point to the page tables. 430 tc->setMiscReg(MISCREG_CR3, PageMapLevel4); 431 432 tc->setMiscReg(MISCREG_MXCSR, 0x1f80); 433 434 tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900); 435 436 tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr); 437 tc->setMiscReg(MISCREG_TSG_LIMIT, 0xffff); 438 439 tc->setMiscReg(MISCREG_IDTR_BASE, IDTVirtAddr); 440 tc->setMiscReg(MISCREG_IDTR_LIMIT, 0xffff); 441 442 /* enabling syscall and sysret */ 443 MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32); 444 tc->setMiscReg(MISCREG_STAR, star); 445 MiscReg lstar = (MiscReg)syscallCodeVirtAddr; 446 tc->setMiscReg(MISCREG_LSTAR, lstar); 447 MiscReg sfmask = (1 << 8) | (1 << 10); // TF | DF 448 tc->setMiscReg(MISCREG_SF_MASK, sfmask); 449 } 450 451 /* Set up the content of the TSS and write it to physical memory. 
*/ 452 453 struct { 454 uint32_t reserved0; // +00h 455 uint32_t RSP0_low; // +04h 456 uint32_t RSP0_high; // +08h 457 uint32_t RSP1_low; // +0Ch 458 uint32_t RSP1_high; // +10h 459 uint32_t RSP2_low; // +14h 460 uint32_t RSP2_high; // +18h 461 uint32_t reserved1; // +1Ch 462 uint32_t reserved2; // +20h 463 uint32_t IST1_low; // +24h 464 uint32_t IST1_high; // +28h 465 uint32_t IST2_low; // +2Ch 466 uint32_t IST2_high; // +30h 467 uint32_t IST3_low; // +34h 468 uint32_t IST3_high; // +38h 469 uint32_t IST4_low; // +3Ch 470 uint32_t IST4_high; // +40h 471 uint32_t IST5_low; // +44h 472 uint32_t IST5_high; // +48h 473 uint32_t IST6_low; // +4Ch 474 uint32_t IST6_high; // +50h 475 uint32_t IST7_low; // +54h 476 uint32_t IST7_high; // +58h 477 uint32_t reserved3; // +5Ch 478 uint32_t reserved4; // +60h 479 uint16_t reserved5; // +64h 480 uint16_t IO_MapBase; // +66h 481 } tss; 482 483 /** setting Interrupt Stack Table */ 484 uint64_t IST_start = ISTVirtAddr + PageBytes; 485 tss.IST1_low = IST_start; 486 tss.IST1_high = IST_start >> 32; 487 tss.RSP0_low = tss.IST1_low; 488 tss.RSP0_high = tss.IST1_high; 489 tss.RSP1_low = tss.IST1_low; 490 tss.RSP1_high = tss.IST1_high; 491 tss.RSP2_low = tss.IST1_low; 492 tss.RSP2_high = tss.IST1_high; 493 physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss)); 494 495 /* Setting IDT gates */ 496 GateDescriptorLow PFGateLow = 0; 497 PFGateLow.offsetHigh = bits(PFHandlerVirtAddr, 31, 16); 498 PFGateLow.offsetLow = bits(PFHandlerVirtAddr, 15, 0); 499 PFGateLow.selector = csLowPL; 500 PFGateLow.p = 1; 501 PFGateLow.dpl = 0; 502 PFGateLow.type = 0xe; // gate interrupt type 503 PFGateLow.IST = 0; // setting IST to 0 and using RSP0 504 505 GateDescriptorHigh PFGateHigh = 0; 506 PFGateHigh.offset = bits(PFHandlerVirtAddr, 63, 32); 507 508 struct { 509 uint64_t low; 510 uint64_t high; 511 } PFGate = {PFGateLow, PFGateHigh}; 512 513 physProxy.writeBlob(IDTPhysAddr + 0xE0, 514 (uint8_t *)(&PFGate), sizeof(PFGate)); 515 516 /* System 
call handler */ 517 uint8_t syscallBlob[] = { 518 // mov %rax, (0xffffc90000005600) 519 0x48, 0xa3, 0x00, 0x60, 0x00, 520 0x00, 0x00, 0xc9, 0xff, 0xff, 521 // sysret 522 0x48, 0x0f, 0x07 523 }; 524 525 physProxy.writeBlob(syscallCodePhysAddr, 526 syscallBlob, sizeof(syscallBlob)); 527 528 /** Page fault handler */ 529 uint8_t faultBlob[] = { 530 // mov %rax, (0xffffc90000005700) 531 0x48, 0xa3, 0x00, 0x61, 0x00, 532 0x00, 0x00, 0xc9, 0xff, 0xff, 533 // add $0x8, %rsp # skip error 534 0x48, 0x83, 0xc4, 0x08, 535 // iretq 536 0x48, 0xcf 537 }; 538 539 physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob)); 540 541 MultiLevelPageTable<PageTableOps> *pt = 542 dynamic_cast<MultiLevelPageTable<PageTableOps> *>(pTable); 543 544 /* Syscall handler */ 545 pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false); 546 /* GDT */ 547 pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false); 548 /* IDT */ 549 pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false); 550 /* TSS */ 551 pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false); 552 /* IST */ 553 pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false); 554 /* PF handler */ 555 pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false); 556 /* MMIO region for m5ops */ 557 pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false); 558 } else { 559 for (int i = 0; i < contextIds.size(); i++) { 560 ThreadContext * tc = system->getThreadContext(contextIds[i]); 561 562 SegAttr dataAttr = 0; 563 dataAttr.dpl = 3; 564 dataAttr.unusable = 0; 565 dataAttr.defaultSize = 1; 566 dataAttr.longMode = 1; 567 dataAttr.avl = 0; 568 dataAttr.granularity = 1; 569 dataAttr.present = 1; 570 dataAttr.type = 3; 571 dataAttr.writable = 1; 572 dataAttr.readable = 1; 573 dataAttr.expandDown = 0; 574 dataAttr.system = 1; 575 576 //Initialize the segment registers. 
577 for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) { 578 tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0); 579 tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0); 580 tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr); 581 } 582 583 SegAttr csAttr = 0; 584 csAttr.dpl = 3; 585 csAttr.unusable = 0; 586 csAttr.defaultSize = 0; 587 csAttr.longMode = 1; 588 csAttr.avl = 0; 589 csAttr.granularity = 1; 590 csAttr.present = 1; 591 csAttr.type = 10; 592 csAttr.writable = 0; 593 csAttr.readable = 1; 594 csAttr.expandDown = 0; 595 csAttr.system = 1; 596 597 tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr); 598 599 Efer efer = 0; 600 efer.sce = 1; // Enable system call extensions. 601 efer.lme = 1; // Enable long mode. 602 efer.lma = 1; // Activate long mode. 603 efer.nxe = 1; // Enable nx support. 604 efer.svme = 0; // Disable svm support for now. It isn't implemented. 605 efer.ffxsr = 1; // Turn on fast fxsave and fxrstor. 606 tc->setMiscReg(MISCREG_EFER, efer); 607 608 //Set up the registers that describe the operating mode. 609 CR0 cr0 = 0; 610 cr0.pg = 1; // Turn on paging. 611 cr0.cd = 0; // Don't disable caching. 612 cr0.nw = 0; // This is bit is defined to be ignored. 613 cr0.am = 0; // No alignment checking 614 cr0.wp = 0; // Supervisor mode can write read only pages 615 cr0.ne = 1; 616 cr0.et = 1; // This should always be 1 617 cr0.ts = 0; // We don't do task switching, so causing fp exceptions 618 // would be pointless. 619 cr0.em = 0; // Allow x87 instructions to execute natively. 620 cr0.mp = 1; // This doesn't really matter, but the manual suggests 621 // setting it to one. 622 cr0.pe = 1; // We're definitely in protected mode. 623 tc->setMiscReg(MISCREG_CR0, cr0); 624 625 tc->setMiscReg(MISCREG_MXCSR, 0x1f80); 626 } 627 } 628} 629 630void 631I386Process::initState() 632{ 633 X86Process::initState(); 634 635 argsInit(PageBytes); 636 637 /* 638 * Set up a GDT for this process. 
The whole GDT wouldn't really be for 639 * this process, but the only parts we care about are. 640 */ 641 allocateMem(_gdtStart, _gdtSize); 642 uint64_t zero = 0; 643 assert(_gdtSize % sizeof(zero) == 0); 644 for (Addr gdtCurrent = _gdtStart; 645 gdtCurrent < _gdtStart + _gdtSize; gdtCurrent += sizeof(zero)) { 646 initVirtMem.write(gdtCurrent, zero); 647 } 648 649 // Set up the vsyscall page for this process. 650 allocateMem(vsyscallPage.base, vsyscallPage.size); 651 uint8_t vsyscallBlob[] = { 652 0x51, // push %ecx 653 0x52, // push %edp 654 0x55, // push %ebp 655 0x89, 0xe5, // mov %esp, %ebp 656 0x0f, 0x34 // sysenter 657 }; 658 initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsyscallOffset, 659 vsyscallBlob, sizeof(vsyscallBlob)); 660 661 uint8_t vsysexitBlob[] = { 662 0x5d, // pop %ebp 663 0x5a, // pop %edx 664 0x59, // pop %ecx 665 0xc3 // ret 666 }; 667 initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsysexitOffset, 668 vsysexitBlob, sizeof(vsysexitBlob)); 669 670 for (int i = 0; i < contextIds.size(); i++) { 671 ThreadContext * tc = system->getThreadContext(contextIds[i]); 672 673 SegAttr dataAttr = 0; 674 dataAttr.dpl = 3; 675 dataAttr.unusable = 0; 676 dataAttr.defaultSize = 1; 677 dataAttr.longMode = 0; 678 dataAttr.avl = 0; 679 dataAttr.granularity = 1; 680 dataAttr.present = 1; 681 dataAttr.type = 3; 682 dataAttr.writable = 1; 683 dataAttr.readable = 1; 684 dataAttr.expandDown = 0; 685 dataAttr.system = 1; 686 687 //Initialize the segment registers. 
688 for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) { 689 tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0); 690 tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0); 691 tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr); 692 tc->setMiscRegNoEffect(MISCREG_SEG_SEL(seg), 0xB); 693 tc->setMiscRegNoEffect(MISCREG_SEG_LIMIT(seg), (uint32_t)(-1)); 694 } 695 696 SegAttr csAttr = 0; 697 csAttr.dpl = 3; 698 csAttr.unusable = 0; 699 csAttr.defaultSize = 1; 700 csAttr.longMode = 0; 701 csAttr.avl = 0; 702 csAttr.granularity = 1; 703 csAttr.present = 1; 704 csAttr.type = 0xa; 705 csAttr.writable = 0; 706 csAttr.readable = 1; 707 csAttr.expandDown = 0; 708 csAttr.system = 1; 709 710 tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr); 711 712 tc->setMiscRegNoEffect(MISCREG_TSG_BASE, _gdtStart); 713 tc->setMiscRegNoEffect(MISCREG_TSG_EFF_BASE, _gdtStart); 714 tc->setMiscRegNoEffect(MISCREG_TSG_LIMIT, _gdtStart + _gdtSize - 1); 715 716 // Set the LDT selector to 0 to deactivate it. 717 tc->setMiscRegNoEffect(MISCREG_TSL, 0); 718 719 Efer efer = 0; 720 efer.sce = 1; // Enable system call extensions. 721 efer.lme = 1; // Enable long mode. 722 efer.lma = 0; // Deactivate long mode. 723 efer.nxe = 1; // Enable nx support. 724 efer.svme = 0; // Disable svm support for now. It isn't implemented. 725 efer.ffxsr = 1; // Turn on fast fxsave and fxrstor. 726 tc->setMiscReg(MISCREG_EFER, efer); 727 728 //Set up the registers that describe the operating mode. 729 CR0 cr0 = 0; 730 cr0.pg = 1; // Turn on paging. 731 cr0.cd = 0; // Don't disable caching. 732 cr0.nw = 0; // This is bit is defined to be ignored. 733 cr0.am = 0; // No alignment checking 734 cr0.wp = 0; // Supervisor mode can write read only pages 735 cr0.ne = 1; 736 cr0.et = 1; // This should always be 1 737 cr0.ts = 0; // We don't do task switching, so causing fp exceptions 738 // would be pointless. 739 cr0.em = 0; // Allow x87 instructions to execute natively. 
740 cr0.mp = 1; // This doesn't really matter, but the manual suggests 741 // setting it to one. 742 cr0.pe = 1; // We're definitely in protected mode. 743 tc->setMiscReg(MISCREG_CR0, cr0); 744 745 tc->setMiscReg(MISCREG_MXCSR, 0x1f80); 746 } 747} 748 749template<class IntType> 750void 751X86Process::argsInit(int pageSize, 752 std::vector<AuxVector<IntType> > extraAuxvs) 753{ 754 int intSize = sizeof(IntType); 755 756 typedef AuxVector<IntType> auxv_t; 757 std::vector<auxv_t> auxv = extraAuxvs; 758 759 string filename; 760 if (argv.size() < 1) 761 filename = ""; 762 else 763 filename = argv[0]; 764 765 //We want 16 byte alignment 766 uint64_t align = 16; 767 768 // Patch the ld_bias for dynamic executables. 769 updateBias(); 770 771 // load object file into target memory 772 objFile->loadSections(initVirtMem); 773 774 enum X86CpuFeature { 775 X86_OnboardFPU = 1 << 0, 776 X86_VirtualModeExtensions = 1 << 1, 777 X86_DebuggingExtensions = 1 << 2, 778 X86_PageSizeExtensions = 1 << 3, 779 780 X86_TimeStampCounter = 1 << 4, 781 X86_ModelSpecificRegisters = 1 << 5, 782 X86_PhysicalAddressExtensions = 1 << 6, 783 X86_MachineCheckExtensions = 1 << 7, 784 785 X86_CMPXCHG8Instruction = 1 << 8, 786 X86_OnboardAPIC = 1 << 9, 787 X86_SYSENTER_SYSEXIT = 1 << 11, 788 789 X86_MemoryTypeRangeRegisters = 1 << 12, 790 X86_PageGlobalEnable = 1 << 13, 791 X86_MachineCheckArchitecture = 1 << 14, 792 X86_CMOVInstruction = 1 << 15, 793 794 X86_PageAttributeTable = 1 << 16, 795 X86_36BitPSEs = 1 << 17, 796 X86_ProcessorSerialNumber = 1 << 18, 797 X86_CLFLUSHInstruction = 1 << 19, 798 799 X86_DebugTraceStore = 1 << 21, 800 X86_ACPIViaMSR = 1 << 22, 801 X86_MultimediaExtensions = 1 << 23, 802 803 X86_FXSAVE_FXRSTOR = 1 << 24, 804 X86_StreamingSIMDExtensions = 1 << 25, 805 X86_StreamingSIMDExtensions2 = 1 << 26, 806 X86_CPUSelfSnoop = 1 << 27, 807 808 X86_HyperThreading = 1 << 28, 809 X86_AutomaticClockControl = 1 << 29, 810 X86_IA64Processor = 1 << 30 811 }; 812 813 // Setup the auxiliary 
vectors. These will already have endian 814 // conversion. Auxiliary vectors are loaded only for elf formatted 815 // executables; the auxv is responsible for passing information from 816 // the OS to the interpreter. 817 ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile); 818 if (elfObject) { 819 uint64_t features = 820 X86_OnboardFPU | 821 X86_VirtualModeExtensions | 822 X86_DebuggingExtensions | 823 X86_PageSizeExtensions | 824 X86_TimeStampCounter | 825 X86_ModelSpecificRegisters | 826 X86_PhysicalAddressExtensions | 827 X86_MachineCheckExtensions | 828 X86_CMPXCHG8Instruction | 829 X86_OnboardAPIC | 830 X86_SYSENTER_SYSEXIT | 831 X86_MemoryTypeRangeRegisters | 832 X86_PageGlobalEnable | 833 X86_MachineCheckArchitecture | 834 X86_CMOVInstruction | 835 X86_PageAttributeTable | 836 X86_36BitPSEs | 837// X86_ProcessorSerialNumber | 838 X86_CLFLUSHInstruction | 839// X86_DebugTraceStore | 840// X86_ACPIViaMSR | 841 X86_MultimediaExtensions | 842 X86_FXSAVE_FXRSTOR | 843 X86_StreamingSIMDExtensions | 844 X86_StreamingSIMDExtensions2 | 845// X86_CPUSelfSnoop | 846// X86_HyperThreading | 847// X86_AutomaticClockControl | 848// X86_IA64Processor | 849 0; 850 851 //Bits which describe the system hardware capabilities 852 //XXX Figure out what these should be 853 auxv.push_back(auxv_t(M5_AT_HWCAP, features)); 854 //The system page size 855 auxv.push_back(auxv_t(M5_AT_PAGESZ, X86ISA::PageBytes)); 856 //Frequency at which times() increments 857 //Defined to be 100 in the kernel source. 858 auxv.push_back(auxv_t(M5_AT_CLKTCK, 100)); 859 // This is the virtual address of the program header tables if they 860 // appear in the executable image. 861 auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable())); 862 // This is the size of a program header entry from the elf file. 863 auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize())); 864 // This is the number of program headers from the original elf file. 
865 auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount())); 866 // This is the base address of the ELF interpreter; it should be 867 // zero for static executables or contain the base address for 868 // dynamic executables. 869 auxv.push_back(auxv_t(M5_AT_BASE, getBias())); 870 //XXX Figure out what this should be. 871 auxv.push_back(auxv_t(M5_AT_FLAGS, 0)); 872 //The entry point to the program 873 auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint())); 874 //Different user and group IDs 875 auxv.push_back(auxv_t(M5_AT_UID, uid())); 876 auxv.push_back(auxv_t(M5_AT_EUID, euid())); 877 auxv.push_back(auxv_t(M5_AT_GID, gid())); 878 auxv.push_back(auxv_t(M5_AT_EGID, egid())); 879 //Whether to enable "secure mode" in the executable 880 auxv.push_back(auxv_t(M5_AT_SECURE, 0)); 881 //The address of 16 "random" bytes. 882 auxv.push_back(auxv_t(M5_AT_RANDOM, 0)); 883 //The name of the program 884 auxv.push_back(auxv_t(M5_AT_EXECFN, 0)); 885 //The platform string 886 auxv.push_back(auxv_t(M5_AT_PLATFORM, 0)); 887 } 888 889 //Figure out how big the initial stack needs to be 890 891 // A sentry NULL void pointer at the top of the stack. 892 int sentry_size = intSize; 893 894 //This is the name of the file which is present on the initial stack 895 //It's purpose is to let the user space linker examine the original file. 896 int file_name_size = filename.size() + 1; 897 898 const int numRandomBytes = 16; 899 int aux_data_size = numRandomBytes; 900 901 string platform = "x86_64"; 902 aux_data_size += platform.size() + 1; 903 904 int env_data_size = 0; 905 for (int i = 0; i < envp.size(); ++i) 906 env_data_size += envp[i].size() + 1; 907 int arg_data_size = 0; 908 for (int i = 0; i < argv.size(); ++i) 909 arg_data_size += argv[i].size() + 1; 910 911 //The info_block needs to be padded so it's size is a multiple of the 912 //alignment mask. 
Also, it appears that there needs to be at least some 913 //padding, so if the size is already a multiple, we need to increase it 914 //anyway. 915 int base_info_block_size = 916 sentry_size + file_name_size + env_data_size + arg_data_size; 917 918 int info_block_size = roundUp(base_info_block_size, align); 919 920 int info_block_padding = info_block_size - base_info_block_size; 921 922 //Each auxilliary vector is two 8 byte words 923 int aux_array_size = intSize * 2 * (auxv.size() + 1); 924 925 int envp_array_size = intSize * (envp.size() + 1); 926 int argv_array_size = intSize * (argv.size() + 1); 927 928 int argc_size = intSize; 929 930 //Figure out the size of the contents of the actual initial frame 931 int frame_size = 932 aux_array_size + 933 envp_array_size + 934 argv_array_size + 935 argc_size; 936 937 //There needs to be padding after the auxiliary vector data so that the 938 //very bottom of the stack is aligned properly. 939 int partial_size = frame_size + aux_data_size; 940 int aligned_partial_size = roundUp(partial_size, align); 941 int aux_padding = aligned_partial_size - partial_size; 942 943 int space_needed = 944 info_block_size + 945 aux_data_size + 946 aux_padding + 947 frame_size; 948
| 185} 186 187SyscallDesc* 188X86Process::getDesc(int callnum) 189{ 190 if (callnum < 0 || callnum >= numSyscallDescs) 191 return NULL; 192 return &syscallDescs[callnum]; 193} 194 195void 196X86_64Process::initState() 197{ 198 X86Process::initState(); 199 200 argsInit(PageBytes); 201 202 // Set up the vsyscall page for this process. 203 allocateMem(vsyscallPage.base, vsyscallPage.size); 204 uint8_t vtimeBlob[] = { 205 0x48,0xc7,0xc0,0xc9,0x00,0x00,0x00, // mov $0xc9,%rax 206 0x0f,0x05, // syscall 207 0xc3 // retq 208 }; 209 initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vtimeOffset, 210 vtimeBlob, sizeof(vtimeBlob)); 211 212 uint8_t vgettimeofdayBlob[] = { 213 0x48,0xc7,0xc0,0x60,0x00,0x00,0x00, // mov $0x60,%rax 214 0x0f,0x05, // syscall 215 0xc3 // retq 216 }; 217 initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset, 218 vgettimeofdayBlob, sizeof(vgettimeofdayBlob)); 219 220 if (kvmInSE) { 221 PortProxy physProxy = system->physProxy; 222 223 /* 224 * Set up the gdt. 
225 */ 226 uint8_t numGDTEntries = 0; 227 uint64_t nullDescriptor = 0; 228 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 229 (uint8_t *)(&nullDescriptor), 8); 230 numGDTEntries++; 231 232 SegDescriptor initDesc = 0; 233 initDesc.type.codeOrData = 0; // code or data type 234 initDesc.type.c = 0; // conforming 235 initDesc.type.r = 1; // readable 236 initDesc.dpl = 0; // privilege 237 initDesc.p = 1; // present 238 initDesc.l = 1; // longmode - 64 bit 239 initDesc.d = 0; // operand size 240 initDesc.g = 1; // granularity 241 initDesc.s = 1; // system segment 242 initDesc.limitHigh = 0xFFFF; 243 initDesc.limitLow = 0xF; 244 initDesc.baseHigh = 0x0; 245 initDesc.baseLow = 0x0; 246 247 //64 bit code segment 248 SegDescriptor csLowPLDesc = initDesc; 249 csLowPLDesc.type.codeOrData = 1; 250 csLowPLDesc.dpl = 0; 251 uint64_t csLowPLDescVal = csLowPLDesc; 252 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 253 (uint8_t *)(&csLowPLDescVal), 8); 254 255 numGDTEntries++; 256 257 SegSelector csLowPL = 0; 258 csLowPL.si = numGDTEntries - 1; 259 csLowPL.rpl = 0; 260 261 //64 bit data segment 262 SegDescriptor dsLowPLDesc = initDesc; 263 dsLowPLDesc.type.codeOrData = 0; 264 dsLowPLDesc.dpl = 0; 265 uint64_t dsLowPLDescVal = dsLowPLDesc; 266 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 267 (uint8_t *)(&dsLowPLDescVal), 8); 268 269 numGDTEntries++; 270 271 SegSelector dsLowPL = 0; 272 dsLowPL.si = numGDTEntries - 1; 273 dsLowPL.rpl = 0; 274 275 //64 bit data segment 276 SegDescriptor dsDesc = initDesc; 277 dsDesc.type.codeOrData = 0; 278 dsDesc.dpl = 3; 279 uint64_t dsDescVal = dsDesc; 280 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 281 (uint8_t *)(&dsDescVal), 8); 282 283 numGDTEntries++; 284 285 SegSelector ds = 0; 286 ds.si = numGDTEntries - 1; 287 ds.rpl = 3; 288 289 //64 bit code segment 290 SegDescriptor csDesc = initDesc; 291 csDesc.type.codeOrData = 1; 292 csDesc.dpl = 3; 293 uint64_t csDescVal = csDesc; 294 physProxy.writeBlob(GDTPhysAddr + 
numGDTEntries * 8, 295 (uint8_t *)(&csDescVal), 8); 296 297 numGDTEntries++; 298 299 SegSelector cs = 0; 300 cs.si = numGDTEntries - 1; 301 cs.rpl = 3; 302 303 SegSelector scall = 0; 304 scall.si = csLowPL.si; 305 scall.rpl = 0; 306 307 SegSelector sret = 0; 308 sret.si = dsLowPL.si; 309 sret.rpl = 3; 310 311 /* In long mode the TSS has been extended to 16 Bytes */ 312 TSSlow TSSDescLow = 0; 313 TSSDescLow.type = 0xB; 314 TSSDescLow.dpl = 0; // Privelege level 0 315 TSSDescLow.p = 1; // Present 316 TSSDescLow.g = 1; // Page granularity 317 TSSDescLow.limitHigh = 0xF; 318 TSSDescLow.limitLow = 0xFFFF; 319 TSSDescLow.baseLow = bits(TSSVirtAddr, 23, 0); 320 TSSDescLow.baseHigh = bits(TSSVirtAddr, 31, 24); 321 322 TSShigh TSSDescHigh = 0; 323 TSSDescHigh.base = bits(TSSVirtAddr, 63, 32); 324 325 struct TSSDesc { 326 uint64_t low; 327 uint64_t high; 328 } tssDescVal = {TSSDescLow, TSSDescHigh}; 329 330 physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, 331 (uint8_t *)(&tssDescVal), sizeof(tssDescVal)); 332 333 numGDTEntries++; 334 335 SegSelector tssSel = 0; 336 tssSel.si = numGDTEntries - 1; 337 338 uint64_t tss_base_addr = (TSSDescHigh.base << 32) | 339 (TSSDescLow.baseHigh << 24) | 340 TSSDescLow.baseLow; 341 uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16); 342 343 SegAttr tss_attr = 0; 344 345 tss_attr.type = TSSDescLow.type; 346 tss_attr.dpl = TSSDescLow.dpl; 347 tss_attr.present = TSSDescLow.p; 348 tss_attr.granularity = TSSDescLow.g; 349 tss_attr.unusable = 0; 350 351 for (int i = 0; i < contextIds.size(); i++) { 352 ThreadContext * tc = system->getThreadContext(contextIds[i]); 353 354 tc->setMiscReg(MISCREG_CS, cs); 355 tc->setMiscReg(MISCREG_DS, ds); 356 tc->setMiscReg(MISCREG_ES, ds); 357 tc->setMiscReg(MISCREG_FS, ds); 358 tc->setMiscReg(MISCREG_GS, ds); 359 tc->setMiscReg(MISCREG_SS, ds); 360 361 // LDT 362 tc->setMiscReg(MISCREG_TSL, 0); 363 SegAttr tslAttr = 0; 364 tslAttr.present = 1; 365 tslAttr.type = 2; 366 
tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr); 367 368 tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr); 369 tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1); 370 371 tc->setMiscReg(MISCREG_TR, tssSel); 372 tc->setMiscReg(MISCREG_TR_BASE, tss_base_addr); 373 tc->setMiscReg(MISCREG_TR_EFF_BASE, 0); 374 tc->setMiscReg(MISCREG_TR_LIMIT, tss_limit); 375 tc->setMiscReg(MISCREG_TR_ATTR, tss_attr); 376 377 //Start using longmode segments. 378 installSegDesc(tc, SEGMENT_REG_CS, csDesc, true); 379 installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true); 380 installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true); 381 installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true); 382 installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true); 383 installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true); 384 385 Efer efer = 0; 386 efer.sce = 1; // Enable system call extensions. 387 efer.lme = 1; // Enable long mode. 388 efer.lma = 1; // Activate long mode. 389 efer.nxe = 0; // Enable nx support. 390 efer.svme = 1; // Enable svm support for now. 391 efer.ffxsr = 0; // Turn on fast fxsave and fxrstor. 392 tc->setMiscReg(MISCREG_EFER, efer); 393 394 //Set up the registers that describe the operating mode. 395 CR0 cr0 = 0; 396 cr0.pg = 1; // Turn on paging. 397 cr0.cd = 0; // Don't disable caching. 398 cr0.nw = 0; // This is bit is defined to be ignored. 399 cr0.am = 1; // No alignment checking 400 cr0.wp = 1; // Supervisor mode can write read only pages 401 cr0.ne = 1; 402 cr0.et = 1; // This should always be 1 403 cr0.ts = 0; // We don't do task switching, so causing fp exceptions 404 // would be pointless. 405 cr0.em = 0; // Allow x87 instructions to execute natively. 406 cr0.mp = 1; // This doesn't really matter, but the manual suggests 407 // setting it to one. 408 cr0.pe = 1; // We're definitely in protected mode. 
409 tc->setMiscReg(MISCREG_CR0, cr0); 410 411 CR0 cr2 = 0; 412 tc->setMiscReg(MISCREG_CR2, cr2); 413 414 CR3 cr3 = pageTablePhysAddr; 415 tc->setMiscReg(MISCREG_CR3, cr3); 416 417 CR4 cr4 = 0; 418 //Turn on pae. 419 cr4.osxsave = 1; // Enable XSAVE and Proc Extended States 420 cr4.osxmmexcpt = 1; // Operating System Unmasked Exception 421 cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support 422 cr4.pce = 0; // Performance-Monitoring Counter Enable 423 cr4.pge = 0; // Page-Global Enable 424 cr4.mce = 0; // Machine Check Enable 425 cr4.pae = 1; // Physical-Address Extension 426 cr4.pse = 0; // Page Size Extensions 427 cr4.de = 0; // Debugging Extensions 428 cr4.tsd = 0; // Time Stamp Disable 429 cr4.pvi = 0; // Protected-Mode Virtual Interrupts 430 cr4.vme = 0; // Virtual-8086 Mode Extensions 431 432 tc->setMiscReg(MISCREG_CR4, cr4); 433 434 CR4 cr8 = 0; 435 tc->setMiscReg(MISCREG_CR8, cr8); 436 437 const Addr PageMapLevel4 = pageTablePhysAddr; 438 //Point to the page tables. 439 tc->setMiscReg(MISCREG_CR3, PageMapLevel4); 440 441 tc->setMiscReg(MISCREG_MXCSR, 0x1f80); 442 443 tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900); 444 445 tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr); 446 tc->setMiscReg(MISCREG_TSG_LIMIT, 0xffff); 447 448 tc->setMiscReg(MISCREG_IDTR_BASE, IDTVirtAddr); 449 tc->setMiscReg(MISCREG_IDTR_LIMIT, 0xffff); 450 451 /* enabling syscall and sysret */ 452 MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32); 453 tc->setMiscReg(MISCREG_STAR, star); 454 MiscReg lstar = (MiscReg)syscallCodeVirtAddr; 455 tc->setMiscReg(MISCREG_LSTAR, lstar); 456 MiscReg sfmask = (1 << 8) | (1 << 10); // TF | DF 457 tc->setMiscReg(MISCREG_SF_MASK, sfmask); 458 } 459 460 /* Set up the content of the TSS and write it to physical memory. 
*/ 461 462 struct { 463 uint32_t reserved0; // +00h 464 uint32_t RSP0_low; // +04h 465 uint32_t RSP0_high; // +08h 466 uint32_t RSP1_low; // +0Ch 467 uint32_t RSP1_high; // +10h 468 uint32_t RSP2_low; // +14h 469 uint32_t RSP2_high; // +18h 470 uint32_t reserved1; // +1Ch 471 uint32_t reserved2; // +20h 472 uint32_t IST1_low; // +24h 473 uint32_t IST1_high; // +28h 474 uint32_t IST2_low; // +2Ch 475 uint32_t IST2_high; // +30h 476 uint32_t IST3_low; // +34h 477 uint32_t IST3_high; // +38h 478 uint32_t IST4_low; // +3Ch 479 uint32_t IST4_high; // +40h 480 uint32_t IST5_low; // +44h 481 uint32_t IST5_high; // +48h 482 uint32_t IST6_low; // +4Ch 483 uint32_t IST6_high; // +50h 484 uint32_t IST7_low; // +54h 485 uint32_t IST7_high; // +58h 486 uint32_t reserved3; // +5Ch 487 uint32_t reserved4; // +60h 488 uint16_t reserved5; // +64h 489 uint16_t IO_MapBase; // +66h 490 } tss; 491 492 /** setting Interrupt Stack Table */ 493 uint64_t IST_start = ISTVirtAddr + PageBytes; 494 tss.IST1_low = IST_start; 495 tss.IST1_high = IST_start >> 32; 496 tss.RSP0_low = tss.IST1_low; 497 tss.RSP0_high = tss.IST1_high; 498 tss.RSP1_low = tss.IST1_low; 499 tss.RSP1_high = tss.IST1_high; 500 tss.RSP2_low = tss.IST1_low; 501 tss.RSP2_high = tss.IST1_high; 502 physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss)); 503 504 /* Setting IDT gates */ 505 GateDescriptorLow PFGateLow = 0; 506 PFGateLow.offsetHigh = bits(PFHandlerVirtAddr, 31, 16); 507 PFGateLow.offsetLow = bits(PFHandlerVirtAddr, 15, 0); 508 PFGateLow.selector = csLowPL; 509 PFGateLow.p = 1; 510 PFGateLow.dpl = 0; 511 PFGateLow.type = 0xe; // gate interrupt type 512 PFGateLow.IST = 0; // setting IST to 0 and using RSP0 513 514 GateDescriptorHigh PFGateHigh = 0; 515 PFGateHigh.offset = bits(PFHandlerVirtAddr, 63, 32); 516 517 struct { 518 uint64_t low; 519 uint64_t high; 520 } PFGate = {PFGateLow, PFGateHigh}; 521 522 physProxy.writeBlob(IDTPhysAddr + 0xE0, 523 (uint8_t *)(&PFGate), sizeof(PFGate)); 524 525 /* System 
call handler */ 526 uint8_t syscallBlob[] = { 527 // mov %rax, (0xffffc90000005600) 528 0x48, 0xa3, 0x00, 0x60, 0x00, 529 0x00, 0x00, 0xc9, 0xff, 0xff, 530 // sysret 531 0x48, 0x0f, 0x07 532 }; 533 534 physProxy.writeBlob(syscallCodePhysAddr, 535 syscallBlob, sizeof(syscallBlob)); 536 537 /** Page fault handler */ 538 uint8_t faultBlob[] = { 539 // mov %rax, (0xffffc90000005700) 540 0x48, 0xa3, 0x00, 0x61, 0x00, 541 0x00, 0x00, 0xc9, 0xff, 0xff, 542 // add $0x8, %rsp # skip error 543 0x48, 0x83, 0xc4, 0x08, 544 // iretq 545 0x48, 0xcf 546 }; 547 548 physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob)); 549 550 MultiLevelPageTable<PageTableOps> *pt = 551 dynamic_cast<MultiLevelPageTable<PageTableOps> *>(pTable); 552 553 /* Syscall handler */ 554 pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false); 555 /* GDT */ 556 pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false); 557 /* IDT */ 558 pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false); 559 /* TSS */ 560 pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false); 561 /* IST */ 562 pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false); 563 /* PF handler */ 564 pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false); 565 /* MMIO region for m5ops */ 566 pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false); 567 } else { 568 for (int i = 0; i < contextIds.size(); i++) { 569 ThreadContext * tc = system->getThreadContext(contextIds[i]); 570 571 SegAttr dataAttr = 0; 572 dataAttr.dpl = 3; 573 dataAttr.unusable = 0; 574 dataAttr.defaultSize = 1; 575 dataAttr.longMode = 1; 576 dataAttr.avl = 0; 577 dataAttr.granularity = 1; 578 dataAttr.present = 1; 579 dataAttr.type = 3; 580 dataAttr.writable = 1; 581 dataAttr.readable = 1; 582 dataAttr.expandDown = 0; 583 dataAttr.system = 1; 584 585 //Initialize the segment registers. 
586 for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) { 587 tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0); 588 tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0); 589 tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr); 590 } 591 592 SegAttr csAttr = 0; 593 csAttr.dpl = 3; 594 csAttr.unusable = 0; 595 csAttr.defaultSize = 0; 596 csAttr.longMode = 1; 597 csAttr.avl = 0; 598 csAttr.granularity = 1; 599 csAttr.present = 1; 600 csAttr.type = 10; 601 csAttr.writable = 0; 602 csAttr.readable = 1; 603 csAttr.expandDown = 0; 604 csAttr.system = 1; 605 606 tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr); 607 608 Efer efer = 0; 609 efer.sce = 1; // Enable system call extensions. 610 efer.lme = 1; // Enable long mode. 611 efer.lma = 1; // Activate long mode. 612 efer.nxe = 1; // Enable nx support. 613 efer.svme = 0; // Disable svm support for now. It isn't implemented. 614 efer.ffxsr = 1; // Turn on fast fxsave and fxrstor. 615 tc->setMiscReg(MISCREG_EFER, efer); 616 617 //Set up the registers that describe the operating mode. 618 CR0 cr0 = 0; 619 cr0.pg = 1; // Turn on paging. 620 cr0.cd = 0; // Don't disable caching. 621 cr0.nw = 0; // This is bit is defined to be ignored. 622 cr0.am = 0; // No alignment checking 623 cr0.wp = 0; // Supervisor mode can write read only pages 624 cr0.ne = 1; 625 cr0.et = 1; // This should always be 1 626 cr0.ts = 0; // We don't do task switching, so causing fp exceptions 627 // would be pointless. 628 cr0.em = 0; // Allow x87 instructions to execute natively. 629 cr0.mp = 1; // This doesn't really matter, but the manual suggests 630 // setting it to one. 631 cr0.pe = 1; // We're definitely in protected mode. 632 tc->setMiscReg(MISCREG_CR0, cr0); 633 634 tc->setMiscReg(MISCREG_MXCSR, 0x1f80); 635 } 636 } 637} 638
/**
 * Initialize the architectural state of a 32-bit (i386) process.
 *
 * After the common X86Process::initState() and argsInit(PageBytes), this
 * allocates and zero-fills the GDT region [_gdtStart, _gdtStart + _gdtSize),
 * writes the sysenter entry stub and its matching return stub into the
 * vsyscall page, and then programs the segment registers, GDT/LDT
 * registers, EFER, CR0 and MXCSR of every thread context in contextIds.
 */
void
I386Process::initState()
{
    X86Process::initState();

    argsInit(PageBytes);

    /*
     * Set up a GDT for this process. The whole GDT wouldn't really be for
     * this process, but the only parts we care about are.
     */
    allocateMem(_gdtStart, _gdtSize);
    uint64_t zero = 0;
    // The table is cleared one 8-byte word at a time, so its size has to
    // be a whole number of such words.
    assert(_gdtSize % sizeof(zero) == 0);
    for (Addr gdtCurrent = _gdtStart;
            gdtCurrent < _gdtStart + _gdtSize; gdtCurrent += sizeof(zero)) {
        initVirtMem.write(gdtCurrent, zero);
    }

    // Set up the vsyscall page for this process.
    allocateMem(vsyscallPage.base, vsyscallPage.size);

    // Entry stub: save the registers the return stub restores, then
    // issue sysenter.
    uint8_t vsyscallBlob[] = {
        0x51,       // push %ecx
        0x52,       // push %edx
        0x55,       // push %ebp
        0x89, 0xe5, // mov %esp, %ebp
        0x0f, 0x34  // sysenter
    };
    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsyscallOffset,
            vsyscallBlob, sizeof(vsyscallBlob));

    // Return stub: pop the saved registers in reverse order and return.
    uint8_t vsysexitBlob[] = {
        0x5d,       // pop %ebp
        0x5a,       // pop %edx
        0x59,       // pop %ecx
        0xc3        // ret
    };
    initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vsysexitOffset,
            vsysexitBlob, sizeof(vsysexitBlob));

    // size() is unsigned; use an unsigned index to avoid a mixed-sign
    // comparison.
    for (size_t i = 0; i < contextIds.size(); i++) {
        ThreadContext * tc = system->getThreadContext(contextIds[i]);

        // Attributes shared by every segment register: user-privilege
        // (dpl 3), 32-bit default size, not long mode.
        SegAttr dataAttr = 0;
        dataAttr.dpl = 3;
        dataAttr.unusable = 0;
        dataAttr.defaultSize = 1;
        dataAttr.longMode = 0;
        dataAttr.avl = 0;
        dataAttr.granularity = 1;
        dataAttr.present = 1;
        dataAttr.type = 3;
        dataAttr.writable = 1;
        dataAttr.readable = 1;
        dataAttr.expandDown = 0;
        dataAttr.system = 1;

        //Initialize the segment registers.
        for (int seg = 0; seg < NUM_SEGMENTREGS; seg++) {
            tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0);
            tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0);
            tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr);
            tc->setMiscRegNoEffect(MISCREG_SEG_SEL(seg), 0xB);
            tc->setMiscRegNoEffect(MISCREG_SEG_LIMIT(seg), (uint32_t)(-1));
        }

        // Code segment attributes. NOTE(review): presumably this replaces
        // the data attributes the loop above wrote for CS -- confirm that
        // MISCREG_CS_ATTR aliases MISCREG_SEG_ATTR(SEGMENT_REG_CS).
        SegAttr csAttr = 0;
        csAttr.dpl = 3;
        csAttr.unusable = 0;
        csAttr.defaultSize = 1;
        csAttr.longMode = 0;
        csAttr.avl = 0;
        csAttr.granularity = 1;
        csAttr.present = 1;
        csAttr.type = 0xa;
        csAttr.writable = 0;
        csAttr.readable = 1;
        csAttr.expandDown = 0;
        csAttr.system = 1;

        tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr);

        tc->setMiscRegNoEffect(MISCREG_TSG_BASE, _gdtStart);
        tc->setMiscRegNoEffect(MISCREG_TSG_EFF_BASE, _gdtStart);
        // A descriptor-table limit is the offset of the last valid byte
        // relative to the base (size - 1), not an absolute end address.
        // The 64-bit path computes its limit the same way
        // (8 * numGDTEntries - 1).
        tc->setMiscRegNoEffect(MISCREG_TSG_LIMIT, _gdtSize - 1);

        // Set the LDT selector to 0 to deactivate it.
        tc->setMiscRegNoEffect(MISCREG_TSL, 0);

        Efer efer = 0;
        efer.sce = 1; // Enable system call extensions.
        efer.lme = 1; // Enable long mode.
        efer.lma = 0; // Deactivate long mode.
        efer.nxe = 1; // Enable nx support.
        efer.svme = 0; // Disable svm support for now. It isn't implemented.
        efer.ffxsr = 1; // Turn on fast fxsave and fxrstor.
        tc->setMiscReg(MISCREG_EFER, efer);

        //Set up the registers that describe the operating mode.
        CR0 cr0 = 0;
        cr0.pg = 1; // Turn on paging.
        cr0.cd = 0; // Don't disable caching.
        cr0.nw = 0; // This bit is defined to be ignored.
        cr0.am = 0; // No alignment checking
        cr0.wp = 0; // Supervisor mode can write read only pages
        cr0.ne = 1;
        cr0.et = 1; // This should always be 1
        cr0.ts = 0; // We don't do task switching, so causing fp exceptions
                    // would be pointless.
        cr0.em = 0; // Allow x87 instructions to execute natively.
        cr0.mp = 1; // This doesn't really matter, but the manual suggests
                    // setting it to one.
        cr0.pe = 1; // We're definitely in protected mode.
        tc->setMiscReg(MISCREG_CR0, cr0);

        tc->setMiscReg(MISCREG_MXCSR, 0x1f80);
    }
}
757 758template<class IntType> 759void 760X86Process::argsInit(int pageSize, 761 std::vector<AuxVector<IntType> > extraAuxvs) 762{ 763 int intSize = sizeof(IntType); 764 765 typedef AuxVector<IntType> auxv_t; 766 std::vector<auxv_t> auxv = extraAuxvs; 767 768 string filename; 769 if (argv.size() < 1) 770 filename = ""; 771 else 772 filename = argv[0]; 773 774 //We want 16 byte alignment 775 uint64_t align = 16; 776 777 // Patch the ld_bias for dynamic executables. 778 updateBias(); 779 780 // load object file into target memory 781 objFile->loadSections(initVirtMem); 782 783 enum X86CpuFeature { 784 X86_OnboardFPU = 1 << 0, 785 X86_VirtualModeExtensions = 1 << 1, 786 X86_DebuggingExtensions = 1 << 2, 787 X86_PageSizeExtensions = 1 << 3, 788 789 X86_TimeStampCounter = 1 << 4, 790 X86_ModelSpecificRegisters = 1 << 5, 791 X86_PhysicalAddressExtensions = 1 << 6, 792 X86_MachineCheckExtensions = 1 << 7, 793 794 X86_CMPXCHG8Instruction = 1 << 8, 795 X86_OnboardAPIC = 1 << 9, 796 X86_SYSENTER_SYSEXIT = 1 << 11, 797 798 X86_MemoryTypeRangeRegisters = 1 << 12, 799 X86_PageGlobalEnable = 1 << 13, 800 X86_MachineCheckArchitecture = 1 << 14, 801 X86_CMOVInstruction = 1 << 15, 802 803 X86_PageAttributeTable = 1 << 16, 804 X86_36BitPSEs = 1 << 17, 805 X86_ProcessorSerialNumber = 1 << 18, 806 X86_CLFLUSHInstruction = 1 << 19, 807 808 X86_DebugTraceStore = 1 << 21, 809 X86_ACPIViaMSR = 1 << 22, 810 X86_MultimediaExtensions = 1 << 23, 811 812 X86_FXSAVE_FXRSTOR = 1 << 24, 813 X86_StreamingSIMDExtensions = 1 << 25, 814 X86_StreamingSIMDExtensions2 = 1 << 26, 815 X86_CPUSelfSnoop = 1 << 27, 816 817 X86_HyperThreading = 1 << 28, 818 X86_AutomaticClockControl = 1 << 29, 819 X86_IA64Processor = 1 << 30 820 }; 821 822 // Setup the auxiliary
vectors. These will already have endian 823 // conversion. Auxiliary vectors are loaded only for elf formatted 824 // executables; the auxv is responsible for passing information from 825 // the OS to the interpreter. 826 ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile); 827 if (elfObject) { 828 uint64_t features = 829 X86_OnboardFPU | 830 X86_VirtualModeExtensions | 831 X86_DebuggingExtensions | 832 X86_PageSizeExtensions | 833 X86_TimeStampCounter | 834 X86_ModelSpecificRegisters | 835 X86_PhysicalAddressExtensions | 836 X86_MachineCheckExtensions | 837 X86_CMPXCHG8Instruction | 838 X86_OnboardAPIC | 839 X86_SYSENTER_SYSEXIT | 840 X86_MemoryTypeRangeRegisters | 841 X86_PageGlobalEnable | 842 X86_MachineCheckArchitecture | 843 X86_CMOVInstruction | 844 X86_PageAttributeTable | 845 X86_36BitPSEs | 846// X86_ProcessorSerialNumber | 847 X86_CLFLUSHInstruction | 848// X86_DebugTraceStore | 849// X86_ACPIViaMSR | 850 X86_MultimediaExtensions | 851 X86_FXSAVE_FXRSTOR | 852 X86_StreamingSIMDExtensions | 853 X86_StreamingSIMDExtensions2 | 854// X86_CPUSelfSnoop | 855// X86_HyperThreading | 856// X86_AutomaticClockControl | 857// X86_IA64Processor | 858 0; 859 860 //Bits which describe the system hardware capabilities 861 //XXX Figure out what these should be 862 auxv.push_back(auxv_t(M5_AT_HWCAP, features)); 863 //The system page size 864 auxv.push_back(auxv_t(M5_AT_PAGESZ, X86ISA::PageBytes)); 865 //Frequency at which times() increments 866 //Defined to be 100 in the kernel source. 867 auxv.push_back(auxv_t(M5_AT_CLKTCK, 100)); 868 // This is the virtual address of the program header tables if they 869 // appear in the executable image. 870 auxv.push_back(auxv_t(M5_AT_PHDR, elfObject->programHeaderTable())); 871 // This is the size of a program header entry from the elf file. 872 auxv.push_back(auxv_t(M5_AT_PHENT, elfObject->programHeaderSize())); 873 // This is the number of program headers from the original elf file. 
874 auxv.push_back(auxv_t(M5_AT_PHNUM, elfObject->programHeaderCount())); 875 // This is the base address of the ELF interpreter; it should be 876 // zero for static executables or contain the base address for 877 // dynamic executables. 878 auxv.push_back(auxv_t(M5_AT_BASE, getBias())); 879 //XXX Figure out what this should be. 880 auxv.push_back(auxv_t(M5_AT_FLAGS, 0)); 881 //The entry point to the program 882 auxv.push_back(auxv_t(M5_AT_ENTRY, objFile->entryPoint())); 883 //Different user and group IDs 884 auxv.push_back(auxv_t(M5_AT_UID, uid())); 885 auxv.push_back(auxv_t(M5_AT_EUID, euid())); 886 auxv.push_back(auxv_t(M5_AT_GID, gid())); 887 auxv.push_back(auxv_t(M5_AT_EGID, egid())); 888 //Whether to enable "secure mode" in the executable 889 auxv.push_back(auxv_t(M5_AT_SECURE, 0)); 890 //The address of 16 "random" bytes. 891 auxv.push_back(auxv_t(M5_AT_RANDOM, 0)); 892 //The name of the program 893 auxv.push_back(auxv_t(M5_AT_EXECFN, 0)); 894 //The platform string 895 auxv.push_back(auxv_t(M5_AT_PLATFORM, 0)); 896 } 897 898 //Figure out how big the initial stack needs to be 899 900 // A sentry NULL void pointer at the top of the stack. 901 int sentry_size = intSize; 902 903 //This is the name of the file which is present on the initial stack 904 //It's purpose is to let the user space linker examine the original file. 905 int file_name_size = filename.size() + 1; 906 907 const int numRandomBytes = 16; 908 int aux_data_size = numRandomBytes; 909 910 string platform = "x86_64"; 911 aux_data_size += platform.size() + 1; 912 913 int env_data_size = 0; 914 for (int i = 0; i < envp.size(); ++i) 915 env_data_size += envp[i].size() + 1; 916 int arg_data_size = 0; 917 for (int i = 0; i < argv.size(); ++i) 918 arg_data_size += argv[i].size() + 1; 919 920 //The info_block needs to be padded so it's size is a multiple of the 921 //alignment mask. 
Also, it appears that there needs to be at least some 922 //padding, so if the size is already a multiple, we need to increase it 923 //anyway. 924 int base_info_block_size = 925 sentry_size + file_name_size + env_data_size + arg_data_size; 926 927 int info_block_size = roundUp(base_info_block_size, align); 928 929 int info_block_padding = info_block_size - base_info_block_size; 930 931 //Each auxilliary vector is two 8 byte words 932 int aux_array_size = intSize * 2 * (auxv.size() + 1); 933 934 int envp_array_size = intSize * (envp.size() + 1); 935 int argv_array_size = intSize * (argv.size() + 1); 936 937 int argc_size = intSize; 938 939 //Figure out the size of the contents of the actual initial frame 940 int frame_size = 941 aux_array_size + 942 envp_array_size + 943 argv_array_size + 944 argc_size; 945 946 //There needs to be padding after the auxiliary vector data so that the 947 //very bottom of the stack is aligned properly. 948 int partial_size = frame_size + aux_data_size; 949 int aligned_partial_size = roundUp(partial_size, align); 950 int aux_padding = aligned_partial_size - partial_size; 951 952 int space_needed = 953 info_block_size + 954 aux_data_size + 955 aux_padding + 956 frame_size; 957
|