1/* 2 * Copyright (c) 2011-2013 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Copyright (c) 2003-2005 The Regents of The University of Michigan 15 * All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions are 19 * met: redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer; 21 * redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution; 24 * neither the name of the copyright holders nor the names of its 25 * contributors may be used to endorse or promote products derived from 26 * this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Authors: Steve Reinhardt 41 * Ali Saidi 42 */ 43 44#include "base/loader/elf_object.hh" 45 46#include <fcntl.h> 47#include <sys/mman.h> 48#include <sys/stat.h> 49#include <sys/types.h> 50#include <unistd.h> 51 52#include <cassert> 53#include <string> 54 55#include "base/bitfield.hh" 56#include "base/loader/symtab.hh" 57#include "base/logging.hh" 58#include "base/trace.hh" 59#include "debug/Loader.hh" 60#include "gelf.h" 61#include "sim/byteswap.hh" 62 63ObjectFile * 64ElfObject::tryFile(const std::string &fname, size_t len, uint8_t *data, 65 bool skip_interp_check) 66{ 67 // check that header matches library version 68 if (elf_version(EV_CURRENT) == EV_NONE) 69 panic("wrong elf version number!"); 70 71 // get a pointer to elf structure 72 // Check that we actually have a elf file 73 Elf *elf = elf_memory((char*)data, len); 74 assert(elf); 75 76 GElf_Ehdr ehdr; 77 if (gelf_getehdr(elf, &ehdr) == 0) { 78 DPRINTFR(Loader, "Not ELF\n"); 79 elf_end(elf); 80 return NULL; 81 } 82 83 // Detect the architecture 84 Arch arch = UnknownArch; 85 if (ehdr.e_machine == EM_SPARC64 || 86 (ehdr.e_machine == EM_SPARC && 87 ehdr.e_ident[EI_CLASS] == ELFCLASS64) || 88 ehdr.e_machine == EM_SPARCV9) { 89 arch = SPARC64; 90 } else if (ehdr.e_machine == EM_SPARC32PLUS || 91 (ehdr.e_machine == EM_SPARC && 92 ehdr.e_ident[EI_CLASS] == ELFCLASS32)) { 93 arch = SPARC32; 94 } else if (ehdr.e_machine == EM_MIPS && 95 ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 96 arch = Mips; 97 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { 98 fatal("The binary you're trying to load is compiled for big " 99 "endian MIPS. gem5\nonly supports little endian MIPS. " 100 "Please recompile your binary.\n"); 101 } 102 } else if (ehdr.e_machine == EM_X86_64 && 103 ehdr.e_ident[EI_CLASS] == ELFCLASS64) { 104 arch = X86_64; 105 } else if (ehdr.e_machine == EM_386 && 106 ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 107 arch = I386; 108 } else if (ehdr.e_machine == EM_ARM && 109 ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 110 arch = bits(ehdr.e_entry, 0) ? Thumb : Arm; 111 } else if (ehdr.e_machine == EM_AARCH64 && 112 ehdr.e_ident[EI_CLASS] == ELFCLASS64) { 113 arch = Arm64; 114 } else if (ehdr.e_machine == EM_RISCV) { 115 arch = (ehdr.e_ident[EI_CLASS] == ELFCLASS64) ? Riscv64 : Riscv32; 116 } else if (ehdr.e_machine == EM_PPC && 117 ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 118 arch = Power; 119 if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 120 fatal("The binary you're trying to load is compiled for " 121 "little endian Power.\ngem5 only supports big " 122 "endian Power. Please recompile your binary.\n"); 123 } 124 } else if (ehdr.e_machine == EM_PPC64) { 125 fatal("The binary you're trying to load is compiled for 64-bit " 126 "Power. M5\n only supports 32-bit Power. Please " 127 "recompile your binary.\n"); 128 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { 129 // Since we don't know how to check for alpha right now, we'll 130 // just assume if it wasn't something else and it's 64 bit, that's 131 // what it must be. 132 arch = Alpha; 133 } else { 134 warn("Unknown architecture: %d\n", ehdr.e_machine); 135 arch = UnknownArch; 136 } 137 138 // Detect the operating system 139 OpSys op_sys; 140 switch (ehdr.e_ident[EI_OSABI]) { 141 case ELFOSABI_LINUX: 142 op_sys = Linux; 143 break; 144 case ELFOSABI_SOLARIS: 145 op_sys = Solaris; 146 break; 147 case ELFOSABI_TRU64: 148 op_sys = Tru64; 149 break; 150 case ELFOSABI_ARM: 151 op_sys = LinuxArmOABI; 152 break; 153 case ELFOSABI_FREEBSD: 154 op_sys = FreeBSD; 155 break; 156 default: 157 op_sys = UnknownOpSys; 158 } 159 160 // Take a look at the .note.ABI section. 161 // It can let us know what's what. 162 if (op_sys == UnknownOpSys) { 163 int sec_idx = 1; 164 165 // Get the first section 166 Elf_Scn *section = elf_getscn(elf, sec_idx); 167 168 // While there are no more sections 169 while (section && op_sys == UnknownOpSys) { 170 GElf_Shdr shdr; 171 gelf_getshdr(section, &shdr); 172 173 char *e_str = elf_strptr(elf, ehdr.e_shstrndx, shdr.sh_name); 174 if (shdr.sh_type == SHT_NOTE && 175 !strcmp(".note.ABI-tag", e_str)) { 176 // we have found a ABI note section 177 // Check the 5th 32bit word for OS 0 == linux, 1 == hurd, 178 // 2 == solaris, 3 == freebsd 179 Elf_Data *raw_data = elf_rawdata(section, NULL); 180 assert(raw_data && raw_data->d_buf); 181 182 uint32_t raw_abi = ((uint32_t*)raw_data->d_buf)[4]; 183 bool is_le = ehdr.e_ident[EI_DATA] == ELFDATA2LSB; 184 uint32_t os_abi = is_le ? htole(raw_abi) : htobe(raw_abi); 185 186 switch (os_abi) { 187 case 0: 188 op_sys = Linux; 189 break; 190 case 1: 191 fatal("gem5 does not support the HURD ABI.\n"); 192 case 2: 193 op_sys = Solaris; 194 break; 195 case 3: 196 op_sys = FreeBSD; 197 break; 198 } 199 } // if section found 200 201 if (!strcmp(".SUNW_version", e_str) || 202 !strcmp(".stab.index", e_str)) 203 op_sys = Solaris; 204 205 section = elf_getscn(elf, ++sec_idx); 206 } // while sections 207 } 208 209 ElfObject * result = new ElfObject(fname, len, data, arch, op_sys); 210 211 // The number of headers in the file 212 result->_programHeaderCount = ehdr.e_phnum; 213 // Record the size of each entry 214 result->_programHeaderSize = ehdr.e_phentsize; 215 result->_programHeaderTable = 0; 216 if (result->_programHeaderCount) { // If there is a program header table 217 // Figure out the virtual address of the header table in the 218 // final memory image. We use the program headers themselves 219 // to translate from a file offset to the address in the image. 220 GElf_Phdr phdr; 221 uint64_t e_phoff = ehdr.e_phoff; 222 223 for (int i = 0; i < result->_programHeaderCount; i++) { 224 gelf_getphdr(elf, i, &phdr); 225 // Check if we've found the segment with the headers in it 226 if (phdr.p_offset <= e_phoff && 227 phdr.p_offset + phdr.p_filesz > e_phoff) { 228 result->_programHeaderTable = 229 phdr.p_paddr + (e_phoff - phdr.p_offset); 230 break; 231 } 232 } 233 } 234 235 if (!skip_interp_check) { 236 for (int i = 0; i < ehdr.e_phnum; i++) { 237 GElf_Phdr phdr; 238 M5_VAR_USED void *check_p = gelf_getphdr(elf, i, &phdr); 239 assert(check_p != nullptr); 240 241 if (phdr.p_type != PT_INTERP) 242 continue; 243 244 char *interp_path = (char*)data + phdr.p_offset; 245 int fd = open(interp_path, O_RDONLY); 246 if (fd == -1) 247 fatal("Unable to open dynamic executable's interpreter.\n"); 248 249 struct stat sb; 250 M5_VAR_USED int check_i = fstat(fd, &sb); 251 assert(check_i == 0); 252 253 void *mm = mmap(nullptr, sb.st_size, PROT_READ, 254 MAP_PRIVATE, fd, 0); 255 assert(mm != MAP_FAILED); 256 close(fd); 257 258 uint8_t *interp_image = (uint8_t*)mm; 259 ObjectFile *obj = tryFile(interp_path, sb.st_size, 260 interp_image, true); 261 assert(obj != nullptr); 262 result->interpreter = dynamic_cast<ElfObject*>(obj); 263 assert(result->interpreter != nullptr); 264 break; 265 } 266 } 267 268 elf_end(elf); 269 return result; 270} 271 272ElfObject::ElfObject(const std::string &_filename, size_t _len, 273 uint8_t *_data, Arch _arch, OpSys _op_sys) 274 : ObjectFile(_filename, _len, _data, _arch, _op_sys), 275 _programHeaderTable(0), _programHeaderSize(0), _programHeaderCount(0), 276 interpreter(nullptr), ldBias(0), relocate(true), 277 ldMin(std::numeric_limits<Addr>::max()), 278 ldMax(std::numeric_limits<Addr>::min()) 279{ 280 // check that header matches library version 281 if (elf_version(EV_CURRENT) == EV_NONE) 282 panic("wrong elf version number!"); 283 284 // get a pointer to elf structure 285 Elf *elf = elf_memory((char*)fileData,len); 286 assert(elf); 287 288 // Check that we actually have a elf file 289 GElf_Ehdr ehdr; 290 if (gelf_getehdr(elf, &ehdr) ==0) { 291 panic("Not ELF, shouldn't be here"); 292 } 293 294 entry = ehdr.e_entry; 295 296 // initialize segment sizes to 0 in case they're not present 297 text.size = data.size = bss.size = 0; 298 text.baseAddr = data.baseAddr = bss.baseAddr = 0; 299 300 int sec_idx = 1; 301 302 // The first address of some important sections. 303 Addr text_sec_start = 0; 304 Addr data_sec_start = 0; 305 Addr bss_sec_start = 0; 306 307 // Get the first section 308 Elf_Scn *section = elf_getscn(elf, sec_idx); 309 310 // Find the beginning of the most interesting sections. 311 while (section) { 312 GElf_Shdr shdr; 313 gelf_getshdr(section, &shdr); 314 char *sec_name = elf_strptr(elf, ehdr.e_shstrndx, shdr.sh_name); 315 316 if (sec_name) { 317 if (!strcmp(".text", sec_name)) { 318 text_sec_start = shdr.sh_addr; 319 } else if (!strcmp(".data", sec_name)) { 320 data_sec_start = shdr.sh_addr; 321 } else if (!strcmp(".bss", sec_name)) { 322 bss_sec_start = shdr.sh_addr; 323 } 324 } else { 325 Elf_Error errorNum = (Elf_Error)elf_errno(); 326 if (errorNum != ELF_E_NONE) { 327 const char *errorMessage = elf_errmsg(errorNum); 328 fatal("Error from libelf: %s.\n", errorMessage); 329 } 330 } 331 332 section = elf_getscn(elf, ++sec_idx); 333 } 334 335 // Go through all the segments in the program, record them, and scrape 336 // out information about the text, data, and bss areas needed by other 337 // code. 338 for (int i = 0; i < ehdr.e_phnum; ++i) { 339 GElf_Phdr phdr; 340 if (gelf_getphdr(elf, i, &phdr) == 0) { 341 panic("gelf_getphdr failed for segment %d.", i); 342 } 343 344 // for now we don't care about non-loadable segments 345 if (!(phdr.p_type & PT_LOAD)) 346 continue; 347 348 ldMin = std::min(ldMin, phdr.p_vaddr); 349 ldMax = std::max(ldMax, phdr.p_vaddr + phdr.p_memsz); 350 351 // Check to see if this segment contains the bss section. 352 if (phdr.p_paddr <= bss_sec_start && 353 phdr.p_paddr + phdr.p_memsz > bss_sec_start && 354 phdr.p_memsz - phdr.p_filesz > 0) { 355 bss.baseAddr = phdr.p_paddr + phdr.p_filesz; 356 bss.size = phdr.p_memsz - phdr.p_filesz; 357 bss.fileImage = NULL; 358 } 359 360 // Check to see if this is the text or data segment 361 if (phdr.p_vaddr <= text_sec_start && 362 phdr.p_vaddr + phdr.p_filesz > text_sec_start) { 363 364 // If this value is nonzero, we need to flip the relocate flag. 365 if (phdr.p_vaddr != 0) 366 relocate = false; 367 368 text.baseAddr = phdr.p_paddr; 369 text.size = phdr.p_filesz; 370 text.fileImage = fileData + phdr.p_offset; 371 } else if (phdr.p_vaddr <= data_sec_start && 372 phdr.p_vaddr + phdr.p_filesz > data_sec_start) { 373 data.baseAddr = phdr.p_paddr; 374 data.size = phdr.p_filesz; 375 data.fileImage = fileData + phdr.p_offset; 376 } else { 377 // If it's none of the above but is loadable, 378 // load the filesize worth of data 379 Segment extra; 380 extra.baseAddr = phdr.p_paddr; 381 extra.size = phdr.p_filesz; 382 extra.fileImage = fileData + phdr.p_offset; 383 extraSegments.push_back(extra); 384 } 385 } 386 387 // should have found at least one loadable segment 388 warn_if(text.size == 0, 389 "Empty .text segment in '%s'. ELF file corrupted?\n", 390 filename); 391 392 DPRINTFR(Loader, "text: 0x%x %d\ndata: 0x%x %d\nbss: 0x%x %d\n", 393 text.baseAddr, text.size, data.baseAddr, data.size, 394 bss.baseAddr, bss.size); 395 396 elf_end(elf); 397 398 // We will actually read the sections when we need to load them 399} 400 401 402bool 403ElfObject::loadSomeSymbols(SymbolTable *symtab, int binding, Addr mask, 404 Addr base, Addr offset) 405{ 406 if (!symtab) 407 return false; 408 409 // check that header matches library version 410 if (elf_version(EV_CURRENT) == EV_NONE) 411 panic("wrong elf version number!"); 412 413 // get a pointer to elf structure 414 Elf *elf = elf_memory((char*)fileData,len); 415 assert(elf != NULL); 416 417 // Get the first section 418 int sec_idx = 1; // there is a 0 but it is nothing, go figure 419 Elf_Scn *section = elf_getscn(elf, sec_idx); 420 421 // While there are no more sections 422 bool found = false; 423 while (section != NULL) { 424 GElf_Shdr shdr; 425 gelf_getshdr(section, &shdr); 426 427 if (shdr.sh_type == SHT_SYMTAB) { 428 found = true; 429 Elf_Data *data = elf_getdata(section, NULL); 430 int count = shdr.sh_size / shdr.sh_entsize; 431 DPRINTF(Loader, "Found Symbol Table, %d symbols present\n", count); 432 433 // loop through all the symbols, only loading global ones 434 for (int i = 0; i < count; ++i) { 435 GElf_Sym sym; 436 gelf_getsym(data, i, &sym); 437 if (GELF_ST_BIND(sym.st_info) == binding) { 438 char *sym_name = elf_strptr(elf, shdr.sh_link, sym.st_name); 439 if (sym_name && sym_name[0] != '$') { 440 Addr value = sym.st_value - base + offset; 441 if (symtab->insert(value & mask, sym_name)) { 442 DPRINTF(Loader, "Symbol: %-40s value %#x\n", 443 sym_name, value); 444 } 445 } 446 } 447 } 448 } 449 ++sec_idx; 450 section = elf_getscn(elf, sec_idx); 451 } 452 453 elf_end(elf); 454 455 return found; 456} 457 458bool 459ElfObject::loadAllSymbols(SymbolTable *symtab, Addr base, Addr offset, 460 Addr addr_mask) 461{ 462 return (loadGlobalSymbols(symtab, base, offset, addr_mask) && 463 loadLocalSymbols(symtab, base, offset, addr_mask) && 464 loadWeakSymbols(symtab, base, offset, addr_mask)); 465} 466 467bool 468ElfObject::loadGlobalSymbols(SymbolTable *symtab, Addr base, Addr offset, 469 Addr addr_mask) 470{ 471 if (interpreter) { 472 interpreter->loadSomeSymbols(symtab, STB_GLOBAL, addr_mask, 473 base, offset); 474 } 475 return loadSomeSymbols(symtab, STB_GLOBAL, addr_mask, base, offset); 476} 477 478bool 479ElfObject::loadLocalSymbols(SymbolTable *symtab, Addr base, Addr offset, 480 Addr addr_mask) 481{ 482 if (interpreter) { 483 interpreter->loadSomeSymbols(symtab, STB_LOCAL, addr_mask, 484 base, offset); 485 } 486 return loadSomeSymbols(symtab, STB_LOCAL, addr_mask, base, offset); 487} 488 489bool 490ElfObject::loadWeakSymbols(SymbolTable *symtab, Addr base, Addr offset, 491 Addr addr_mask) 492{ 493 if (interpreter) { 494 interpreter->loadSomeSymbols(symtab, STB_WEAK, addr_mask, 495 base, offset); 496 } 497 return loadSomeSymbols(symtab, STB_WEAK, addr_mask, base, offset); 498} 499 500bool 501ElfObject::loadSections(const PortProxy& mem_proxy, Addr addr_mask, 502 Addr offset) 503{ 504 if (!ObjectFile::loadSections(mem_proxy, addr_mask, offset)) 505 return false; 506 507 for (auto seg : extraSegments) { 508 if (!loadSection(&seg, mem_proxy, addr_mask, offset)) { 509 return false; 510 } 511 } 512 513 if (interpreter) 514 interpreter->loadSections(mem_proxy, addr_mask, offset); 515 516 return true; 517} 518 519void 520ElfObject::getSections() 521{ 522 assert(!sectionNames.size()); 523 524 // check that header matches library version 525 if (elf_version(EV_CURRENT) == EV_NONE) 526 panic("wrong elf version number!"); 527 528 // get a pointer to elf structure 529 Elf *elf = elf_memory((char*)fileData,len); 530 assert(elf != NULL); 531 532 // Check that we actually have a elf file 533 GElf_Ehdr ehdr; 534 if (gelf_getehdr(elf, &ehdr) ==0) { 535 panic("Not ELF, shouldn't be here"); 536 } 537 538 // Get the first section 539 int sec_idx = 1; // there is a 0 but it is nothing, go figure 540 Elf_Scn *section = elf_getscn(elf, sec_idx); 541 542 // While there are no more sections 543 while (section) { 544 GElf_Shdr shdr; 545 gelf_getshdr(section, &shdr); 546 sectionNames.insert(elf_strptr(elf, ehdr.e_shstrndx, shdr.sh_name)); 547 section = elf_getscn(elf, ++sec_idx); 548 } // while sections 549 550 elf_end(elf); 551} 552 553bool 554ElfObject::sectionExists(std::string sec) 555{ 556 if (!sectionNames.size()) 557 getSections(); 558 559 return sectionNames.find(sec) != sectionNames.end(); 560} 561 562 563void 564ElfObject::updateBias(Addr bias_addr) 565{ 566 // Record the bias. 567 ldBias = bias_addr; 568 569 // Patch the entry point with bias_addr. 570 entry += bias_addr; 571 572 // Patch segments with the bias_addr. 573 text.baseAddr += bias_addr; 574 data.baseAddr += bias_addr; 575 bss.baseAddr += bias_addr; 576 for (auto &segment : extraSegments) 577 segment.baseAddr += bias_addr; 578} 579