本文用于记录对QEMU中ELF文件加载函数的分析过程。按照“函数使用->函数定义->函数实现->函数实现的分析”的顺序逐步展开,最终提取出ELF文件加载的代码。
1、load_elf
mips malta中,对load_elf的使用如下:
/*
 * Call site in the MIPS Malta board code: load the kernel ELF image.
 * cpu_mips_kseg0_to_phys is passed as the address translation callback,
 * no opaque pointer and no lowaddr output are requested, and clear_lsb
 * is 1.  Any negative return value is treated as fatal.
 */
if (load_elf(loaderparams.kernel_filename, cpu_mips_kseg0_to_phys, NULL,
             (uint64_t *)&kernel_entry, NULL, (uint64_t *)&kernel_high,
             big_endian, ELF_MACHINE, 1) < 0) {
    fprintf(stderr, "qemu: could not load kernel '%s' ",
            loaderparams.kernel_filename);
    exit(1);
}
load_elf在头文件include/hw/loader.h中,函数定义如下:
/*
 * Load the ELF image stored in @filename.
 *
 * @translate_fn/@translate_opaque: optional callback (and its context)
 *     applied to segment physical addresses by the class-specific loader.
 * @pentry:   if non-NULL, receives the entry point.
 * @lowaddr:  if non-NULL, receives the lowest load address.
 * @highaddr: if non-NULL, receives the highest load address (end of the
 *     last segment).
 * @big_endian: non-zero when the image is expected to be ELFDATA2MSB,
 *     zero when it is expected to be ELFDATA2LSB.
 * @elf_machine: expected e_machine value.
 * @clear_lsb: passed through to the symbol loader, which clears bit 0 of
 *     symbol values when it is set.
 *
 * Returns a negative value on failure; on success, returns the value
 * produced by load_elf32()/load_elf64().
 */
int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t),
             void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
             uint64_t *highaddr, int big_endian, int elf_machine,
             int clear_lsb);
load_elf的具体实现在hw/core/loader.c中,函数体如下:
int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb) { int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; fd = open(filename, O_RDONLY | O_BINARY); if (fd < 0) { perror(filename); return -1; } if (read(fd, e_ident, sizeof(e_ident)) != sizeof(e_ident)) goto fail; if (e_ident[0] != ELFMAG0 || e_ident[1] != ELFMAG1 || e_ident[2] != ELFMAG2 || e_ident[3] != ELFMAG3) { ret = ELF_LOAD_NOT_ELF; goto fail; } #ifdef HOST_WORDS_BIGENDIAN data_order = ELFDATA2MSB; #else data_order = ELFDATA2LSB; #endif must_swab = data_order != e_ident[EI_DATA]; if (big_endian) { target_data_order = ELFDATA2MSB; } else { target_data_order = ELFDATA2LSB; } if (target_data_order != e_ident[EI_DATA]) { ret = ELF_LOAD_WRONG_ENDIAN; goto fail; } lseek(fd, 0, SEEK_SET); if (e_ident[EI_CLASS] == ELFCLASS64) { ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb); } else { ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb); } fail: close(fd); return ret; }
在load_elf中,对elf文件进行读取分析的核心函数为load_elf64和load_elf32,下面将把它们进行展开。
2、load_elf64/load_elf32
load_elf64和load_elf32是通过glue(load_elf, SZ)来进行定义的,所在文件include/hw/elf_ops.h,具体函数如下:
static int glue(load_elf, SZ)(const char *name, int fd, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, int must_swab, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int elf_machine, int clear_lsb) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; int size, i, total_size; elf_word mem_size, file_size; uint64_t addr, low = (uint64_t)-1, high = 0; uint8_t *data = NULL; char label[128]; int ret = ELF_LOAD_FAILED; if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) goto fail; if (must_swab) { glue(bswap_ehdr, SZ)(&ehdr); } switch (elf_machine) { case EM_PPC64: if (EM_PPC64 != ehdr.e_machine) if (EM_PPC != ehdr.e_machine) { ret = ELF_LOAD_WRONG_ARCH; goto fail; } break; case EM_X86_64: if (EM_X86_64 != ehdr.e_machine) if (EM_386 != ehdr.e_machine) { ret = ELF_LOAD_WRONG_ARCH; goto fail; } break; case EM_MICROBLAZE: if (EM_MICROBLAZE != ehdr.e_machine) if (EM_MICROBLAZE_OLD != ehdr.e_machine) { ret = ELF_LOAD_WRONG_ARCH; goto fail; } break; default: if (elf_machine != ehdr.e_machine) { ret = ELF_LOAD_WRONG_ARCH; goto fail; } } if (pentry) *pentry = (uint64_t)(elf_sword)ehdr.e_entry; glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb); size = ehdr.e_phnum * sizeof(phdr[0]); if (lseek(fd, ehdr.e_phoff, SEEK_SET) != ehdr.e_phoff) { goto fail; } phdr = g_malloc0(size); if (!phdr) goto fail; if (read(fd, phdr, size) != size) goto fail; if (must_swab) { for(i = 0; i < ehdr.e_phnum; i++) { ph = &phdr[i]; glue(bswap_phdr, SZ)(ph); } } total_size = 0; for(i = 0; i < ehdr.e_phnum; i++) { ph = &phdr[i]; if (ph->p_type == PT_LOAD) { mem_size = ph->p_memsz; /* Size of the ROM */ file_size = ph->p_filesz; /* Size of the allocated data */ data = g_malloc0(file_size); if (ph->p_filesz > 0) { if (lseek(fd, ph->p_offset, SEEK_SET) < 0) { goto fail; } if (read(fd, data, file_size) != file_size) { goto fail; } } /* address_offset is hack for kernel images that are linked at the wrong physical address. 
*/ if (translate_fn) { addr = translate_fn(translate_opaque, ph->p_paddr); glue(elf_reloc, SZ)(&ehdr, fd, must_swab, translate_fn, translate_opaque, data, ph, elf_machine); } else { addr = ph->p_paddr; } /* the entry pointer in the ELF header is a virtual * address, if the text segments paddr and vaddr differ * we need to adjust the entry */ if (pentry && !translate_fn && ph->p_vaddr != ph->p_paddr && ehdr.e_entry >= ph->p_vaddr && ehdr.e_entry < ph->p_vaddr + ph->p_filesz && ph->p_flags & PF_X) { *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr; } snprintf(label, sizeof(label), "phdr #%d: %s", i, name); /* rom_add_elf_program() seize the ownership of 'data' */ rom_add_elf_program(label, data, file_size, mem_size, addr); total_size += mem_size; if (addr < low) low = addr; if ((addr + mem_size) > high) high = addr + mem_size; data = NULL; } } g_free(phdr); if (lowaddr) *lowaddr = (uint64_t)(elf_sword)low; if (highaddr) *highaddr = (uint64_t)(elf_sword)high; return total_size; fail: g_free(data); g_free(phdr); return ret; }
其中,glue在文件include/qemu/compiler.h中,定义如下:
/*
 * Token-pasting and stringification helpers.
 *
 * glue(x, y) pastes its two arguments AFTER macro-expanding them, which is
 * why it goes through xglue(): glue(load_elf, SZ) with SZ defined as 32
 * yields load_elf32, whereas xglue(load_elf, SZ) would yield load_elfSZ.
 * stringify(s) likewise expands s before tostring() turns it into a
 * string literal.
 *
 * Each preprocessor directive must sit on its own line.
 */
#ifndef glue
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
#define stringify(s) tostring(s)
#define tostring(s) #s
#endif
根据定义,我们可以知道,“glue(load_elf, SZ)”经过展开,会变成“load_elfSZ”,当SZ为32和64的时候,结果就是load_elf32和load_elf64了。
在文件hw/core/loader.c中,
#define SZ 32 #define elf_word uint32_t #define elf_sword int32_t #define bswapSZs bswap32s #include "hw/elf_ops.h" #undef elfhdr #undef elf_phdr #undef elf_shdr #undef elf_sym #undef elf_rela #undef elf_note #undef elf_word #undef elf_sword #undef bswapSZs #undef SZ #define elfhdr elf64_hdr #define elf_phdr elf64_phdr #define elf_note elf64_note #define elf_shdr elf64_shdr #define elf_sym elf64_sym #define elf_rela elf64_rela #define elf_word uint64_t #define elf_sword int64_t #define bswapSZs bswap64s #define SZ 64 #include "hw/elf_ops.h"
我们可以看到,loader.c在包含elf_ops.h之前,先对SZ等进行了宏定义。通过两次宏定义和包含,就得到了load_elf32和load_elf64。
在glue(load_elf, SZ)中,使用了以下4个函数:
/* Byte-swap the fields of the ELF header in place. */
glue(bswap_ehdr, SZ)(&ehdr);
/* Load the function symbols of the image. */
glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb);
/* Byte-swap the fields of one program header in place. */
glue(bswap_phdr, SZ)(ph);
/* Apply relocations that fall inside the segment held in 'data'. */
glue(elf_reloc, SZ)(&ehdr, fd, must_swab, translate_fn, translate_opaque,
                    data, ph, elf_machine);
它们都在文件include/hw/elf_ops.h中。elf_ops.h中的其它函数也会在解析elf文件时用到,所以我们将elf_ops.h全部贴出来
3、 include/hw/elf_ops.h,文件内容如下:
static void glue(bswap_ehdr, SZ)(struct elfhdr *ehdr) { bswap16s(&ehdr->e_type); /* Object file type */ bswap16s(&ehdr->e_machine); /* Architecture */ bswap32s(&ehdr->e_version); /* Object file version */ bswapSZs(&ehdr->e_entry); /* Entry point virtual address */ bswapSZs(&ehdr->e_phoff); /* Program header table file offset */ bswapSZs(&ehdr->e_shoff); /* Section header table file offset */ bswap32s(&ehdr->e_flags); /* Processor-specific flags */ bswap16s(&ehdr->e_ehsize); /* ELF header size in bytes */ bswap16s(&ehdr->e_phentsize); /* Program header table entry size */ bswap16s(&ehdr->e_phnum); /* Program header table entry count */ bswap16s(&ehdr->e_shentsize); /* Section header table entry size */ bswap16s(&ehdr->e_shnum); /* Section header table entry count */ bswap16s(&ehdr->e_shstrndx); /* Section header string table index */ } static void glue(bswap_phdr, SZ)(struct elf_phdr *phdr) { bswap32s(&phdr->p_type); /* Segment type */ bswapSZs(&phdr->p_offset); /* Segment file offset */ bswapSZs(&phdr->p_vaddr); /* Segment virtual address */ bswapSZs(&phdr->p_paddr); /* Segment physical address */ bswapSZs(&phdr->p_filesz); /* Segment size in file */ bswapSZs(&phdr->p_memsz); /* Segment size in memory */ bswap32s(&phdr->p_flags); /* Segment flags */ bswapSZs(&phdr->p_align); /* Segment alignment */ } static void glue(bswap_shdr, SZ)(struct elf_shdr *shdr) { bswap32s(&shdr->sh_name); bswap32s(&shdr->sh_type); bswapSZs(&shdr->sh_flags); bswapSZs(&shdr->sh_addr); bswapSZs(&shdr->sh_offset); bswapSZs(&shdr->sh_size); bswap32s(&shdr->sh_link); bswap32s(&shdr->sh_info); bswapSZs(&shdr->sh_addralign); bswapSZs(&shdr->sh_entsize); } static void glue(bswap_sym, SZ)(struct elf_sym *sym) { bswap32s(&sym->st_name); bswapSZs(&sym->st_value); bswapSZs(&sym->st_size); bswap16s(&sym->st_shndx); } static void glue(bswap_rela, SZ)(struct elf_rela *rela) { bswapSZs(&rela->r_offset); bswapSZs(&rela->r_info); bswapSZs((elf_word *)&rela->r_addend); } static struct elf_shdr 
*glue(find_section, SZ)(struct elf_shdr *shdr_table, int n, int type) { int i; for(i=0;i<n;i++) { if (shdr_table[i].sh_type == type) return shdr_table + i; } return NULL; } static int glue(symfind, SZ)(const void *s0, const void *s1) { hwaddr addr = *(hwaddr *)s0; struct elf_sym *sym = (struct elf_sym *)s1; int result = 0; if (addr < sym->st_value) { result = -1; } else if (addr >= sym->st_value + sym->st_size) { result = 1; } return result; } static const char *glue(lookup_symbol, SZ)(struct syminfo *s, hwaddr orig_addr) { struct elf_sym *syms = glue(s->disas_symtab.elf, SZ); struct elf_sym *sym; sym = bsearch(&orig_addr, syms, s->disas_num_syms, sizeof(*syms), glue(symfind, SZ)); if (sym != NULL) { return s->disas_strtab + sym->st_name; } return ""; } static int glue(symcmp, SZ)(const void *s0, const void *s1) { struct elf_sym *sym0 = (struct elf_sym *)s0; struct elf_sym *sym1 = (struct elf_sym *)s1; return (sym0->st_value < sym1->st_value) ? -1 : ((sym0->st_value > sym1->st_value) ? 1 : 0); } static int glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab, int clear_lsb) { struct elf_shdr *symtab, *strtab, *shdr_table = NULL; struct elf_sym *syms = NULL; struct syminfo *s; int nsyms, i; char *str = NULL; shdr_table = load_at(fd, ehdr->e_shoff, sizeof(struct elf_shdr) * ehdr->e_shnum); if (!shdr_table) return -1; if (must_swab) { for (i = 0; i < ehdr->e_shnum; i++) { glue(bswap_shdr, SZ)(shdr_table + i); } } symtab = glue(find_section, SZ)(shdr_table, ehdr->e_shnum, SHT_SYMTAB); if (!symtab) goto fail; syms = load_at(fd, symtab->sh_offset, symtab->sh_size); if (!syms) goto fail; nsyms = symtab->sh_size / sizeof(struct elf_sym); i = 0; while (i < nsyms) { if (must_swab) glue(bswap_sym, SZ)(&syms[i]); /* We are only interested in function symbols. Throw everything else away. 
*/ if (syms[i].st_shndx == SHN_UNDEF || syms[i].st_shndx >= SHN_LORESERVE || ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) { nsyms--; if (i < nsyms) { syms[i] = syms[nsyms]; } continue; } if (clear_lsb) { /* The bottom address bit marks a Thumb or MIPS16 symbol. */ syms[i].st_value &= ~(glue(glue(Elf, SZ), _Addr))1; } i++; } syms = g_realloc(syms, nsyms * sizeof(*syms)); qsort(syms, nsyms, sizeof(*syms), glue(symcmp, SZ)); for (i = 0; i < nsyms - 1; i++) { if (syms[i].st_size == 0) { syms[i].st_size = syms[i + 1].st_value - syms[i].st_value; } } /* String table */ if (symtab->sh_link >= ehdr->e_shnum) goto fail; strtab = &shdr_table[symtab->sh_link]; str = load_at(fd, strtab->sh_offset, strtab->sh_size); if (!str) goto fail; /* Commit */ s = g_malloc0(sizeof(*s)); s->lookup_symbol = glue(lookup_symbol, SZ); glue(s->disas_symtab.elf, SZ) = syms; s->disas_num_syms = nsyms; s->disas_strtab = str; s->next = syminfos; syminfos = s; g_free(shdr_table); return 0; fail: g_free(syms); g_free(str); g_free(shdr_table); return -1; } static int glue(elf_reloc, SZ)(struct elfhdr *ehdr, int fd, int must_swab, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint8_t *data, struct elf_phdr *ph, int elf_machine) { struct elf_shdr *reltab, *shdr_table = NULL; struct elf_rela *rels = NULL; int nrels, i, ret = -1; elf_word wordval; void *addr; shdr_table = load_at(fd, ehdr->e_shoff, sizeof(struct elf_shdr) * ehdr->e_shnum); if (!shdr_table) { return -1; } if (must_swab) { for (i = 0; i < ehdr->e_shnum; i++) { glue(bswap_shdr, SZ)(&shdr_table[i]); } } reltab = glue(find_section, SZ)(shdr_table, ehdr->e_shnum, SHT_RELA); if (!reltab) { goto fail; } rels = load_at(fd, reltab->sh_offset, reltab->sh_size); if (!rels) { goto fail; } nrels = reltab->sh_size / sizeof(struct elf_rela); for (i = 0; i < nrels; i++) { if (must_swab) { glue(bswap_rela, SZ)(&rels[i]); } if (rels[i].r_offset < ph->p_vaddr || rels[i].r_offset >= ph->p_vaddr + ph->p_filesz) { continue; } addr = 
&data[rels[i].r_offset - ph->p_vaddr]; switch (elf_machine) { case EM_S390: switch (rels[i].r_info) { case R_390_RELATIVE: wordval = *(elf_word *)addr; if (must_swab) { bswapSZs(&wordval); } wordval = translate_fn(translate_opaque, wordval); if (must_swab) { bswapSZs(&wordval); } *(elf_word *)addr = wordval; break; default: fprintf(stderr, "Unsupported relocation type %i! ", (int)rels[i].r_info); } } } ret = 0; fail: g_free(rels); g_free(shdr_table); return ret; } static int glue(load_elf, SZ)(const char *name, int fd, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, int must_swab, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int elf_machine, int clear_lsb) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; int size, i, total_size; elf_word mem_size, file_size; uint64_t addr, low = (uint64_t)-1, high = 0; uint8_t *data = NULL; char label[128]; int ret = ELF_LOAD_FAILED; if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) goto fail; if (must_swab) { glue(bswap_ehdr, SZ)(&ehdr); } switch (elf_machine) { case EM_PPC64: if (EM_PPC64 != ehdr.e_machine) if (EM_PPC != ehdr.e_machine) { ret = ELF_LOAD_WRONG_ARCH; goto fail; } break; case EM_X86_64: if (EM_X86_64 != ehdr.e_machine) if (EM_386 != ehdr.e_machine) { ret = ELF_LOAD_WRONG_ARCH; goto fail; } break; case EM_MICROBLAZE: if (EM_MICROBLAZE != ehdr.e_machine) if (EM_MICROBLAZE_OLD != ehdr.e_machine) { ret = ELF_LOAD_WRONG_ARCH; goto fail; } break; default: if (elf_machine != ehdr.e_machine) { ret = ELF_LOAD_WRONG_ARCH; goto fail; } } if (pentry) *pentry = (uint64_t)(elf_sword)ehdr.e_entry; glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb); size = ehdr.e_phnum * sizeof(phdr[0]); if (lseek(fd, ehdr.e_phoff, SEEK_SET) != ehdr.e_phoff) { goto fail; } phdr = g_malloc0(size); if (!phdr) goto fail; if (read(fd, phdr, size) != size) goto fail; if (must_swab) { for(i = 0; i < ehdr.e_phnum; i++) { ph = &phdr[i]; glue(bswap_phdr, SZ)(ph); } } total_size = 0; for(i = 0; i < ehdr.e_phnum; 
i++) { ph = &phdr[i]; if (ph->p_type == PT_LOAD) { mem_size = ph->p_memsz; /* Size of the ROM */ file_size = ph->p_filesz; /* Size of the allocated data */ data = g_malloc0(file_size); if (ph->p_filesz > 0) { if (lseek(fd, ph->p_offset, SEEK_SET) < 0) { goto fail; } if (read(fd, data, file_size) != file_size) { goto fail; } } /* address_offset is hack for kernel images that are linked at the wrong physical address. */ if (translate_fn) { addr = translate_fn(translate_opaque, ph->p_paddr); glue(elf_reloc, SZ)(&ehdr, fd, must_swab, translate_fn, translate_opaque, data, ph, elf_machine); } else { addr = ph->p_paddr; } /* the entry pointer in the ELF header is a virtual * address, if the text segments paddr and vaddr differ * we need to adjust the entry */ if (pentry && !translate_fn && ph->p_vaddr != ph->p_paddr && ehdr.e_entry >= ph->p_vaddr && ehdr.e_entry < ph->p_vaddr + ph->p_filesz && ph->p_flags & PF_X) { *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr; } snprintf(label, sizeof(label), "phdr #%d: %s", i, name); /* rom_add_elf_program() seize the ownership of 'data' */ rom_add_elf_program(label, data, file_size, mem_size, addr); total_size += mem_size; if (addr < low) low = addr; if ((addr + mem_size) > high) high = addr + mem_size; data = NULL; } } g_free(phdr); if (lowaddr) *lowaddr = (uint64_t)(elf_sword)low; if (highaddr) *highaddr = (uint64_t)(elf_sword)high; return total_size; fail: g_free(data); g_free(phdr); return ret; }
其中,bswap16s在文件include/qemu/bswap.h中,其定义如下:
/*
 * Return x with its two bytes exchanged.
 * NOTE(review): bswap_16 is not defined in this excerpt; presumably it is
 * the host's <byteswap.h> primitive - confirm.
 */
static inline uint16_t bswap16(uint16_t x)
{
    return bswap_16(x);
}

/*
 * Byte-swap the 16-bit value stored at *s in place.
 * Defined after bswap16() so that the helper is declared before use.
 */
static inline void bswap16s(uint16_t *s)
{
    *s = bswap16(*s);
}
4、load_at
load_at在hw/core/loader.c中,其定义如下:
/*
 * Read exactly @size bytes located at @offset in the file open on @fd.
 *
 * Returns a buffer obtained from g_malloc() that the caller must release
 * with g_free(), or NULL when seeking fails or fewer than @size bytes
 * could be read.
 */
static void *load_at(int fd, off_t offset, size_t size)
{
    void *buf = NULL;

    if (lseek(fd, offset, SEEK_SET) >= 0) {
        buf = g_malloc(size);
        if (read(fd, buf, size) != size) {
            /* Short read or error: nothing usable was loaded. */
            g_free(buf);
            buf = NULL;
        }
    }
    return buf;
}
5、ELF文件格式:关于ELF文件格式的内容摘自百度百科
在计算机科学中,ELF(Executable and Linkable Format,可执行与可链接格式)是一种用于二进制文件、可执行文件、目标代码、共享库和核心转储的标准文件格式。它是UNIX系统实验室(USL)作为应用程序二进制接口(Application Binary Interface,ABI)而开发和发布的,也是Linux的主要可执行文件格式。1999年,ELF被86open项目选为x86架构上的类Unix操作系统的二进制文件标准格式,用来取代COFF。因其可扩展性与灵活性,也可应用在其它处理器、计算机系统架构的操作系统上。
ELF文件由4部分组成,分别是ELF头(ELF header)、程序头表(Program header table)、节(Section)和节头表(Section header table)。实际上,一个文件中不一定包含全部内容,而且它们的位置也未必按上述顺序安排,只有ELF头的位置是固定的,其余各部分的位置、大小等信息由ELF头中的各项值来决定。