在 x86 32位系统下,进程的虚拟地址空间大小为 $2^{32}$ 字节(4 GB)。其中,在Windows系统下,4 GB地址空间中 0x00000000-0x7FFFFFFF 是用户地址空间,0x80000000-0xFFFFFFFF 是内核空间。在Linux系统下,0xC0000000-0xFFFFFFFF 为内核空间,为所有进程所共享,0x00000000-0xBFFFFFFF 为用户空间。本文主要研究在Linux系统下的虚拟地址空间。
我们所写的程序都存放在磁盘上,而在运行时向内存中加载的只有指令和数据。这些指令和数据并不是直接加载到真实的物理内存中,而是加载到虚拟地址空间中。每个进程都有自己的虚拟地址空间,并且结构相同,都被划分出若干段。其中,用户空间是每个进程私有的,而内核空间是所有进程共享的。
Linux用户进程分段存储内容
Section | 属性 | 存储内容 |
---|---|---|
栈 | 可读;可写 | 局部变量、const局部常量、函数参数、返回地址等 |
堆 | 可读;可写 | 动态分配的内存 |
BSS段 | 可读;可写 | 未初始化/初始化为0的静态变量/全局变量 |
数据段 | 可读;可写 | 初始化为非0值的静态变量/全局变量 |
代码段 | 只读;可执行 | 可执行代码、常量(字符串常量;const全局常量;enum常量;#define常量等) |
下面通过一段代码来初步了解虚拟地址空间。
#include <iostream>
// Globals with external linkage. In the objdump symbol table shown later in this
// article, gdata1 is listed under .data while gdata2 and gdata3 are listed under .bss.
int gdata1 = 10; // initialized global variable
int gdata2 = 0; // global variable initialized to 0
int gdata3; // uninitialized global variable
// File-scope statics. In the same symbol table they appear as local ("l") symbols:
// gdata4 under .data, gdata5 and gdata6 under .bss.
static int gdata4 = 11; // static, initialized variable
static int gdata5 = 0; // static variable initialized to 0
static int gdata6; // static, uninitialized variable
// Entry point: declares three ordinary locals and three function-scope statics,
// does nothing with them, and returns 0. It exists only so the resulting binary
// can be inspected with objdump/readelf.
int main()
{
int a = 12; // initialized local variable
int b = 0; // local variable initialized to 0
int c; // uninitialized local variable
static int e = 13; // local static, initialized (listed under .data as _ZZ4mainE1e)
static int f = 0; // local static, initialized to 0 (listed under .bss as _ZZ4mainE1f)
static int g; // local static, uninitialized (listed under .bss as _ZZ4mainE1g)
return 0;
}
在上述代码中:
gdata1——gdata6 为全局变量,最终是以数据的形式被加载到内存中,并添加到符号表中。其中:
- 未初始化或初始化为0的全局变量存放于 .bss 段
- 已初始化的全局变量存放于 .data 段
在main 函数内部
a,b,c 三个局部变量最终以指令的形式被加载到内存中,不会添加到符号表中。如:mov dword ptr[a], 0ch
指令 ,存放于 .text段,而在该指令运行时会在栈上开辟出一块空间用于存放a的值,因此我们也说局部变量在栈上开辟空间。
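e,f,g 为静态局部变量,它们与全局变量一样存放于数据区(.data/.bss),而不是栈上。由于这里的初值都是编译期常量,它们在程序加载时就已具有初值,并不需要等到程序运行至该行代码;只有初值需要在运行时计算的静态局部变量,才会在第一次执行到该行时进行初始化。因此,e(初值非0)存放于 .data 段,f,g(初值为0或未初始化)存放于 .bss 段。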
查看文件段表
在Linux系统下可执行文件为ELF格式,可以通过readelf -S a.out
或 objdump -h a.out
查看我们之前写的程序的ELF文件的段表
$ objdump -h a.out
a.out: file format elf64-x86-64
Sections:
Idx Name Size VMA LMA File off Algn
0 .interp 0000001c 0000000000400238 0000000000400238 00000238 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
1 .note.ABI-tag 00000020 0000000000400254 0000000000400254 00000254 2**2
CONTENTS, ALLOC, LOAD, READONLY, DATA
2 .note.gnu.build-id 00000024 0000000000400274 0000000000400274 00000274 2**2
CONTENTS, ALLOC, LOAD, READONLY, DATA
3 .gnu.hash 00000024 0000000000400298 0000000000400298 00000298 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
4 .dynsym 00000090 00000000004002c0 00000000004002c0 000002c0 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
5 .dynstr 00000090 0000000000400350 0000000000400350 00000350 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
6 .gnu.version 0000000c 00000000004003e0 00000000004003e0 000003e0 2**1
CONTENTS, ALLOC, LOAD, READONLY, DATA
7 .gnu.version_r 00000040 00000000004003f0 00000000004003f0 000003f0 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
8 .rela.dyn 00000018 0000000000400430 0000000000400430 00000430 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
9 .rela.plt 00000078 0000000000400448 0000000000400448 00000448 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
10 .init 0000001a 00000000004004c0 00000000004004c0 000004c0 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
11 .plt 00000060 00000000004004e0 00000000004004e0 000004e0 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE
12 .text 000001d2 0000000000400540 0000000000400540 00000540 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE
13 .fini 00000009 0000000000400714 0000000000400714 00000714 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
14 .rodata 00000010 0000000000400720 0000000000400720 00000720 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
15 .eh_frame_hdr 00000044 0000000000400730 0000000000400730 00000730 2**2
CONTENTS, ALLOC, LOAD, READONLY, DATA
16 .eh_frame 00000134 0000000000400778 0000000000400778 00000778 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
17 .init_array 00000010 0000000000600df8 0000000000600df8 00000df8 2**3
CONTENTS, ALLOC, LOAD, DATA
18 .fini_array 00000008 0000000000600e08 0000000000600e08 00000e08 2**3
CONTENTS, ALLOC, LOAD, DATA
19 .jcr 00000008 0000000000600e10 0000000000600e10 00000e10 2**3
CONTENTS, ALLOC, LOAD, DATA
20 .dynamic 000001e0 0000000000600e18 0000000000600e18 00000e18 2**3
CONTENTS, ALLOC, LOAD, DATA
21 .got 00000008 0000000000600ff8 0000000000600ff8 00000ff8 2**3
CONTENTS, ALLOC, LOAD, DATA
22 .got.plt 00000040 0000000000601000 0000000000601000 00001000 2**3
CONTENTS, ALLOC, LOAD, DATA
23 .data 00000010 0000000000601040 0000000000601040 00001040 2**2
CONTENTS, ALLOC, LOAD, DATA
24 .bss 00000020 0000000000601050 0000000000601050 00001050 2**2
ALLOC
25 .comment 0000002d 0000000000000000 0000000000000000 00001050 2**0
CONTENTS, READONLY
表中各列依次为:序号(Idx)、段名(Name)、section的大小(Size)、虚拟地址(VMA,Virtual Memory Address)、装载地址(LMA,Load Memory Address)、文件偏移(File off)以及对齐方式(Algn,2**n 表示按 2 的 n 次方字节对齐)。
查看符号表
通过readelf -s a.out
或objdump -t a.out
查看该应用程序ELF文件的符号表
a.out: file format elf64-x86-64
SYMBOL TABLE:
0000000000400238 l d .interp 0000000000000000 .interp
0000000000400254 l d .note.ABI-tag 0000000000000000 .note.ABI-tag
0000000000400274 l d .note.gnu.build-id 0000000000000000 .note.gnu.build-id
0000000000400298 l d .gnu.hash 0000000000000000 .gnu.hash
00000000004002c0 l d .dynsym 0000000000000000 .dynsym
0000000000400350 l d .dynstr 0000000000000000 .dynstr
00000000004003e0 l d .gnu.version 0000000000000000 .gnu.version
00000000004003f0 l d .gnu.version_r 0000000000000000 .gnu.version_r
0000000000400430 l d .rela.dyn 0000000000000000 .rela.dyn
0000000000400448 l d .rela.plt 0000000000000000 .rela.plt
00000000004004c0 l d .init 0000000000000000 .init
00000000004004e0 l d .plt 0000000000000000 .plt
0000000000400540 l d .text 0000000000000000 .text
0000000000400714 l d .fini 0000000000000000 .fini
0000000000400720 l d .rodata 0000000000000000 .rodata
0000000000400730 l d .eh_frame_hdr 0000000000000000 .eh_frame_hdr
0000000000400778 l d .eh_frame 0000000000000000 .eh_frame
0000000000600df8 l d .init_array 0000000000000000 .init_array
0000000000600e08 l d .fini_array 0000000000000000 .fini_array
0000000000600e10 l d .jcr 0000000000000000 .jcr
0000000000600e18 l d .dynamic 0000000000000000 .dynamic
0000000000600ff8 l d .got 0000000000000000 .got
0000000000601000 l d .got.plt 0000000000000000 .got.plt
0000000000601040 l d .data 0000000000000000 .data
0000000000601050 l d .bss 0000000000000000 .bss
0000000000000000 l d .comment 0000000000000000 .comment
0000000000000000 l df *ABS* 0000000000000000 crtstuff.c
0000000000600e10 l O .jcr 0000000000000000 __JCR_LIST__
0000000000400570 l F .text 0000000000000000 deregister_tm_clones
00000000004005a0 l F .text 0000000000000000 register_tm_clones
00000000004005e0 l F .text 0000000000000000 __do_global_dtors_aux
0000000000601050 l O .bss 0000000000000001 completed.6355
0000000000600e08 l O .fini_array 0000000000000000 __do_global_dtors_aux_fini_array_entry
0000000000400600 l F .text 0000000000000000 frame_dummy
0000000000600df8 l O .init_array 0000000000000000 __frame_dummy_init_array_entry
0000000000000000 l df *ABS* 0000000000000000 a.cpp
000000000060105c l O .bss 0000000000000001 _ZStL8__ioinit
0000000000601048 l O .data 0000000000000004 _ZL6gdata4
0000000000601060 l O .bss 0000000000000004 _ZL6gdata5
0000000000601064 l O .bss 0000000000000004 _ZL6gdata6
0000000000400641 l F .text 000000000000003d _Z41__static_initialization_and_destruction_0ii
000000000040067e l F .text 0000000000000015 _GLOBAL__sub_I_gdata1
0000000000601068 l O .bss 0000000000000004 _ZZ4mainE1g
000000000060106c l O .bss 0000000000000004 _ZZ4mainE1f
000000000060104c l O .data 0000000000000004 _ZZ4mainE1e
0000000000000000 l df *ABS* 0000000000000000 crtstuff.c
00000000004008a8 l O .eh_frame 0000000000000000 __FRAME_END__
0000000000600e10 l O .jcr 0000000000000000 __JCR_END__
0000000000000000 l df *ABS* 0000000000000000
0000000000400730 l .eh_frame_hdr 0000000000000000 __GNU_EH_FRAME_HDR
0000000000601000 l O .got.plt 0000000000000000 _GLOBAL_OFFSET_TABLE_
0000000000600e08 l .init_array 0000000000000000 __init_array_end
0000000000600df8 l .init_array 0000000000000000 __init_array_start
0000000000600e18 l O .dynamic 0000000000000000 _DYNAMIC
0000000000601040 w .data 0000000000000000 data_start
0000000000400710 g F .text 0000000000000002 __libc_csu_fini
0000000000400540 g F .text 0000000000000000 _start
0000000000000000 w *UND* 0000000000000000 __gmon_start__
0000000000601054 g O .bss 0000000000000004 gdata2
0000000000400714 g F .fini 0000000000000000 _fini
0000000000000000 F *UND* 0000000000000000 _ZNSt8ios_base4InitC1Ev@@GLIBCXX_3.4
0000000000000000 F *UND* 0000000000000000 __libc_start_main@@GLIBC_2.2.5
0000000000000000 F *UND* 0000000000000000 __cxa_atexit@@GLIBC_2.2.5
0000000000400530 F *UND* 0000000000000000 _ZNSt8ios_base4InitD1Ev@@GLIBCXX_3.4
0000000000400720 g O .rodata 0000000000000004 _IO_stdin_used
0000000000601040 g .data 0000000000000000 __data_start
0000000000601044 g O .data 0000000000000004 gdata1
0000000000601050 g O .data 0000000000000000 .hidden __TMC_END__
0000000000400728 g O .rodata 0000000000000000 .hidden __dso_handle
00000000004006a0 g F .text 0000000000000065 __libc_csu_init
0000000000601050 g .bss 0000000000000000 __bss_start
0000000000601070 g .bss 0000000000000000 _end
0000000000601050 g .data 0000000000000000 _edata
0000000000601058 g O .bss 0000000000000004 gdata3
000000000040062d g F .text 0000000000000014 main
00000000004004c0 g F .init 0000000000000000 _init
其中,我们可以看到 gdata1-gdata6 以及 e,f,g 所属的分段:
0000000000601044 g O .data 0000000000000004 gdata1
0000000000601054 g O .bss 0000000000000004 gdata2
0000000000601058 g O .bss 0000000000000004 gdata3
0000000000601048 l O .data 0000000000000004 _ZL6gdata4
0000000000601060 l O .bss 0000000000000004 _ZL6gdata5
0000000000601064 l O .bss 0000000000000004 _ZL6gdata6
0000000000601068 l O .bss 0000000000000004 _ZZ4mainE1g
000000000060106c l O .bss 0000000000000004 _ZZ4mainE1f
000000000060104c l O .data 0000000000000004 _ZZ4mainE1e
我们所写的程序,不管是C语言还是C++语言,编译器最终都会先将其转化为汇编指令,而后再转化成机器指令存储在磁盘上。在我们运行程序时,程序的指令和数据被加载到虚拟地址空间中,然后根据内存中分段的偏移映射到物理内存上执行。