浅谈c语言中的字符串

浅谈c语言中的字符串

写在前面：最近MM问了我一个问题——字符串在内存中位于哪里？我想当然是位于数据段(data segment)。她又问，那怎么保证它只读呢？我答不上了。有些问题，看似简单，背后却隐藏着天机，后来查了一些资料，对C语言的字符串有了一个全新的认识。但这还不是终极认识，最近实在是太忙了，以后再重新认识它吧。进入正文：

注：segment和section两个单词经常混用，在这里我对运行时用segment，对编译时用section。

(1)表示形式
字符数组形式:char str[]="hustcat";
字符指针形式:char *str="hustcat";

(2)sizeof与strlen
sizeof("hustcat")=8; sizeof("")=1,而不是0;
strlen("hustcat")=7; strlen("")=0

(3)单字符字符串常量与字符常量的值大不相同
int x=(int)"A";
得到的结果是将x初始化成指向包含'A'和'\0'两个字符的内存块的指针。
int y=(int)'A';
得到的结果是y=0x41

(4)读写性
char p1[]="always writable"; //p1[0]='1' ,OK
char *p2="possibly not writable"; //p2[0]='1',在vc++ 6.0和gcc下都会出现运行时错误(段错误)
const char p3[]="never writable";//p3[0]='1',总是会造成编译时错误

(5)标准c允许实现对包含相同字符串的两个字符串常量使用同一存储空间。
char *str1,*str2;
int main()
{
   str1="hustcat";
   str2="hustcat";
   if(str1==str2)
       printf("strings are shared.\n");
   else
       printf("strings are not shared.\n");
   return 0;
}
在vc++ 6.0和gcc下输出：
strings are shared.

(6)编译时与运行时

字符串常量编译之后位于哪里?文本区(text section)，数据区(data section)还是bss区?

下面来看几个实际的程序：

以下基于gcc 1.40,输出的可执行文件格式为a.out格式:

第一个程序：

---------c程序--------------------
#include <stdio.h>
int main()
{
        char *ptr="1111";
        int a=12;
        return 0;
}
---------汇编输出-----------------
        .file   "hello.c"
gcc_compiled.:
.text
LC0:
        .ascii "1111\0"
        .align 2
.globl _main
_main:
        pushl %ebp
        movl %esp,%ebp
        subl $8,%esp
        movl $LC0,-4(%ebp)
        movl $12,-8(%ebp)
        xorl %eax,%eax
        jmp L1
        .align 2
L1:
        leave
        ret
---------相应信息----------------------
[/usr/root]# objdump -h hello.o
hello.o:
magic: 0x107 (407)machine type: 0flags: 0x0text 0x24 data 0x0 bss 0x0
nsyms 2 entry 0x0 trsize 0x8 drsize 0x0
[/usr/root]# objdump -h hello
hello:
magic: 0x10b (413)machine type: 0flags: 0x0text 0x1000 data 0x1000 bss 0x0
nsyms 20 entry 0x0 trsize 0x0 drsize 0x0

可以看出，在a.out格式目标文件中,字符串常量位于代码区。

第二个程序:

----------c程序---------------------
#include <stdio.h>
char *p="2222";
int main()
{
        char *ptr="1111";
        int a=12;
        return 0;
}
---------汇编输出------------------
.file   "hello.c"
gcc_compiled.:
.globl _p
.text
LC0:
        .ascii "2222\0"
.data
        .align 2
_p:
        .long LC0
.text
LC1:
        .ascii "1111\0"
        .align 2
.globl _main
_main:
        pushl %ebp
        movl %esp,%ebp
        subl $8,%esp
        movl $LC1,-4(%ebp)
        movl $12,-8(%ebp)
        xorl %eax,%eax
        jmp L1
    .align 2
L1:
    leave
    ret
-----------相应信息-------------------
[/usr/root]# objdump -h hello.o
hello.o:
magic: 0x107 (407)machine type: 0flags: 0x0text 0x28 data 0x4 bss 0x0
nsyms 3 entry 0x0 trsize 0x8 drsize 0x8
[/usr/root]# objdump -h hello
hello:
magic: 0x10b (413)machine type: 0flags: 0x0text 0x1000 data 0x1000 bss 0x0
nsyms 20 entry 0x0 trsize 0x0 drsize 0x0

可以知道，全局变量p位于data section,而字符串仍然位于text section。

第三个程序：

----------c程序-----------------
#include <stdio.h>
char *p1="2222";
char *p2="2222";
char str1[]="3333";
int main()
{
        char *ptr="1111";
        char str2[]="4444";
        int a=12;
        return 0;
}
-----------汇编输出---------------
    .file   "hello.c"
gcc_compiled.:
.globl _p1
.text
LC0:
        .ascii "2222\0"
.data
        .align 2
_p1:
        .long LC0   //p1和p2使用同一存储空间
.globl _p2
        .align 2
_p2:
        .long LC0
.globl _str1
_str1:
        .ascii "3333\0"  //str1所包含的字符串位于data区
.text
LC1:
        .ascii "1111\0"
LC2:
        .ascii "4444\0"
        .align 2
.globl _main
_main:
        pushl %ebp
        movl %esp,%ebp
        subl $16,%esp
        movl $LC1,-4(%ebp)
        leal -12(%ebp),%eax
        movl %eax,%eax
        movl LC2,%edx
        movl %edx,-12(%ebp)
        movb LC2+4,%dl
    movb %dl,-8(%ebp)
        movl $12,-16(%ebp)
        xorl %eax,%eax
        jmp L1
        .align 2
L1:
        leave
        ret

可以知道,p1和p2指向同一个字符串常量(LC0,位于text section),即相同的字符串常量共用同一存储空间;全局数组str1及其内容都位于data section;局部数组str2的初始值"4444"(LC2)存放在text section,运行时再被复制到栈上。

但是对于gcc 3.2.2生成的ELF(Executable and Linking Format)目标文件,结果又不同。
来看一个例子:

------------C程序----------------------------------
#include <stdio.h>
char *str1,*str2;
int main()
{
    str1="abcd";
    str2="abcd";
    if (str1==str2)
        printf("string are shared.\n");
    else
        printf("not shared.\n");
    str1[0]='1';
    if (*str1=='1')
        printf("writable.\n");
    else
        printf("not writable.\n");
    return 0;
}
-----------汇编输出--------------------------------
    .file    "test.c"
    .section    .rodata
.LC0:
    .string    "abcd"   //字符串位于rodata section
.LC1:
    .string    "string are shared.\n"
.LC2:
    .string    "not shared.\n"
.LC3:
    .string    "writable.\n"
.LC4:
    .string    "not writable.\n"
    .text
.globl main
    .type    main,@function
main:
    pushl    %ebp
    movl    %esp, %ebp
    subl    $8, %esp
    andl    $-16, %esp
    movl    $0, %eax
    subl    %eax, %esp
    movl    $.LC0, str1
    movl    $.LC0, str2
    movl    str1, %eax
    cmpl    str2, %eax
    jne    .L2
    subl    $12, %esp
    pushl    $.LC1
    call    printf
    addl    $16, %esp
    jmp    .L3
.L2:
    subl    $12, %esp
    pushl    $.LC2
    call    printf
    addl    $16, %esp
.L3:
    movl    str1, %eax
    movb    $49, (%eax)
    movl    str1, %eax
    cmpb    $49, (%eax)
    jne    .L4
    subl    $12, %esp
    pushl    $.LC3
    call    printf
    addl    $16, %esp
    jmp    .L5
.L4:
    subl    $12, %esp
    pushl    $.LC4
    call    printf
    addl    $16, %esp
.L5:
    movl    $0, %eax
    leave
    ret
.Lfe1:
    .size    main,.Lfe1-main
    .comm    str1,4,4
    .comm    str2,4,4
    .ident    "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
----------------相应信息------------------------------------
[root@localhost devlop]# objdump -h test.o

test.o:     file format elf32-i386

Sections:
Idx Name          Size      VMA       LMA       File off  Algn
  0 .text         0000008e  00000000  00000000  00000034  2**2
                  CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
  1 .data         00000000  00000000  00000000  000000c4  2**2
                  CONTENTS, ALLOC, LOAD, DATA
  2 .bss          00000000  00000000  00000000  000000c4  2**2
                  ALLOC
  3 .rodata       00000040  00000000  00000000  000000c4  2**0
                  CONTENTS, ALLOC, LOAD, READONLY, DATA
  4 .comment      00000033  00000000  00000000  00000104  2**0
                  CONTENTS, READONLY

从上可知，在ELF格式中,字符串常量位于rodata section，该区是只读的。

下面来对比一下a.out格式和ELF格式:

a.out头部
int a_magic; // 幻数
int a_text;   // 文本段大小
int a_data;   // 初始化的数据段大小
int a_bss;    // 未初始化的数据段大小
int a_syms;   // 符号表大小
int a_entry; // 入口点
int a_trsize; // 文本重定位段大小
int a_drsize; // 数据重定位段大小

ELF文件中常见的section(节)
.text
.data
.rodata
.bss
.symtab
.rel.text
.rel.data
.rel.rodata
.line
.debug
.strtab

总的来说，在编译时，字符串常量编译后所处的位置与具体的编译器(目标文件格式)相关；
              运行时，则与具体的操作系统(可执行文件格式)和加载器的实现相关。
相关阅读:
Error: unable to connect to node rabbit@mail: nodedown
flash上传控件跨域
 Nginx配置指定媒体类型文件强制下载
 Nginx反向代理配置可跨域
 日志：using the Connector/J connection property 'autoReconnect=true' to avoid this problem
web 安全（一）--web简介及前端技术（js）
Android 自动化之元素定位 xpath
Wireshark入门
 自动化测试工具
 Android App 专项测试--压力测试篇（三）
原文地址：https://www.cnblogs.com/hustcat/p/1453338.html