int 0x80 系统调用
1.用户怎么调用?(建议是间接方式,方便移植)系统如何使用?(直接方式)
2.调用后处理过程?
3.系统怎么设置的?
===============================
1.在用户层面来讲,有2种方式,直接和间接
间接,我们使用c库函数,比如 int read(int fd,char *buf,int n);
直接,我们直接使用所要使用功能函数对应的系统调用,//lin/include/unistd.h中,比如上边对应read
_syscall3(int, read, int, fd, char *, buf, int, n);//这个是宏
/*
 * _syscall3 (include/unistd.h, lines 172-183): generates a user-space stub
 * `type name(atype a, btype b, ctype c)` for a three-argument system call.
 *
 * The stub loads the call number __NR_<name> into eax and the three
 * arguments into ebx, ecx and edx, then executes `int $0x80`.
 * The "0" input constraint ties the call number to the same register as
 * output operand 0 ("=a", i.e. eax), which also receives the result.
 * A non-negative result is returned as-is; a negative result is stored
 * (negated) in errno and the stub returns -1.
 */
#define _syscall3(type,name,atype,a,btype,b,ctype,c) \
type name(atype a,btype b,ctype c) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
	: "=a" (__res) \
	: "0" (__NR_##name),"b" ((long)(a)),"c" ((long)(b)),"d" ((long)(c))); \
if (__res>=0) \
	return (type) __res; \
errno=-__res; \
return -1; \
}
--------------------------------------------------
2调用后处理过程
上边的2种调用方式,最终都以下边的方式实现
int read(int fd,char *buf,int n)
{
long _res;
_asm_ volatile (
"int $0x80"
: "=a" (_res)
: "0" (_NR_read), "b" ((long)(fd)),"c"((long)(buf)),"d"((long)(n)));
if(_res>=0)
return int _res;
errno=-_res;
return -1;
}
//最终调用形式在c中,内嵌汇编 参数值4个分别放在eax(对应的系统功能号),ebx,ecx,edx,调用int 0x80;
-----
int 0x80做了什么
++++++++++++++++++++++++
先看看怎么来的...
//在lin/kernel/sched.c中411行sched_init()中
从这句 set_system_gate(0x80,&system_call);
可以看出
1.system_call实现了系统调用处理过程 (处理过程system_call //lin/kernel/system_call.s)
2.int 0x80中断 的对应
+++++++++
system_call()做了什么 //lin/include/linux/sys.h
1.处理中断前设置环境的过程
2.找到实际处理的入口
3.返回包括最后其他的一些处理,(这里不是重点)
++++++++++++++++++++++++++++++++
处理的入口
call _sys_call_table(,%eax,4)//lin/include/linux/sys.h
----------------处理函数在哪里
--其中小部分函数在对应 //lin/kernel/sys.c中,大部分分布在相关的文件中
-----------------------------
比如read()会找到,这里的extern int sys_read(); 函数来处理
//sys_read()实现在lin/fs/read_write.c 55-81行
都会在这里处理,这里是具体的实现
----------------------------------------
-当然这里针对不同类型文件,还有对应的pipe,chr,blk,dir具体的处理方法
--------------------------------------------
55 int sys_read(unsigned int fd,char * buf,int count) 56 { 57 struct file * file; 58 struct m_inode * inode; 59 60 if (fd>=NR_OPEN || count<0 || !(file=current->filp[fd])) 61 return -EINVAL; 62 if (!count) 63 return 0; 64 verify_area(buf,count); 65 inode = file->f_inode; 66 if (inode->i_pipe) 67 return (file->f_mode&1)?read_pipe(inode,buf,count):-EIO; 68 if (S_ISCHR(inode->i_mode)) 69 return rw_char(READ,inode->i_zone[0],buf,count,&file->f_pos); 70 if (S_ISBLK(inode->i_mode)) 71 return block_read(inode->i_zone[0],&file->f_pos,buf,count); 72 if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode)) { 73 if (count+file->f_pos > inode->i_size) 74 count = inode->i_size - file->f_pos; 75 if (count<=0) 76 return 0; 77 return file_read(inode,file,buf,count); 78 } 79 printk("(Read)inode->i_mode=%06o ",inode->i_mode); 80 return -EINVAL; 81 }
//这里假设是对blk相关
则系统会调用block_read()在 lin/fs/block_dev.c中47-73行,代码如下
47 int block_read(int dev, unsigned long * pos, char * buf, int count) 48 { 49 int block = *pos >> BLOCK_SIZE_BITS; 50 int offset = *pos & (BLOCK_SIZE-1); 51 int chars; 52 int read = 0; 53 struct buffer_head * bh; 54 register char * p; 55 56 while (count>0) { 57 chars = BLOCK_SIZE-offset; 58 if (chars > count) 59 chars = count; 60 if (!(bh = breada(dev,block,block+1,block+2,-1))) 61 return read?read:-EIO; 62 block++; 63 p = offset + bh->b_data; 64 offset = 0; 65 *pos += chars; 66 read += chars; 67 count -= chars; 68 while (chars-->0) 69 put_fs_byte(*(p++),buf++); 70 brelse(bh); 71 } 72 return read; 73 } 74
在这个中间还有少量调用,会有具体代码实现,比如put_fs_byte();是靠汇编实现的
完整的调用过程示意....
--------------------------------------------------------
3.系统的设置(实现int 0x80)
---------------------------接上边2
---set_system_gate(0x80,&system_call);
-------------------------------
这个如何做到的
实现了系统调用 (处理过程system_call //lin/kernel/system_call.s)和 int 0x80中断 的对应
+++++++++++++++++++++++++
系统调用,在linux 0.11中有3种:中断 异常 系统调用,描述符都在idt表中
系统调用只用了0x80 一个中断号 用eax实现不同调用子过程
++++++++++++++++++++++++++
============================================
====以下是更具体的
================================================
//lin/include/asm/system.h
//这段代码是用来设置 中断 异常 系统调用 ,可以看出它们都在idt表中
22 #define _set_gate(gate_addr,type,dpl,addr) 23 __asm__ ("movw %%dx,%%ax " 24 "movw %0,%%dx " 25 "movl %%eax,%1 " 26 "movl %%edx,%2" 27 : 28 : "i" ((short) (0x8000+(dpl<<13)+(type<<8))), 29 "o" (*((char *) (gate_addr))), 30 "o" (*(4+(char *) (gate_addr))), 31 "d" ((char *) (addr)),"a" (0x00080000))
//以上段代码把 中断 异常 系统调用,都汇集到这里处理,而且都在idt表中 32 33 #define set_intr_gate(n,addr) 34 _set_gate(&idt[n],14,0,addr)
//4个参数分别是 idt表中的偏移地址,14是type指示了系统类型(描述符系统类型),0是dpl,addr是处理程序地址偏移 35 36 #define set_trap_gate(n,addr) 37 _set_gate(&idt[n],15,0,addr)
//可以看到异常和系统调用除了编号不一样 int n;就是用户级别不一样了 38 39 #define set_system_gate(n,addr) 40 _set_gate(&idt[n],15,3,addr)
//n对应int n,系统调用是用户级,用户级别的程序都可以调用 41
//lin/kernel/sched.c
//411 int 0x80 放在idt表中
385 void sched_init(void) 386 { 387 int i; 388 struct desc_struct * p; 389 390 if (sizeof(struct sigaction) != 16) 391 panic("Struct sigaction MUST be 16 bytes"); 392 set_tss_desc(gdt+FIRST_TSS_ENTRY,&(init_task.task.tss)); 393 set_ldt_desc(gdt+FIRST_LDT_ENTRY,&(init_task.task.ldt)); 394 p = gdt+2+FIRST_TSS_ENTRY; 395 for(i=1;i<NR_TASKS;i++) { 396 task[i] = NULL; 397 p->a=p->b=0; 398 p++; 399 p->a=p->b=0; 400 p++; 401 } 402 /* Clear NT, so that we won't have troubles with that later on */ 403 __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); 404 ltr(0); 405 lldt(0); 406 outb_p(0x36,0x43); /* binary, mode 3, LSB/MSB, ch 0 */ 407 outb_p(LATCH & 0xff , 0x40); /* LSB */ 408 outb(LATCH >> 8 , 0x40); /* MSB */ 409 set_intr_gate(0x20,&timer_interrupt); 410 outb(inb_p(0x21)&~0x01,0x21); 411 set_system_gate(0x80,&system_call);
//411行实现了系统调用 和 int 0x80 的对应,0x80放在idt表中的位置,指向system_call处理 412 } 413
//lin/kernel/system_call.s
//这里是汇编所以_system_call ,和c中的system_call对应
//上边的代码指向了_system_call(80-128行)来处理系统调用,它使用的堆栈在下面顺便详细说明,其他部分暂时不看
/*
------
| ss|
| esp|
|eflags|
| cs|
| eip|以上5个由int 0x80自动压入;系统调用来自用户态,发生特权级切换,所以一定有ss和esp;而中断服务都是在内核态
| ds|
| es|
| fs|
| edx|
| ecx|
| ebx|以上6个在call _sys_call_table前压入
*/
1 /* 2 * linux/kernel/system_call.s 3 * 4 * (C) 1991 Linus Torvalds 5 */ 6 7 /* 8 * system_call.s contains the system-call low-level handling routines. 9 * This also contains the timer-interrupt handler, as some of the code is 10 * the same. The hd- and flopppy-interrupts are also here. 11 * 12 * NOTE: This code handles signal-recognition, which happens every time 13 * after a timer-interrupt and after each system call. Ordinary interrupts 14 * don't handle signal-recognition, as that would clutter them up totally 15 * unnecessarily. 16 * 17 * Stack layout in 'ret_from_system_call': 18 * 19 * 0(%esp) - %eax 20 * 4(%esp) - %ebx 21 * 8(%esp) - %ecx 22 * C(%esp) - %edx 23 * 10(%esp) - %fs 24 * 14(%esp) - %es 25 * 18(%esp) - %ds 26 * 1C(%esp) - %eip 27 * 20(%esp) - %cs 28 * 24(%esp) - %eflags 29 * 28(%esp) - %oldesp 30 * 2C(%esp) - %oldss 31 */ 32 33 SIG_CHLD = 17 34 35 EAX = 0x00 36 EBX = 0x04 37 ECX = 0x08 38 EDX = 0x0C 39 FS = 0x10 40 ES = 0x14 41 DS = 0x18 42 EIP = 0x1C 43 CS = 0x20 44 EFLAGS = 0x24 45 OLDESP = 0x28 46 OLDSS = 0x2C 47 48 state = 0 # these are offsets into the task-struct. 49 counter = 4 50 priority = 8 51 signal = 12 52 sigaction = 16 # MUST be 16 (=len of sigaction) 53 blocked = (33*16) 54 55 # offsets within sigaction 56 sa_handler = 0 57 sa_mask = 4 58 sa_flags = 8 59 sa_restorer = 12 60 61 nr_system_calls = 72 62 63 /* 64 * Ok, I get parallel printer interrupts while using the floppy for some 65 * strange reason. Urgel. Now I just ignore them. 66 */ 67 .globl _system_call,_sys_fork,_timer_interrupt,_sys_execve 68 .globl _hd_interrupt,_floppy_interrupt,_parallel_interrupt 69 .globl _device_not_available, _coprocessor_error 70 71 .align 2 72 bad_sys_call: 73 movl $-1,%eax 74 iret 75 .align 2 76 reschedule: 77 pushl $ret_from_sys_call 78 jmp _schedule 79 .align 2 80 _system_call:
//因为int 0x80 到这里,所以用内核态堆栈要压入 ss esp eflags cs eip 5个 81 cmpl $nr_system_calls-1,%eax
//在数组中nr_system_calls 减 1
//eax中是对应的系统号(子程序编号)在lin/include/linux/sys.h中有对应编号,
//http://www.cnblogs.com/caesarxu/p/3261218.html
82 ja bad_sys_call 83 push %ds 84 push %es 85 push %fs 86 pushl %edx 87 pushl %ecx # push %ebx,%ecx,%edx as parameters 88 pushl %ebx # to the system call
//内核态堆栈要压入 ds es fs edx ecx ebx 6个,这里是参数 89 movl $0x10,%edx # set up ds,es to kernel space 90 mov %dx,%ds 91 mov %dx,%es 92 movl $0x17,%edx # fs points to local data space 93 mov %dx,%fs 94 call _sys_call_table(,%eax,4)
//call指令会暂时多压入一个返回地址(即第95行的地址),然后调用对应编号的函数sys_xxxxx,并从该函数返回;在处理函数(如copy_process)的参数中,这个返回地址对应的参数名为none
95 pushl %eax
//压入eax 是保护它在121弹出 96 movl _current,%eax 97 cmpl $0,state(%eax) # state 98 jne reschedule 99 cmpl $0,counter(%eax) # counter 100 je reschedule 101 ret_from_sys_call: 102 movl _current,%eax # task[0] cannot have signals 103 cmpl _task,%eax 104 je 3f 105 cmpw $0x0f,CS(%esp) # was old code segment supervisor ? 106 jne 3f 107 cmpw $0x17,OLDSS(%esp) # was stack segment = 0x17 ? 108 jne 3f 109 movl signal(%eax),%ebx 110 movl blocked(%eax),%ecx 111 notl %ecx 112 andl %ebx,%ecx 113 bsfl %ecx,%ecx 114 je 3f 115 btrl %ecx,%ebx 116 movl %ebx,signal(%eax) 117 incl %ecx 118 pushl %ecx 119 call _do_signal 120 popl %eax 121 3: popl %eax 122 popl %ebx 123 popl %ecx 124 popl %edx 125 pop %fs 126 pop %es 127 pop %ds 128 iret 129 130 .align 2 131 _coprocessor_error:
// 132 push %ds 133 push %es 134 push %fs 135 pushl %edx 136 pushl %ecx 137 pushl %ebx 138 pushl %eax 139 movl $0x10,%eax 140 mov %ax,%ds 141 mov %ax,%es 142 movl $0x17,%eax 143 mov %ax,%fs 144 pushl $ret_from_sys_call 145 jmp _math_error 146 147 .align 2 148 _device_not_available: 149 push %ds 150 push %es 151 push %fs 152 pushl %edx 153 pushl %ecx 154 pushl %ebx 155 pushl %eax 156 movl $0x10,%eax 157 mov %ax,%ds 158 mov %ax,%es 159 movl $0x17,%eax 160 mov %ax,%fs 161 pushl $ret_from_sys_call 162 clts # clear TS so that we can use math 163 movl %cr0,%eax 164 testl $0x4,%eax # EM (math emulation bit) 165 je _math_state_restore 166 pushl %ebp 167 pushl %esi 168 pushl %edi 169 call _math_emulate 170 popl %edi 171 popl %esi 172 popl %ebp 173 ret 174 175 .align 2 176 _timer_interrupt: 177 push %ds # save ds,es and put kernel data space 178 push %es # into them. %fs is used by _system_call 179 push %fs 180 pushl %edx # we save %eax,%ecx,%edx as gcc doesn't 181 pushl %ecx # save those across function calls. %ebx 182 pushl %ebx # is saved as we use that in ret_sys_call 183 pushl %eax 184 movl $0x10,%eax 185 mov %ax,%ds 186 mov %ax,%es 187 movl $0x17,%eax 188 mov %ax,%fs 189 incl _jiffies 190 movb $0x20,%al # EOI to interrupt controller #1 191 outb %al,$0x20 192 movl CS(%esp),%eax 193 andl $3,%eax # %eax is CPL (0 or 3, 0=supervisor) 194 pushl %eax 195 call _do_timer # 'do_timer(long CPL)' does everything from 196 addl $4,%esp # task switching to accounting ... 197 jmp ret_from_sys_call 198 199 .align 2 200 _sys_execve:
//可能会call这里来 201 lea EIP(%esp),%eax 202 pushl %eax 203 call _do_execve 204 addl $4,%esp 205 ret 206 207 .align 2 208 _sys_fork:
//这里也有可能的 209 call _find_empty_process 210 testl %eax,%eax 211 js 1f 212 push %gs 213 pushl %esi 214 pushl %edi 215 pushl %ebp 216 pushl %eax 217 call _copy_process 218 addl $20,%esp 219 1: ret 220 221 _hd_interrupt: 222 pushl %eax 223 pushl %ecx 224 pushl %edx 225 push %ds 226 push %es 227 push %fs 228 movl $0x10,%eax 229 mov %ax,%ds 230 mov %ax,%es 231 movl $0x17,%eax 232 mov %ax,%fs 233 movb $0x20,%al 234 outb %al,$0xA0 # EOI to interrupt controller #1 235 jmp 1f # give port chance to breathe 236 1: jmp 1f 237 1: xorl %edx,%edx 238 xchgl _do_hd,%edx 239 testl %edx,%edx 240 jne 1f 241 movl $_unexpected_hd_interrupt,%edx 242 1: outb %al,$0x20 243 call *%edx # "interesting" way of handling intr. 244 pop %fs 245 pop %es 246 pop %ds 247 popl %edx 248 popl %ecx 249 popl %eax 250 iret 251 252 _floppy_interrupt: 253 pushl %eax 254 pushl %ecx 255 pushl %edx 256 push %ds 257 push %es 258 push %fs 259 movl $0x10,%eax 260 mov %ax,%ds 261 mov %ax,%es 262 movl $0x17,%eax 263 mov %ax,%fs 264 movb $0x20,%al 265 outb %al,$0x20 # EOI to interrupt controller #1 266 xorl %eax,%eax 267 xchgl _do_floppy,%eax 268 testl %eax,%eax 269 jne 1f 270 movl $_unexpected_floppy_interrupt,%eax 271 1: call *%eax # "interesting" way of handling intr. 272 pop %fs 273 pop %es 274 pop %ds 275 popl %edx 276 popl %ecx 277 popl %eax 278 iret 279 280 _parallel_interrupt: 281 pushl %eax 282 movb $0x20,%al 283 outb %al,$0x20 284 popl %eax 285 iret
//对应子过程地址,由头文件中的_sys_call_table实现
1 extern int sys_setup(); 2 extern int sys_exit(); 3 extern int sys_fork(); 4 extern int sys_read(); 5 extern int sys_write(); 6 extern int sys_open(); 7 extern int sys_close(); 8 extern int sys_waitpid(); 9 extern int sys_creat(); 10 extern int sys_link(); 11 extern int sys_unlink(); 12 extern int sys_execve(); 13 extern int sys_chdir(); 14 extern int sys_time(); 15 extern int sys_mknod(); 16 extern int sys_chmod(); 17 extern int sys_chown(); 18 extern int sys_break(); 19 extern int sys_stat(); 20 extern int sys_lseek(); 21 extern int sys_getpid(); 22 extern int sys_mount(); 23 extern int sys_umount(); 24 extern int sys_setuid(); 25 extern int sys_getuid(); 26 extern int sys_stime(); 27 extern int sys_ptrace(); 28 extern int sys_alarm(); 29 extern int sys_fstat(); 30 extern int sys_pause(); 31 extern int sys_utime(); 32 extern int sys_stty(); 33 extern int sys_gtty(); 34 extern int sys_access(); 35 extern int sys_nice(); 36 extern int sys_ftime(); 37 extern int sys_sync(); 38 extern int sys_kill(); 39 extern int sys_rename(); 40 extern int sys_mkdir(); 41 extern int sys_rmdir(); 42 extern int sys_dup(); 43 extern int sys_pipe(); 44 extern int sys_times(); 45 extern int sys_prof(); 46 extern int sys_brk(); 47 extern int sys_setgid(); 48 extern int sys_getgid(); 49 extern int sys_signal(); 50 extern int sys_geteuid(); 51 extern int sys_getegid(); 52 extern int sys_acct(); 53 extern int sys_phys(); 54 extern int sys_lock(); 55 extern int sys_ioctl(); 56 extern int sys_fcntl(); 57 extern int sys_mpx(); 58 extern int sys_setpgid(); 59 extern int sys_ulimit(); 60 extern int sys_uname(); 61 extern int sys_umask(); 62 extern int sys_chroot(); 63 extern int sys_ustat(); 64 extern int sys_dup2(); 65 extern int sys_getppid(); 66 extern int sys_getpgrp(); 67 extern int sys_setsid(); 68 extern int sys_sigaction(); 69 extern int sys_sgetmask(); 70 extern int sys_ssetmask(); 71 extern int sys_setreuid(); 72 extern int sys_setregid(); 73 74 fn_ptr sys_call_table[] = { sys_setup, 
sys_exit, sys_fork, sys_read, 75 sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link, 76 sys_unlink, sys_execve, sys_chdir, sys_time, sys_mknod, sys_chmod, 77 sys_chown, sys_break, sys_stat, sys_lseek, sys_getpid, sys_mount, 78 sys_umount, sys_setuid, sys_getuid, sys_stime, sys_ptrace, sys_alarm, 79 sys_fstat, sys_pause, sys_utime, sys_stty, sys_gtty, sys_access, 80 sys_nice, sys_ftime, sys_sync, sys_kill, sys_rename, sys_mkdir, 81 sys_rmdir, sys_dup, sys_pipe, sys_times, sys_prof, sys_brk, sys_setgid, 82 sys_getgid, sys_signal, sys_geteuid, sys_getegid, sys_acct, sys_phys, 83 sys_lock, sys_ioctl, sys_fcntl, sys_mpx, sys_setpgid, sys_ulimit, 84 sys_uname, sys_umask, sys_chroot, sys_ustat, sys_dup2, sys_getppid, 85 sys_getpgrp, sys_setsid, sys_sigaction, sys_sgetmask, sys_ssetmask, 86 sys_setreuid,sys_setregid }; 87
//对应的处理函数 lin/kernel/sys.c 少部分在这里处理
//大部分分散在相应的文件中,如copy_process(...)
//