内核层:
主要数据结构:
// One registration of the polling task on a single wait queue.
// __pollwait() fills one of these in each time a driver calls poll_wait().
struct poll_table_entry {
	struct file *filp;               // file being polled (stored as a pointer by __pollwait)
	wait_queue_t wait;               // wait-queue node; holds a pointer to the waiting task
	wait_queue_head_t *wait_address; // head of the wait queue this node is linked on
	                                 // (a wait queue is a doubly linked list of wait_queue_t)
};
// A chunk of poll_table_entry slots; chunks are chained through `next`.
struct poll_table_page {
	struct poll_table_page *next;        // next chunk in the chain (must be a pointer:
	                                     // a struct cannot contain itself by value)
	struct poll_table_entry *entry;      // cursor: next free slot in entries[];
	                                     // __pollwait() takes it and advances it by one
	struct poll_table_entry entries[0];  // trailing array of slots
};
// Per-call bookkeeping for one poll() invocation.
// __pollwait() recovers this structure from its embedded `pt` via container_of().
struct poll_wqueues {
poll_table pt;// poll table handed to drivers; its qproc callback is what poll_wait()
// invokes (normally __pollwait, per the notes in this file)
struct poll_table_page * table;// chunk of poll_table_entry slots used by __pollwait()
int error;// checked by do_poll(): a non-zero value ends the wait and is returned
};
// Kernel-side copy of the user's pollfd array, split into chained chunks.
struct poll_list {
struct poll_list *next;// chunks are chained page by page, because a single kmalloc() is size-limited
int len;// number of pollfd entries stored in this chunk (at most POLLFD_PER_PAGE)
struct pollfd entries[0];// pollfd records copied in from user space
};
// System-call entry point for poll().
// Copies the user's pollfd array into a chain of poll_list chunks, runs
// do_poll() over them, then copies every revents field back to user space.
// (Listing is abridged; elided kernel lines are marked with dots.)
459asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
460{
461 struct poll_wqueues table;
…………
479 poll_initwait(&table);// registers the __pollwait() callback
…………
483 i = nfds;
// i counts the pollfd entries that still have to be copied in
485 while(i!=0) {
486 struct poll_list *pp;
// Allocate pp together with its trailing entries[] array; the ufds data
// passed in from user space is stored in pp->entries
487 pp = kmalloc(sizeof(struct poll_list)+
488 sizeof(struct pollfd)*
489 (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),// note the kmalloc size: header plus at most POLLFD_PER_PAGE entries
490 GFP_KERNEL);
491 if(pp==NULL)
492 goto out_fds;
493 pp->next=NULL;
// number of pollfd entries handled by this chunk
494 pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i);
// append the new chunk to the list (head = first chunk, walk = last chunk)
495 if (head == NULL)
496 head = pp;
497 else
498 walk->next = pp;
499
500 walk = pp;
// ufds + nfds-i is the first user entry that has not been copied yet
501 if (copy_from_user(pp->entries, ufds + nfds-i,
502 sizeof(struct pollfd)*pp->len)) {
503 err = -EFAULT;
504 goto out_fds;
505 }
506 i -= pp->len;
507 }
508 fdcount = do_poll(nfds, head, &table, timeout);// analysed below
509
510 /* OK, now copy the revents fields back to user space. */
511 walk = head;
// preset err so that a failing __put_user() below jumps to out_fds with -EFAULT
512 err = -EFAULT;
513 while(walk != NULL) {
514 struct pollfd *fds = walk->entries;
515 int j;
516
// ufds advances in step with j, so it keeps tracking the matching user entry across chunks
517 for (j=0; j < walk->len; j++, ufds++) {
518 if(__put_user(fds[j].revents, &ufds->revents))
519 goto out_fds;
520 }
521 walk = walk->next;
522 }
…………
535}
// Core wait loop of poll(): scans every chunk of pollfd entries, and sleeps
// between scans until an fd reports events, the timeout runs out, a signal is
// pending, or wait->error is set. Returns the resulting count.
430static int do_poll(unsigned int nfds, struct poll_list *list,
431 struct poll_wqueues *wait, long timeout)
432{
433 int count = 0;
434 poll_table* pt = &wait->pt;
435
// zero timeout: pass a NULL poll table, so poll_wait() registers nothing
436 if (!timeout)
437 pt = NULL;
438
439 for (;;) {
440 struct poll_list *walk;
441 set_current_state(TASK_INTERRUPTIBLE);
442 walk = list;
443 while(walk != NULL) {
// do_pollfd() calls into the device driver's own poll method; analysed below
444 do_pollfd( walk->len, walk->entries, &pt, &count);
445 walk = walk->next;
446 }
// after the first full pass, later passes must not register on the wait queues again
447 pt = NULL;
// Three ways out of the loop: count is non-zero (do_pollfd found at least
// one fd with events), the timeout is zero, or a signal is pending
448 if (count || !timeout || signal_pending(current))
449 break;
450 count = wait->error;
451 if (count)
452 break;
// None of the exit conditions hold, so the process goes to sleep. Who wakes
// it? Either the system when the sleep time expires, or the device driver
// (which is why the driver has to call poll_wait())
453 timeout = schedule_timeout(timeout);
454 }
455 __set_current_state(TASK_RUNNING);
456 return count;
457}
// Polls the num entries of one pollfd chunk: stores the resulting event mask
// in each entry's revents and increments *count for every entry whose mask is
// non-zero.
398static void do_pollfd(unsigned int num, struct pollfd * fdpage,
399 poll_table ** pwait, int *count)
400{
401 int i;
402
403 for (i = 0; i < num; i++) {
404 int fd;
405 unsigned int mask;
406 struct pollfd *fdp;
407
408 mask = 0;
409 fdp = fdpage+i;
410 fd = fdp->fd;
// entries with a negative fd are skipped; their revents stays 0
411 if (fd >= 0) {
412 struct file * file = fget(fd);
// POLLNVAL is what gets reported when fget() returns NULL for this fd
413 mask = POLLNVAL;
414 if (file != NULL) {
415 mask = DEFAULT_POLLMASK;
416 if (file->f_op && file->f_op->poll)
// call the driver's own poll method
417 mask = file->f_op->poll(file, *pwait);
// keep only the requested events; POLLERR and POLLHUP are always let through
418 mask &= fdp->events | POLLERR | POLLHUP;
419 fput(file);
420 }
421 if (mask) {
// something is ready: clear the poll table so later poll methods do not register
422 *pwait = NULL;
423 (*count)++;
424 }
425 }
426 fdp->revents = mask;
427 }
428}
// Helper called from a driver's poll method: forwards to the callback stored
// in the poll table. Does nothing when either p or wait_address is NULL.
24static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
25{
26 if (p && wait_address)
27 p->qproc(filp, wait_address, p);// this ends up calling __pollwait()
28}
// Callback behind poll_wait(): records (filp, wait_address) in the next free
// poll_table_entry and queues the current task on wait_address.
// (Listing is abridged; elided kernel lines are marked with dots.)
89void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *_p)
90{
// recover the enclosing poll_wqueues from its embedded poll_table member
91 struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
92 struct poll_table_page *table = p->table;
…………
108
109 /* Add a new entry */
110 {
// take the next free slot and advance the cursor past it
111 struct poll_table_entry * entry = table->entry;
112 table->entry = entry+1;
113 get_file(filp);
114 entry->filp = filp;
115 entry->wait_address = wait_address;
// make the wait-queue entry point at the current task
116 init_waitqueue_entry(&entry->wait, current);
// link the entry onto the wait queue
117 add_wait_queue(wait_address,&entry->wait);
118 }
119}
现在来总结一下poll机制:(转自韦东山poll机制分析)
1. poll > sys_poll > do_sys_poll > poll_initwait,poll_initwait函数注册一下回调函数__pollwait,它就是我们的驱动程序执行poll_wait时,真正被调用的函数。
2. 接下来执行file->f_op->poll,即我们驱动程序里自己实现的poll函数
它会调用poll_wait把自己挂入某个队列,这个队列也是我们的驱动自己定义的;
它还判断一下设备是否就绪。
3. 如果设备未就绪,do_sys_poll里会让进程休眠一定时间
4. 进程被唤醒的条件有2:一是上面说的“一定时间”到了,二是被驱动程序唤醒。驱动程序发现条件就绪时,就把“某个队列”上挂着的进程唤醒,这个队列,就是前面通过poll_wait把本进程挂过去的队列。
5. 如果驱动程序没有去唤醒进程,那么schedule_timeout(__timeout)超时后,会重复2、3动作,直到应用程序的poll调用传入的时间到达。
// The poll method implemented by our own device driver (scull pipe).
// Registers the caller on the device's read and write wait queues and
// returns the mask of events that are ready right now.
static unsigned int scull_p_poll(struct file *filp, poll_table *wait)
{
	struct scull_pipe *pipe = filp->private_data;
	unsigned int ready = 0;
	int can_read;
	int can_write;

	down(&pipe->sem);

	/* Hook onto both queues: readers wait on inq, writers on outq. */
	poll_wait(filp, &pipe->inq, wait);
	poll_wait(filp, &pipe->outq, wait);

	/*
	 * Circular buffer: rp == wp means it is empty, and it counts as
	 * full when wp sits right behind rp.
	 */
	can_read = (pipe->rp != pipe->wp);
	can_write = (spacefree(pipe) != 0);

	up(&pipe->sem);

	if (can_read)
		ready |= POLLIN | POLLRDNORM;   /* readable */
	if (can_write)
		ready |= POLLOUT | POLLWRNORM;  /* writable */

	return ready;
}