• Linux kernel suspend resume学习:2.6.35与3.0.35比较【转】


    转自:http://blog.csdn.net/njuitjf/article/details/18317149

    Linux kernel suspend resume学习:2.6.35与3.0.35比较

    最近在学习linux kernel中suspend和resume的处理。
    只是一味地看代码,有点枯燥,刚好有两个项目使用了不同的内核版本,就以比较这两个版本间的处理差别为线索进行学习。
    由于列举了很多代码,为了保持连续性,整篇文章还是放到了一个blog中。

    首先看看公开出去的接口,都是文件/sys/power/state。
    读该文件可以获取可能取值。
    写该文件可以实现状态改变。

    /sys/power/state定义的地方:
    power_attr(state);

    两个内核版本中,power_attr的定义相同。
    power_attr的定义:
    #define power_attr(_name) \
    static struct kobj_attribute _name##_attr = { \
     .attr = { \
      .name = __stringify(_name), \
      .mode = 0644, \
     }, \
     .show = _name##_show, \
     .store = _name##_store, \
    }

    power_attr就是往sysfs中添加一个文件,操作函数:
     .show = _name##_show,   
     .store = _name##_store, 

    对应到state: 
     .show = state_show,   
     .store = state_store, 
     
    内核中这两个函数的注释:
    /**
     * state - control system power state.
     *
     * show() returns what states are supported, which is hard-coded to
     * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
     * 'disk' (Suspend-to-Disk).
     *
     * store() accepts one of those strings, translates it into the 
     * proper enumerated value, and initiates a suspend transition.
     */
     
    state_show其实就是将数组pm_states的内容show出来。
    数组pm_states的定义,在两个项目中是不同的。

    kernel 2.6.35项目中:
    const char *const pm_states[PM_SUSPEND_MAX] = {
    #ifdef CONFIG_EARLYSUSPEND
     [PM_SUSPEND_ON]  = "on",
    #endif
     [PM_SUSPEND_STANDBY] = "standby",
     [PM_SUSPEND_MEM] = "mem",
    };

    kernel 3.0.35项目中:
    const char *const pm_states[PM_SUSPEND_MAX] = {
     [PM_SUSPEND_STANDBY] = "standby",
     [PM_SUSPEND_MEM] = "mem",
    };

    关于这些 state 的介绍,请参考 kernel 中的文档:
    Documentation/power/states.txt

    函数state_store定义,两个项目中的差别也就是kernel 2.6.35项目中多了对CONFIG_EARLYSUSPEND的处理。
    将kernel 2.6.35项目中state_store的定义列出来:
    static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
          const char *buf, size_t n)
    {
    #ifdef CONFIG_SUSPEND
    /*
    看一下PM_SUSPEND_ON及几个相关的定义:
    typedef int __bitwise suspend_state_t;

    #define PM_SUSPEND_ON  ((__force suspend_state_t) 0)
    #define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1)
    #define PM_SUSPEND_MEM  ((__force suspend_state_t) 3)
    #define PM_SUSPEND_MAX  ((__force suspend_state_t) 4)
    不难理解,如果有on的话,肯定要从on开始遍历
    */
    #ifdef CONFIG_EARLYSUSPEND
     suspend_state_t state = PM_SUSPEND_ON;
    #else
     suspend_state_t state = PM_SUSPEND_STANDBY;
    #endif
     const char * const *s;
    #endif
     char *p;
     int len;
     int error = -EINVAL;

     p = memchr(buf, '\n', n);
     len = p ? p - buf : n;

     /* First, check if we are requested to hibernate */
     if (len == 4 && !strncmp(buf, "disk", len)) {
      error = hibernate();
      goto Exit;
     }

    #ifdef CONFIG_SUSPEND
     for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
      if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
       break;
     }
     if (state < PM_SUSPEND_MAX && *s)
    #ifdef CONFIG_EARLYSUSPEND
      if (state == PM_SUSPEND_ON || valid_state(state)) {
       error = 0;
       request_suspend_state(state);
      }
    #else
      error = enter_state(state);
    #endif
    #endif

     Exit:
     return error ? error : n;
    }

    函数valid_state是判断是否对指定的state进行了支持:
    bool valid_state(suspend_state_t state)
    {
     /*
      * All states need lowlevel support and need to be valid to the lowlevel
      * implementation, no valid callback implies that none are valid.
      */
     return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
    }

    suspend_ops在函数suspend_set_ops中被赋值:
    /**
     * suspend_set_ops - Set the global suspend method table.
     * @ops: Pointer to ops structure.
     */
    void suspend_set_ops(struct platform_suspend_ops *ops)
    {
     mutex_lock(&pm_mutex);
     suspend_ops = ops;
     mutex_unlock(&pm_mutex);
    }
    一般会在 architecture 相关的 pm 模块的 init 或者 probe 函数中调用 suspend_set_ops 。
    valid 一般是判断该 architecture 中是否支持该 state 。

    接下来看看差异中的 request_suspend_state 函数:
    void request_suspend_state(suspend_state_t new_state)
    {
     unsigned long irqflags;
     int old_sleep;

     spin_lock_irqsave(&state_lock, irqflags);
     old_sleep = state & SUSPEND_REQUESTED;
    ...

     if (new_state == PM_SUSPEND_STANDBY) {
      mode = EARLY_SUSPEND_MODE_EINK;
      new_state = PM_SUSPEND_MEM;
     } else
      mode = EARLY_SUSPEND_MODE_NORMAL;

     if (!old_sleep && new_state != PM_SUSPEND_ON) {
      if ((state & SUSPENDED) && (last_mode != mode)) {
       /* flush the workqueue */
       spin_unlock_irqrestore(&state_lock, irqflags);
       flush_workqueue(suspend_work_queue);
       spin_lock_irqsave(&state_lock, irqflags);
      }
      state |= SUSPEND_REQUESTED;
      queue_work(suspend_work_queue, &early_suspend_work); // 将 early_suspend_work 添加到 suspend_work_queue 中
     } else if (old_sleep && new_state == PM_SUSPEND_ON) {    // 这一次走的是这个分支
      state &= ~SUSPEND_REQUESTED;
      wake_lock(&main_wake_lock);
      queue_work(suspend_work_queue, &late_resume_work);    // 将 late_resume_work 添加到 suspend_work_queue 
     }

     if (new_state != PM_SUSPEND_ON)
      last_mode = mode;

     requested_suspend_state = new_state;
     spin_unlock_irqrestore(&state_lock, irqflags);
    }

    suspend_work_queue 在函数 wakelocks_init 中被创建:
     suspend_work_queue = create_singlethread_workqueue("suspend");
    函数 wakelocks_init 为 core_initcall :
    core_initcall(wakelocks_init);

    early_suspend_work 和 late_resume_work 的定义:
    static DECLARE_WORK(early_suspend_work, early_suspend);
    static DECLARE_WORK(late_resume_work, late_resume);

    #define DECLARE_WORK(n, f) \
     struct work_struct n = __WORK_INITIALIZER(n, f)

    #define __WORK_INITIALIZER(n, f) { \
     .data = WORK_DATA_STATIC_INIT(), \
     .entry = { &(n).entry, &(n).entry }, \
     .func = (f), \
     __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
     }
     
    early_suspend 和 late_resume 是处理函数。
    两个函数中都对 early_suspend_handlers 进行了处理。
    early_suspend 函数中依次调用了 early_suspend_handlers 中的 suspend 函数:
     list_for_each_entry(pos, &early_suspend_handlers, link) {
      if (pos->suspend != NULL) {
       pos->pm_mode = pwr_mode;
       pos->suspend(pos);
      }
     }
    late_resume 函数中按相反的顺序依次调用了 early_suspend_handlers 中的 resume 函数:
     list_for_each_entry_reverse(pos, &early_suspend_handlers, link)
      if (pos->resume != NULL)
       pos->resume(pos);
       
    early_suspend_handlers 的定义:
    static LIST_HEAD(early_suspend_handlers);

    函数 register_early_suspend 将 handler 注册到 early_suspend_handlers :
    void register_early_suspend(struct early_suspend *handler)
    {
     struct list_head *pos;

     mutex_lock(&early_suspend_lock);
     list_for_each(pos, &early_suspend_handlers) {
      struct early_suspend *e;
      e = list_entry(pos, struct early_suspend, link);
      if (e->level > handler->level)
       break;
     }
     list_add_tail(&handler->link, pos);
     if ((state & SUSPENDED) && handler->suspend)
      handler->suspend(handler);
     mutex_unlock(&early_suspend_lock);
    }
    需要进行early suspend处理的模块调用函数 register_early_suspend 注册 handler .
    static struct early_suspend mxc_epdc_earlysuspend = {
     .level = EARLY_SUSPEND_LEVEL_DISABLE_FB,
     .suspend = mxc_epdc_early_suspend,
     .resume = mxc_epdc_late_resume,
    };

     register_early_suspend(&mxc_epdc_earlysuspend);
     
    看看 queue_work 的实现:
    /**
     * queue_work - queue work on a workqueue
     * @wq: workqueue to use
     * @work: work to queue
     *
     * Returns 0 if @work was already on a queue, non-zero otherwise.
     *
     * We queue the work to the CPU on which it was submitted, but if the CPU dies
     * it can be processed by another CPU.
     */
    int queue_work(struct workqueue_struct *wq, struct work_struct *work)
    {
     int ret;

     ret = queue_work_on(get_cpu(), wq, work);
     put_cpu();

     return ret;
    }
    /**
     * queue_work_on - queue work on specific cpu
     * @cpu: CPU number to execute work on
     * @wq: workqueue to use
     * @work: work to queue
     *
     * Returns 0 if @work was already on a queue, non-zero otherwise.
     *
     * We queue the work to a specific CPU, the caller must ensure it
     * can't go away.
     */
    int
    queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
    {
     int ret = 0;

     if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
      BUG_ON(!list_empty(&work->entry));
      // __queue_work 最终将 work 添加到指定 cpu_workqueue_struct 中的 worklist 中
      __queue_work(wq_per_cpu(wq, cpu), work);
      ret = 1;
     }
     return ret;
    }

    flush_workqueue 的实现:
    /**
     * flush_workqueue - ensure that any scheduled work has run to completion.
     * @wq: workqueue to flush
     *
     * Forces execution of the workqueue and blocks until its completion.
     * This is typically used in driver shutdown handlers.
     *
     * We sleep until all works which were queued on entry have been handled,
     * but we are not livelocked by new incoming ones.
     *
     * This function used to run the workqueues itself.  Now we just wait for the
     * helper threads to do it.
     */
    void flush_workqueue(struct workqueue_struct *wq)
    {
     const struct cpumask *cpu_map = wq_cpu_map(wq);
     int cpu;

     might_sleep();
     lock_map_acquire(&wq->lockdep_map);
     lock_map_release(&wq->lockdep_map);
     for_each_cpu(cpu, cpu_map)
      flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
    }

    回到 state_store 函数。
    我们发现函数中最初有个判断,如果设置的状态为 "disk" ,则调用函数 hibernate 。
    根据 states.txt 中的说明,disk 即进入深度睡眠,更省电。
    因为其将 snapshot 写入到了 disk 。之后可以 power down 。

    来看看 hibernate 的实现。
    两个内核版本中稍有差别,kernel 3.0.35中多了一些处理。下面把 kernel 3.0.35的实现列了出来:
    /**
     * hibernate - Carry out system hibernation, including saving the image.
     */
    int hibernate(void)
    {
     int error;

     mutex_lock(&pm_mutex);
     /* The snapshot device should not be opened while we're running */
     if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
      error = -EBUSY;
      goto Unlock;
     }

     pm_prepare_console();
     error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
     if (error)
      goto Exit;

     error = usermodehelper_disable();
     if (error)
      goto Exit;

     /* Allocate memory management structures */
     error = create_basic_memory_bitmaps();
     if (error)
      goto Exit;

     printk(KERN_INFO "PM: Syncing filesystems ... ");
     sys_sync();
     printk("done.\n");

     error = prepare_processes();
     if (error)
      goto Finish;

     if (hibernation_test(TEST_FREEZER))
      goto Thaw;

     if (hibernation_testmode(HIBERNATION_TESTPROC))
      goto Thaw;

     error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
     if (error)
      goto Thaw;

     if (in_suspend) {
      unsigned int flags = 0;

      if (hibernation_mode == HIBERNATION_PLATFORM)
       flags |= SF_PLATFORM_MODE;
      if (nocompress)                      // kernel 2.6.35中无
       flags |= SF_NOCOMPRESS_MODE;     // kernel 2.6.35中无
      pr_debug("PM: writing image.\n");
      error = swsusp_write(flags);
      swsusp_free();
      if (!error)
       power_down();
      in_suspend = 0;                      // kernel 2.6.35中无
      pm_restore_gfp_mask();               // kernel 2.6.35中无
     } else {
      pr_debug("PM: Image restored successfully.\n");
     }

     Thaw:
     thaw_processes();
     Finish:
     free_basic_memory_bitmaps();
     usermodehelper_enable();
     Exit:
     pm_notifier_call_chain(PM_POST_HIBERNATION);
     pm_restore_console();
     atomic_inc(&snapshot_device_available);
     Unlock:
     mutex_unlock(&pm_mutex);
     return error;
    }

    先看第一处差别, NOCOMPRESS 相关。
    kernel 2.6.35中没有定义 SF_NOCOMPRESS_MODE 。
    搜索代码发现,kernel 3.0.35中有3个地方使用了 SF_NOCOMPRESS_MODE 。
    分别是判断 swap 是否有足够空间、写入 snapshot 、读取 snapshot 时。

    第一处:
    在函数 enough_swap 中。功能在注释中已经体现。该函数的实现:
    /**
     * enough_swap - Make sure we have enough swap to save the image.
     *
     * Returns TRUE or FALSE after checking the total amount of swap
     * space avaiable from the resume partition.
     */

    static int enough_swap(unsigned int nr_pages, unsigned int flags)
    {
     unsigned int free_swap = count_swap_pages(root_swap, 1);
     unsigned int required;

     pr_debug("PM: Free swap pages: %u\n", free_swap);
     // 如果为非压缩模式,请求多少页即需要多少页;压缩模式下需乘以最坏情况下的压缩比
     required = PAGES_FOR_IO + ((flags & SF_NOCOMPRESS_MODE) ?
      nr_pages : (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1);
     return free_swap > required;
    }
    看了下 kernel 2.6.35 中的实现,关键代码如下:
     return free_swap > nr_pages + PAGES_FOR_IO;
    比较两个版本的 kernel 可知,2.6.35中只支持非压缩模式;3.0.35中增加了压缩模式的支持。

    第二处:
    在函数 swsusp_write 中。关键代码:
    /**
     * swsusp_write - Write entire image and metadata.
     * @flags: flags to pass to the "boot" kernel in the image header
     *
     * It is important _NOT_ to umount filesystems at this point. We want
     * them synced (in case something goes wrong) but we DO not want to mark
     * filesystem clean: it is not. (And it does not matter, if we resume
     * correctly, we'll mark system clean, anyway.)
     */

    int swsusp_write(unsigned int flags)
    {
    ...
     pages = snapshot_get_image_size();
     error = get_swap_writer(&handle);
    ...
     // 判断 swap 中是否有足够空间
     if (!enough_swap(pages, flags)) {
    ...
     }
     memset(&snapshot, 0, sizeof(struct snapshot_handle));
     error = snapshot_read_next(&snapshot);
    ...
     header = (struct swsusp_info *)data_of(snapshot);
     // 写入元数据
     error = swap_write_page(&handle, header, NULL);
     if (!error) {
      // 写入 snapshot 数据。根据是否设置了 SF_NOCOMPRESS_MODE 分别调用 save_image 和 save_image_lzo 
      error = (flags & SF_NOCOMPRESS_MODE) ?
       save_image(&handle, &snapshot, pages - 1) :
       save_image_lzo(&handle, &snapshot, pages - 1);
     }
    out_finish:
     error = swap_writer_finish(&handle, flags, error);
     return error;
    }
    函数 save_image 的注释:
    /**
     * save_image - save the suspend image data
     */
    函数 save_image_lzo 的注释:
    /**
     * save_image_lzo - Save the suspend image data compressed with LZO.
     * @handle: Swap mam handle to use for saving the image.
     * @snapshot: Image to read data from.
     * @nr_to_write: Number of pages to save.
     */
    lzo 压缩算法就不介绍了。具体写入的实现这儿也不深入了。
    2.6.35 中函数 swsusp_write 的实现类似,只是在调用 enough_swap 时不会传入 flags 参数;
    另外,没有函数 save_image_lzo ,只会调用 save_image 。
    函数 swsusp_write 中调用的另外一个重要函数 snapshot_read_next 。其注释:
    /**
     * snapshot_read_next - used for reading the system memory snapshot.
     *
     * On the first call to it @handle should point to a zeroed
     * snapshot_handle structure.  The structure gets updated and a pointer
     * to it should be passed to this function every next time.
     *
     * On success the function returns a positive number.  Then, the caller
     * is allowed to read up to the returned number of bytes from the memory
     * location computed by the data_of() macro.
     *
     * The function returns 0 to indicate the end of data stream condition,
     * and a negative number is returned on error.  In such cases the
     * structure pointed to by @handle is not updated and should not be used
     * any more.
     */
    swsusp_write 中第一次调用了函数 snapshot_read_next ,函数 save_image/save_image_lzo 中循环调用 snapshot_read_next 函数,直到读取完 snapshot 。

    第三处:
    在函数 swsusp_read 中,关键代码:
    /**
     * swsusp_read - read the hibernation image.
     * @flags_p: flags passed by the "frozen" kernel in the image header should
     *    be written into this memory location
     */

    int swsusp_read(unsigned int *flags_p)
    {
    ...
     memset(&snapshot, 0, sizeof(struct snapshot_handle));
     error = snapshot_write_next(&snapshot);
    ...
     header = (struct swsusp_info *)data_of(snapshot);
     error = get_swap_reader(&handle, flags_p);
    ...
     if (!error)
      error = swap_read_page(&handle, header, NULL);
     if (!error) {
      error = (*flags_p & SF_NOCOMPRESS_MODE) ?
       load_image(&handle, &snapshot, header->pages - 1) :
       load_image_lzo(&handle, &snapshot, header->pages - 1);
     }
     swap_reader_finish(&handle);
    end:
     if (!error)
      pr_debug("PM: Image successfully loaded\n");
     else
      pr_debug("PM: Error %d resuming\n", error);
     return error;
    }
    load_image 的注释:
    /**
     * load_image - load the image using the swap map handle
     * @handle and the snapshot handle @snapshot
     * (assume there are @nr_pages pages to load)
     */
     
    load_image_lzo 的注释:
    /**
     * load_image_lzo - Load compressed image data and decompress them with LZO.
     * @handle: Swap map handle to use for loading data.
     * @snapshot: Image to copy uncompressed data into.
     * @nr_to_read: Number of pages to load.
     */

     2.6.35 中函数 swsusp_read 的实现类似,只是少了对 SF_NOCOMPRESS_MODE 的处理,没有实现函数 load_image_lzo 。
    swsusp_read 函数中调用了另外一个重要函数 snapshot_write_next ,其注释:
    /**
     * snapshot_write_next - used for writing the system memory snapshot.
     *
     * On the first call to it @handle should point to a zeroed
     * snapshot_handle structure.  The structure gets updated and a pointer
     * to it should be passed to this function every next time.
     *
     * On success the function returns a positive number.  Then, the caller
     * is allowed to write up to the returned number of bytes to the memory
     * location computed by the data_of() macro.
     *
     * The function returns 0 to indicate the "end of file" condition,
     * and a negative number is returned on error.  In such cases the
     * structure pointed to by @handle is not updated and should not be used
     * any more.
     */
    swsusp_read 函数中第一次调用了 snapshot_write_next , load_image/load_image_lzo 中循环调用 snapshot_write_next ,直到处理完所有的 snapshot 。


    第二处差别是在3.0.35中多了:
      in_suspend = 0; 
    搜索代码,发现函数 create_image 中将 in_suspend 设置为了1.
    函数 hibernate 调用了函数 hibernation_snapshot 。
    函数 hibernation_snapshot 的注释 :
    /**
     * hibernation_snapshot - Quiesce devices and create a hibernation image.
     * @platform_mode: If set, use platform driver to prepare for the transition.
     *
     * This routine must be called with pm_mutex held.
     */
    函数 hibernation_snapshot 调用了函数 create_image 。
    函数 create_image 的注释:
    /**
     * create_image - Create a hibernation image.
     * @platform_mode: Whether or not to use the platform driver.
     *
     * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image
     * and execute the drivers' .thaw_noirq() callbacks.
     *
     * Control reappears in this routine after the subsequent restore.
     */
    3.0.35中多了这么一句,难道只是为了防止 hibernate 函数中重复进入 if (in_suspend) ...
     
    第三处差别是在3.0.35中多了:
      pm_restore_gfp_mask(); 
    pm_restore_gfp_mask 的实现及相关定义:
    #ifdef CONFIG_PM_SLEEP
    /*
     * The following functions are used by the suspend/hibernate code to temporarily
     * change gfp_allowed_mask in order to avoid using I/O during memory allocations
     * while devices are suspended.  To avoid races with the suspend/hibernate code,
     * they should always be called with pm_mutex held (gfp_allowed_mask also should
     * only be modified with pm_mutex held, unless the suspend/hibernate code is
     * guaranteed not to run in parallel with that modification).
     */

    static gfp_t saved_gfp_mask;

    void pm_restore_gfp_mask(void)
    {
     WARN_ON(!mutex_is_locked(&pm_mutex));
     if (saved_gfp_mask) {
      gfp_allowed_mask = saved_gfp_mask;
      saved_gfp_mask = 0;
     }
    }

    void pm_restrict_gfp_mask(void)
    {
     WARN_ON(!mutex_is_locked(&pm_mutex));
     WARN_ON(saved_gfp_mask);
     saved_gfp_mask = gfp_allowed_mask;
     gfp_allowed_mask &= ~GFP_IOFS;
    }
    #endif /* CONFIG_PM_SLEEP */


    回头看看 hibernate 函数。

     /* The snapshot device should not be opened while we're running */
     if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
      error = -EBUSY;
      goto Unlock;
     }
    注释的意思是,我们在执行 hibernate 的时候,不允许别人再打开 snapshot 设备。
    static struct miscdevice snapshot_device = {
     .minor = SNAPSHOT_MINOR,
     .name = "snapshot",
     .fops = &snapshot_fops,
    };
    snapshot_fops 的定义:
    static const struct file_operations snapshot_fops = {
     .open = snapshot_open,
     .release = snapshot_release,
     .read = snapshot_read,
     .write = snapshot_write,
     .llseek = no_llseek,
     .unlocked_ioctl = snapshot_ioctl,
    };
    打开 snapshot 设备用的就是函数 snapshot_open 了。
    实现在我们 running 的时候不让 snapshot 设备被打开的方法是通过变量 snapshot_device_available ,其定义:
    atomic_t snapshot_device_available = ATOMIC_INIT(1);
    snapshot_open 函数中有以下语句:
     if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
      error = -EBUSY;
      goto Unlock;
     }
    add 个 -1 ,也就相当于减1操作。

    继续 hibernate 函数。
     // console 相关处理
     pm_prepare_console();
     
     error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
    pm_notifier_call_chain 函数经过多层调用,调用到了函数 __blocking_notifier_call_chain ,其注释:
    /**
     * __blocking_notifier_call_chain - Call functions in a blocking notifier chain
     * @nh: Pointer to head of the blocking notifier chain
     * @val: Value passed unmodified to notifier function
     * @v: Pointer passed unmodified to notifier function
     * @nr_to_call: See comment for notifier_call_chain.
     * @nr_calls: See comment for notifier_call_chain.
     *
     * Calls each function in a notifier chain in turn.  The functions
     * run in a process context, so they are allowed to block.
     *
     * If the return value of the notifier can be and'ed
     * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
     * will return immediately, with the return value of
     * the notifier function which halted execution.
     * Otherwise the return value is the return value
     * of the last notifier function called.
     */
     
    继续 hibernate 函数。
    /**
     * usermodehelper_disable - prevent new helpers from being started
     */
     error = usermodehelper_disable();
     
     /* Allocate memory management structures */
     error = create_basic_memory_bitmaps();
    函数 create_basic_memory_bitmaps 的注释:
    /**
     * create_basic_memory_bitmaps - create bitmaps needed for marking page
     * frames that should not be saved and free page frames.  The pointers
     * forbidden_pages_map and free_pages_map are only modified if everything
     * goes well, because we don't want the bits to be used before both bitmaps
     * are set up.
     */
     
    继续 hibernate 函数。
     error = prepare_processes();
    prepare_processes 函数的实现:
    static int prepare_processes(void)
    {
     int error = 0;

     if (freeze_processes()) {
      error = -EBUSY;
      thaw_processes();
     }
     return error;
    }
    可见函数 prepare_processes 的功能为:
    尝试冷冻进程,如果失败,则解冻进程,并返回 -EBUSY 。
    如何冷冻进程的先不看了。

    继续 hibernate 函数。
     // 如果只是 debug ,那就只简单 delay 一会
     if (hibernation_test(TEST_FREEZER))
      goto Thaw;
     // 与上面类似 
     if (hibernation_testmode(HIBERNATION_TESTPROC))
      goto Thaw;
     // 此函数前面见到过,功能是让 devices 都静止,并创建 hibernation image 。它还将 in_suspend 设置为了1
     error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
     
     // 刚创建完 image 的这条路径上 in_suspend 为1;若 in_suspend 为0,则走下面的 else 分支(打印 Image restored successfully)
     if (in_suspend) {
      unsigned int flags = 0;

      if (hibernation_mode == HIBERNATION_PLATFORM)
       flags |= SF_PLATFORM_MODE;
      if (nocompress)  // 是否是非压缩的
       flags |= SF_NOCOMPRESS_MODE;
      pr_debug("PM: writing image.\n");
      // 函数 swsusp_write 前面见过,将 snapshot 写入到 swap
      error = swsusp_write(flags);
      /**
       * swsusp_free - free pages allocated for the suspend.
       *
       * Suspend pages are alocated before the atomic copy is made, so we
       * need to release them after the resume.
       */
      swsusp_free();
      if (!error)
       power_down();
      in_suspend = 0;
      pm_restore_gfp_mask();
     } else {
      pr_debug("PM: Image restored successfully.\n");
     }

    函数 power_down 的实现:
    /**
     * power_down - Shut the machine down for hibernation.
     *
     * Use the platform driver, if configured, to put the system into the sleep
     * state corresponding to hibernation, or try to power it off or reboot,
     * depending on the value of hibernation_mode.
     */
    static void power_down(void)
    {
     switch (hibernation_mode) {
     case HIBERNATION_TEST:
     case HIBERNATION_TESTPROC:
      break;
     case HIBERNATION_REBOOT:
      /**
       * kernel_restart - reboot the system
       * @cmd: pointer to buffer containing command to execute for restart
       *  or %NULL
       *
       * Shutdown everything and perform a clean reboot.
       * This is not safe to call in interrupt context.
       */
      kernel_restart(NULL);
      break;
     case HIBERNATION_PLATFORM:
      /**
       * hibernation_platform_enter - Power off the system using the platform driver.
       */
      hibernation_platform_enter();
     case HIBERNATION_SHUTDOWN:
      /**
       * kernel_power_off - power_off the system
       *
       * Shutdown everything and perform a clean system power_off.
       */
      kernel_power_off();
      break;
     }
     /**
      * kernel_halt - halt the system
      *
      * Shutdown everything and perform a clean system halt.
      */
     kernel_halt();
     /*
      * Valid image is on the disk, if we continue we risk serious data
      * corruption after resume.
      */
     printk(KERN_CRIT "PM: Please power down manually\n");
     while(1);
    }

    到 power_down 函数,已经 power down 了,后面的代码怎么跑?
    自然是重新 power up 之后继续跑了。
    首先是紧跟在 power_down 之后的下两句代码:
      in_suspend = 0;
      pm_restore_gfp_mask();
      
    然后是:
    Thaw:
     // 春回大地,冰雪消融
     thaw_processes();
     Finish:
      /**
      * free_basic_memory_bitmaps - free memory bitmaps allocated by
      * create_basic_memory_bitmaps().  The auxiliary pointers are necessary
      * so that the bitmaps themselves are not referred to while they are being
      * freed.
      */
     free_basic_memory_bitmaps();
     // 前面 disable 了,现在要 enable 回来
     usermodehelper_enable();
     Exit:
     pm_notifier_call_chain(PM_POST_HIBERNATION);
     // 对应于前面 pm_prepare_console 的处理
     pm_restore_console();
     // 我们已经不 running 了,别人可以再使用 snapshot device 了
     atomic_inc(&snapshot_device_available);
     Unlock:
     mutex_unlock(&pm_mutex);
     return error;
    }

    看完了 hibernate 函数,即 Suspend-to-disk 的处理。
    回到 state_store 函数继续。
    根据写入的字符串,找到对应的 state ,并以该 state 为参数调用函数 enter_state 。
    看看 enter_state 函数的实现:
    /**
     * enter_state - Do common work of entering low-power state.
     * @state:  pm_state structure for state we're entering.
     *
     * Make sure we're the only ones trying to enter a sleep state. Fail
     * if someone has beat us to it, since we don't want anything weird to
     * happen when we wake up.
     * Then, do the setup for suspend, enter the state, and cleaup (after
     * we've woken up).
     */
    int enter_state(suspend_state_t state)
    {
     int error;

     // 这个函数前面看到过,判断当前 architecture 是否支持该 state
     if (!valid_state(state))
      return -ENODEV;

     if (!mutex_trylock(&pm_mutex))
      return -EBUSY;

     printk(KERN_INFO "PM: Syncing filesystems ... ");
     sys_sync();
     printk("done.\n");

     pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
     // 实现见后文
     error = suspend_prepare();
     if (error)
      goto Unlock;

     if (suspend_test(TEST_FREEZER))
      goto Finish;

     pr_debug("PM: Entering %s sleep\n", pm_states[state]);
     pm_restrict_gfp_mask();
     error = suspend_devices_and_enter(state);
     pm_restore_gfp_mask();

     Finish:
     pr_debug("PM: Finishing wakeup.\n");
     suspend_finish();
     Unlock:
     mutex_unlock(&pm_mutex);
     return error;
    }

    函数 suspend_prepare 的实现:
    /**
     * suspend_prepare - Do prep work before entering low-power state.
     *
     * This is common code that is called for each state that we're entering.
     * Run suspend notifiers, allocate a console and stop all processes.
     */
    static int suspend_prepare(void)
    {
     int error;

     if (!suspend_ops || !suspend_ops->enter)
      return -EPERM;

     // 前面见过,console相关的处理
     pm_prepare_console();

     // 前面也见过,只是参数略有不同
     error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
     if (error)
      goto Finish;

     // 前面也见过
     error = usermodehelper_disable();
     if (error)
      goto Finish;

     // 如果支持 suspend freezer ,该函数直接调用函数 freeze_processes 。
     // 与前面看过的 hibernate 中的 prepare_processes 函数类似
     error = suspend_freeze_processes();
     // 一切 OK ,返回 0
     if (!error)
      return 0;

     suspend_thaw_processes();
     usermodehelper_enable();
     Finish:
     pm_notifier_call_chain(PM_POST_SUSPEND);
     pm_restore_console();
     return error;
    }

    回到函数 enter_state ,
     // 只有定义了 CONFIG_PM_DEBUG ,并且是在测试的时候,suspend_test 才返回1,否则返回0,继续 suspend
     if (suspend_test(TEST_FREEZER))
      goto Finish;
      
     // 让 gfp mask 变得更严格,这是 suspend 前最后调用的一个函数
     // 后面的 pm_restore_gfp_mask 函数是 resume 回来调用的第一个函数,功能是恢复 gfp mask
     // 这个在前文中也有看到,是 3.0.35 kernel 的 hibernate 函数中新加的处理
     pm_restrict_gfp_mask();
     // 函数的实现在后面
     error = suspend_devices_and_enter(state);
     
    suspend_devices_and_enter 函数的实现:
    /**
     * suspend_devices_and_enter - suspend devices and enter the desired system
     *        sleep state.
     * @state:    state to enter
     */
    int suspend_devices_and_enter(suspend_state_t state)
    {
     int error;

     if (!suspend_ops)
      return -ENOSYS;

     trace_machine_suspend(state);
     // suspend_ops 在前文出现过,在 architecture 的 pm 模块的 init 或者 probe 函数中会设置 suspend_ops
     if (suspend_ops->begin) {
      error = suspend_ops->begin(state);
      if (error)
       goto Close;
     }
     // 函数 suspend_console 的实现见后文
     suspend_console();
     // 函数 suspend_test_start 和函数 suspend_test_finish 用于计时,并输出时间信息
     suspend_test_start();
     // 函数 dpm_suspend_start 的实现见后文
     error = dpm_suspend_start(PMSG_SUSPEND);
     if (error) {
      printk(KERN_ERR "PM: Some devices failed to suspend\n");
      goto Recover_platform;
     }
     suspend_test_finish("suspend devices");
     // 此函数前面说过
     if (suspend_test(TEST_DEVICES))
      goto Recover_platform;

     // 函数 suspend_enter 的实现见后文
     error = suspend_enter(state);

     Resume_devices:
     suspend_test_start();
     /**
      * dpm_resume_end - Execute "resume" callbacks and complete system transition.
      * @state: PM transition of the system being carried out.
      *
      * Execute "resume" callbacks for all devices and complete the PM transition of
      * the system.
      */
     // dpm_resume_end 调用 dpm_resume 和 dpm_complete
     /**
      * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
      * @state: PM transition of the system being carried out.
      *
      * Execute the appropriate "resume" callback for all devices whose status
      * indicates that they are suspended.
      */
     /**
      * dpm_complete - Complete a PM transition for all non-sysdev devices.
      * @state: PM transition of the system being carried out.
      *
      * Execute the ->complete() callbacks for all devices whose PM status is not
      * DPM_ON (this allows new devices to be registered).
      */
     dpm_resume_end(PMSG_RESUME);
     suspend_test_finish("resume devices");
     // console 处理
     resume_console();
     Close:
     if (suspend_ops->end)
      suspend_ops->end();
     trace_machine_suspend(PWR_EVENT_EXIT);
     return error;

     Recover_platform:
     if (suspend_ops->recover)
      suspend_ops->recover();
     goto Resume_devices;
    }

    函数 suspend_console 的实现:
    /**
     * suspend_console - suspend the console subsystem
     *
     * This disables printk() while we go into suspend states.
     * Returns immediately, changing nothing, when console_suspend_enabled
     * is not set.
     */
    void suspend_console(void)
    {
     if (!console_suspend_enabled)
      return;
     /* Terminate the notice with a newline so it is logged as a complete line. */
     printk("Suspending console(s) (use no_console_suspend to debug)\n");
     console_lock();
     console_suspended = 1;
     /*
      * NOTE(review): presumably console_lock() acquired console_sem; it is
      * released here directly while console_suspended stays set - confirm
      * against printk.c.
      */
     up(&console_sem);
    }

    函数 dpm_suspend_start 的实现:
    /**
     * dpm_suspend_start - Prepare devices for PM transition and suspend them.
     * @state: PM transition of the system being carried out.
     *
     * Runs dpm_prepare() and, only if it reports no error, dpm_suspend().
     * Returns the error from dpm_prepare() when it fails, otherwise the
     * result of dpm_suspend().
     */
    int dpm_suspend_start(pm_message_t state)
    {
     int ret;

     /*
      * dpm_prepare - Prepare all non-sysdev devices for a system PM transition.
      * Executes the ->prepare() callback(s) for all devices.
      */
     ret = dpm_prepare(state);
     if (ret)
      return ret;

     /*
      * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices.
      */
     return dpm_suspend(state);
    }
    函数 dpm_prepare 和函数 dpm_suspend 中的处理,涉及到哪些 devices 的 callback 会被调用。
    在函数 dpm_prepare 中,会遍历队列 dpm_list ,依次调用其中设备的 ->prepare() callback(s) ,
    如果成功,则将其添加到 dpm_prepared_list 队列。
    函数 dpm_suspend 遍历 dpm_prepared_list 队列,依次调用其中设备的 ->suspend() callback(s) 。
    如果成功,则将其添加到 dpm_suspended_list 队列,后面调用到的 dpm_resume 函数会使用该队列。
    dpm_resume 处理之后又将成员 move 到 dpm_prepared_list 队列。
    函数 dpm_complete 中会处理 dpm_prepared_list 队列。
    接下来的问题是, dpm_list 里的成员是谁添加的?
    函数 device_pm_add 中会往 dpm_list 中添加成员:
    /**
     * device_pm_add - Add a device to the PM core's list of active devices.
     * @dev: Device to add to the list.
     */
    void device_pm_add(struct device *dev)
    {
    ...
     /* Append this device's PM list entry to the tail of dpm_list. */
     list_add_tail(&dev->power.entry, &dpm_list);
    ...
    }
    函数 device_add 中调用了函数 device_pm_add :
    /**
     * device_add - add device to device hierarchy.
     * @dev: device.
     *
     * This is part 2 of device_register(), though may be called
     * separately _iff_ device_initialize() has been called separately.
     *
     * This adds @dev to the kobject hierarchy via kobject_add(), adds it
     * to the global and sibling lists for the device, then
     * adds it to the other relevant subsystems of the driver model.
     *
     * NOTE: _Never_ directly free @dev after calling this function, even
     * if it returned an error! Always use put_device() to give up your
     * reference instead.
     */
    int device_add(struct device *dev)
    {
    ...
     /* Hand the device to the PM core; this excerpt shows only that call. */
     device_pm_add(dev);
    ...
    }
    举一个 audio device driver 的例子。 driver 的 init 函数中调用了函数 platform_device_add 。
    函数 platform_device_add 中调用了函数 device_add :
    /**
     * platform_device_add - add a platform device to device hierarchy
     * @pdev: platform device we're adding
     *
     * This is part 2 of platform_device_register(), though may be called
     * separately _iff_ pdev was allocated by platform_device_alloc().
     */
    int platform_device_add(struct platform_device *pdev)
    {
    ...

     /* Add the struct device embedded in the platform device; result kept in ret. */
     ret = device_add(&pdev->dev);
    ...
    }


    函数 suspend_enter 的实现:
    /**
     * suspend_enter - enter the desired system sleep state.
     * @state:  state to enter
     *
     * This function should be called after devices have been suspended.
     *
     * Each stage that fails jumps to the label that undoes only the stages
     * already completed; the labels then fall through in reverse order.
     * Returns 0 or the first error reported by a stage.
     */
    static int suspend_enter(suspend_state_t state)
    {
     int error;

     /* suspend_ops: the platform callbacks, each optional one checked before use */
     if (suspend_ops->prepare) {
      error = suspend_ops->prepare();
      if (error)
       goto Platform_finish;
     }

     /*
      * dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices.
      * @state: PM transition of the system being carried out.
      *
      * Prevent device drivers from receiving interrupts and call the "noirq" suspend
      * handlers for all non-sysdev devices.
      */
     error = dpm_suspend_noirq(PMSG_SUSPEND);
     if (error) {
      /* Newline-terminated so the error is logged as a complete line. */
      printk(KERN_ERR "PM: Some devices failed to power down\n");
      goto Platform_finish;
     }

     if (suspend_ops->prepare_late) {
      error = suspend_ops->prepare_late();
      if (error)
       goto Platform_wake;
     }

     if (suspend_test(TEST_PLATFORM))
      goto Platform_wake;

     /* Take every CPU other than the first one offline (via _cpu_down). */
     error = disable_nonboot_cpus();
     if (error || suspend_test(TEST_CPUS))
      goto Enable_cpus;

     arch_suspend_disable_irqs();
     BUG_ON(!irqs_disabled());

     /*
      * syscore_suspend - Execute all the registered system core suspend callbacks.
      *
      * This function is executed with one CPU on-line and disabled interrupts.
      *
      * It walks the syscore_ops_list queue and calls each member's suspend
      * function in turn. Members are put on that queue by:
      *
      * register_syscore_ops - Register a set of system core operations.
      * @ops: System core operations to register.
      */
     error = syscore_suspend();
     /* Only when syscore_suspend() succeeded do we go on to enter the state. */
     if (!error) {
      if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
       /*
        * The enter callback is usually implemented in the platform's pm.c.
        * In Freescale's i.MX6 pm.c, entering suspend calls a suspend routine
        * located in iRAM and finally waits for an interrupt to arrive.
        * That iRAM routine is copied there when the pm module initializes.
        * In this example the i.MX6 runs kernel 3.0.35; the i.MX5 on kernel
        * 2.6.35 is handled similarly, differing only in details.
        */
       error = suspend_ops->enter(state);
       events_check_enabled = false;
      }
      /*
       * syscore_resume - Execute all the registered system core resume callbacks.
       *
       * This function is executed with one CPU on-line and disabled interrupts.
       *
       * Power is back at this point; the queue processed is again
       * syscore_ops_list.
       */
      syscore_resume();
     }

     /* Counterpart of arch_suspend_disable_irqs() */
     arch_suspend_enable_irqs();
     BUG_ON(irqs_disabled());

     Enable_cpus:
     /* Counterpart of disable_nonboot_cpus() */
     enable_nonboot_cpus();

     Platform_wake:
     if (suspend_ops->wake)
      suspend_ops->wake();

     /* Counterpart of dpm_suspend_noirq() */
     dpm_resume_noirq(PMSG_RESUME);

     Platform_finish:
     if (suspend_ops->finish)
      suspend_ops->finish();

     return error;
    }

  • 相关阅读:
    PhoneGap 3.0 官方 安装 方法
    计算机组成
    软件开发瀑布模型
    国内源码下载地址(转载)
    阿里云ECS 介绍
    云端搭建Linux学习环境 链接https://edu.aliyun.com/article/19 (阿里云ECS服务器 )课堂
    阿里云服务器 ECS 部署lamp:centos+apache+mysql+php安装配置方法 (centos7)
    thinkphp 连接数据库 & 实例化模型操作 (下接thinkphp CURD 操作)/慕课
    PHP实现微信公众平台开发—基础篇
    ThinkPHP3.2.3完整版创建前后台入口文件 http://jingyan.baidu.com/article/7e4409533fc1092fc1e2ef53.html
  • 原文地址:https://www.cnblogs.com/sky-heaven/p/5333487.html
Copyright © 2020-2023  润新知