• BUG: scheduling while atomic 分析【转】


    本文转载自:https://blog.csdn.net/cfy_phonex/article/details/12090943

    遇到一个典型的schedule问题。

    1.  
      <3>[26578.636839] C1 [ swapper/1] BUG: scheduling while atomic: swapper/1/0/0x00000002
    2.  
      <6>[26578.636869] C0 [ kworker/u:1] CPU1 is up
    3.  
      <4>[26578.636900] C1 [ swapper/1] Modules linked in: bcm15500_i2c_ts
    4.  
      <4>[26578.636961] C1 [ swapper/1] [<c00146d0>] (unwind_backtrace+0x0/0x11c) from [<c0602684>] (__schedule+0x70/0x6e0)
    5.  
      <4>[26578.636991] C1 [ swapper/1] [<c0602684>] (__schedule+0x70/0x6e0) from [<c06030ec>] (schedule_preempt_disabled+0x14/0x20)
    6.  
      <4>[26578.637052] C1 [ swapper/1] [<c06030ec>] (schedule_preempt_disabled+0x14/0x20) from [<c000f05c>] (cpu_idle+0xf0/0x104)
    7.  
      <4>[26578.637083] C1 [ swapper/1] [<c000f05c>] (cpu_idle+0xf0/0x104) from [<c05e98e0>] (cpu_die+0x2c/0x5c)
    8.  
      <3>[26578.637510] C1 [ swapper/1] BUG: scheduling while atomic: swapper/1/0/0x00000002
    9.  
      <4>[26578.637510] C1 [ swapper/1] Modules linked in: bcm15500_i2c_ts
    10.  
      <4>[26578.637602] C1 [ swapper/1] [<c00146d0>] (unwind_backtrace+0x0/0x11c) from [<c0602684>] (__schedule+0x70/0x6e0)
    11.  
      <4>[26578.637663] C1 [ swapper/1] [<c0602684>] (__schedule+0x70/0x6e0) from [<c06030ec>] (schedule_preempt_disabled+0x14/0x20)
    12.  
      <4>[26578.637724] C1 [ swapper/1] [<c06030ec>] (schedule_preempt_disabled+0x14/0x20) from [<c000f05c>] (cpu_idle+0xf0/0x104)
    13.  
      <4>[26578.637754] C1 [ swapper/1] [<c000f05c>] (cpu_idle+0xf0/0x104) from [<c05e98e0>] (cpu_die+0x2c/0x5c)
    14.  
      <3>[26578.648069] C1 [ swapper/1] BUG: scheduling while atomic: swapper/1/0/0x00000002


    查看源代码

    1.  
      /*
    2.  
      * __schedule() is the main scheduler function.
    3.  
      */
    4.  
      static void __sched __schedule(void)
    5.  
      {
    6.  
      struct task_struct *prev, *next;
    7.  
      unsigned long *switch_count;
    8.  
      struct rq *rq;
    9.  
      int cpu;
    10.  
       
    11.  
      need_resched:
    12.  
      preempt_disable();
    13.  
      cpu = smp_processor_id();
    14.  
      rq = cpu_rq(cpu);
    15.  
      rcu_note_context_switch(cpu);
    16.  
      prev = rq->curr;
    17.  
       
    18.  
      schedule_debug(prev);
    19.  
          ....
    20.  
    1.  
      /*
    2.  
      * Print scheduling while atomic bug:
    3.  
      */
    4.  
      static noinline void __schedule_bug(struct task_struct *prev)
    5.  
      {
    6.  
      if (oops_in_progress)
    7.  
      return;
    8.  
       
    9.  
      printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x ",
    10.  
      prev->comm, prev->pid, preempt_count());
    11.  
       
    12.  
      debug_show_held_locks(prev);
    13.  
      print_modules();
    14.  
      if (irqs_disabled())
    15.  
      print_irqtrace_events(prev);
    16.  
       
    17.  
      dump_stack();
    18.  
      }
    19.  
       
    20.  
      /*
    21.  
      * Various schedule()-time debugging checks and statistics:
    22.  
      */
    23.  
      static inline void schedule_debug(struct task_struct *prev)
    24.  
      {
    25.  
      /*
    26.  
      * Test if we are atomic. Since do_exit() needs to call into
    27.  
      * schedule() atomically, we ignore that path for now.
    28.  
      * Otherwise, whine if we are scheduling when we should not be.
    29.  
      */
    30.  
      if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
    31.  
      __schedule_bug(prev);
    32.  
      rcu_sleep_check();
    33.  
       
    34.  
      profile_hit(SCHED_PROFILING, __builtin_return_address(0));
    35.  
       
    36.  
      schedstat_inc(this_rq(), sched_count);
    37.  
      }


    可以看出, 满足如下条件将会打印该出错信息

    unlikely(in_atomic_preempt_off() && !prev->exit_state)

    prev->exit_state为0表示进程没有处于退出状态(注意exit_state与task->state是两组独立的标志,见下面sched.h中的注释); 如果此时又处于原子状态,那么就不能切换给其它的进程

    1.  
      Linux/include/linux/sched.h
    2.  
       
    3.  
      /*
    4.  
      * Task state bitmask. NOTE! These bits are also
    5.  
      * encoded in fs/proc/array.c: get_task_state().
    6.  
      *
    7.  
      * We have two separate sets of flags: task->state
    8.  
      * is about runnability, while task->exit_state are
    9.  
      * about the task exiting. Confusing, but this way
    10.  
      * modifying one set can't modify the other one by
    11.  
      * mistake.
    12.  
      */
    13.  
      #define TASK_RUNNING 0
    14.  
      #define TASK_INTERRUPTIBLE 1
    15.  
      #define TASK_UNINTERRUPTIBLE 2
    16.  
      #define __TASK_STOPPED 4
    17.  
      #define __TASK_TRACED 8
    18.  
      /* in tsk->exit_state */
    19.  
      #define EXIT_ZOMBIE 16
    20.  
      #define EXIT_DEAD 32
    21.  
      /* in tsk->state again */
    22.  
      #define TASK_DEAD 64
    23.  
      #define TASK_WAKEKILL 128
    24.  
      #define TASK_WAKING 256
    25.  
      #define TASK_STATE_MAX 512
    1.  
      kernel/include/linux/hardirq.h
    2.  
       
    3.  
      #if defined(CONFIG_PREEMPT_COUNT)
    4.  
      # define PREEMPT_CHECK_OFFSET 1
    5.  
      #else
    6.  
      # define PREEMPT_CHECK_OFFSET 0
    7.  
      #endif
    8.  
       
    9.  
      /*
    10.  
      * Are we running in atomic context? WARNING: this macro cannot
    11.  
      * always detect atomic context; in particular, it cannot know about
    12.  
      * held spinlocks in non-preemptible kernels. Thus it should not be
    13.  
      * used in the general case to determine whether sleeping is possible.
    14.  
      * Do not use in_atomic() in driver code.
    15.  
      */
    16.  
      #define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
    17.  
       
    18.  
      /*
    19.  
      * Check whether we were atomic before we did preempt_disable():
    20.  
      * (used by the scheduler, *after* releasing the kernel lock)
    21.  
      */
    22.  
      #define in_atomic_preempt_off() \
    23.  
      ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)

    结论整理

    linux内核打印"BUG: scheduling while atomic"和"bad: scheduling from the idle thread"错误的时候,

    通常是在中断处理函数中调用了semaphore,mutex,sleep之类可能休眠的函数,

    而linux内核要求在中断处理的时候,不允许系统调度,不允许抢占,要等到中断处理完成才能做其他事情。

    因此,要充分考虑中断处理的时间,一定不能太久。

    另外一个能产生此问题的是在idle进程里面,做了不该做的事情。现在Linux用于很多手持式设备,为了降低功耗,

    通常的做法是在idle进程里面降低CPU或RAM的频率、关闭一些设备等等。要保证这些动作的原子性才能确保

    不发生"bad: scheduling from the idle thread"这样的错误!

    禁止内核抢占是指内核不会主动的抢占你的process,但是现在是你在自己的程序中主动调用schedule(),

    kernel并不能阻止你这么做。

    "Scheduling while atomic" means that a thread has called schedule() during an operation which is supposed to be atomic (i.e. uninterrupted).

    1.  
      190 NOTE: ***** WARNING *****
    2.  
      191 NEVER SLEEP IN A COMPLETION HANDLER. These are normally called
    3.  
      192 during hardware interrupt processing. If you can, defer substantial
    4.  
      193 work to a tasklet (bottom half) to keep system latencies low. You'll
    5.  
      194 probably need to use spinlocks to protect data structures you manipulate
    6.  
      195 in completion handlers.
      1.  
        GFP_ATOMIC is used when
      2.  
        (a) you are inside a completion handler, an interrupt, bottom half, tasklet or timer, or
      3.  
        (b) you are holding a spinlock or rwlock (does not apply to semaphores), or
      4.  
        (c) current->state != TASK_RUNNING, this is the case only after you've changed it.
      5.  

  • 相关阅读:
    7、MyBatis动态SQL
    6、MyBatis的SQL映射(mapper)文件
    5、MyBatis传统Dao开发 & Dao动态代理开发
    4、MyBatis使用的对象 & 封装工具类
    3、MyBatis入门实例
    2、MyBatis概述
    matlab 向量操作作业
    matlab 数组操作作业
    css子选择器 :frist-child :nth-child(n) :nth-of-type(n) ::select选择器
    6.3蓝牙透传数据与微信小程序通信
  • 原文地址:https://www.cnblogs.com/zzb-Dream-90Time/p/9394248.html
Copyright © 2020-2023  润新知