在排查一个nvme的workqueue的问题的时候,发现nvme相关workqueue的进程名被截断了:
[root@localhost caq]# ps -ef |grep -i nvme root 1699 2 0 10:05 ? 00:00:00 [nvme] root 29705 106743 0 18:44 pts/31 00:00:00 grep --color=auto -i nvme root 142222 2 0 17:42 ? 00:00:00 [xfs-buf/nvme0n1] root 142223 2 0 17:42 ? 00:00:00 [xfs-data/nvme0n] root 142224 2 0 17:42 ? 00:00:00 [xfs-conv/nvme0n] root 142225 2 0 17:42 ? 00:00:00 [xfs-cil/nvme0n1] root 142227 2 0 17:42 ? 00:00:00 [xfs-log/nvme0n1] root 142229 2 0 17:42 ? 00:00:00 [xfsaild/nvme0n1] root 142231 2 0 17:42 ? 00:00:00 [xfs-buf/nvme1n1] root 142232 2 0 17:42 ? 00:00:00 [xfs-data/nvme1n] root 142233 2 0 17:42 ? 00:00:00 [xfs-conv/nvme1n] root 142234 2 0 17:42 ? 00:00:00 [xfs-cil/nvme1n1] root 142236 2 0 17:42 ? 00:00:00 [xfs-log/nvme1n1] root 142238 2 0 17:42 ? 00:00:00 [xfsaild/nvme1n1] root 142240 2 0 17:42 ? 00:00:00 [xfs-buf/nvme2n1] root 142241 2 0 17:42 ? 00:00:00 [xfs-data/nvme2n] root 142242 2 0 17:42 ? 00:00:00 [xfs-conv/nvme2n] root 142243 2 0 17:42 ? 00:00:00 [xfs-cil/nvme2n1] root 142245 2 0 17:42 ? 00:00:00 [xfs-log/nvme2n1] root 142247 2 0 17:42 ? 00:00:00 [xfsaild/nvme2n1] root 142391 2 0 17:42 ? 00:00:00 [xfs-buf/nvme3n1] root 142392 2 0 17:42 ? 00:00:00 [xfs-data/nvme3n] root 142393 2 0 17:42 ? 00:00:00 [xfs-conv/nvme3n] root 142394 2 0 17:42 ? 00:00:00 [xfs-cil/nvme3n1] root 142396 2 0 17:42 ? 00:00:00 [xfs-log/nvme3n1] root 142398 2 0 17:42 ? 00:00:00 [xfsaild/nvme3n1] root 143771 2 0 17:43 ? 00:00:00 [dio/nvme0n1] root 144510 2 0 17:43 ? 00:00:00 [dio/nvme2n1] root 144533 2 0 17:43 ? 00:00:00 [dio/nvme1n1] root 146048 2 0 17:44 ? 00:00:00 [dio/nvme3n1]
一开始怀疑是ps命令截断了,然后看一下crash中对应的长度,发现也是截断的。
crash> ps |grep xfs-reclaim/sda 29844 2 64 ffff885f5eab3f40 IN 0.0 0 0 [xfs-reclaim/sda] 29881 2 64 ffff8857066f8fd0 IN 0.0 0 0 [xfs-reclaim/sda] 29996 2 64 ffff885705eb8000 IN 0.0 0 0 [xfs-reclaim/sda] 43802 2 66 ffff8855553b8000 IN 0.0 0 0 [xfs-reclaim/sda] 43816 2 66 ffff88291574eeb0 IN 0.0 0 0 [xfs-reclaim/sda] 43859 2 41 ffff885555e5eeb0 IN 0.0 0 0 [xfs-reclaim/sda] 43886 2 41 ffff8855556adee0 IN 0.0 0 0 [xfs-reclaim/sda]
一个工作队列的创建流程如下,比如使用alloc_workqueue--->__alloc_workqueue_key--->
rescuer->rescue_wq = wq; rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
char name[WQ_NAME_LEN]; /* I: workqueue name */
WQ_NAME_LEN = 24,
WQ_NAME_LEN为24,去掉结尾的'\0',按道理工作队列的名字最多应该有23字节的长度,再继续看kthread_create的实现:
#define kthread_create(threadfn, data, namefmt, arg...) kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, int node, const char namefmt[], ...) { struct kthread_create_info create; create.threadfn = threadfn; create.data = data; create.node = node; init_completion(&create.done); spin_lock(&kthread_create_lock); list_add_tail(&create.list, &kthread_create_list); spin_unlock(&kthread_create_lock); wake_up_process(kthreadd_task); wait_for_completion(&create.done); if (!IS_ERR(create.result)) { static const struct sched_param param = { .sched_priority = 0 }; va_list args; va_start(args, namefmt); vsnprintf(create.result->comm, sizeof(create.result->comm), namefmt, args);-----------被截断了 va_end(args);
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
it with task_lock())
- initialized normally by setup_new_exec */
/* Task command name length */
#define TASK_COMM_LEN 16
TASK_COMM_LEN为16,去掉结尾的'\0',comm中最多只能保存15字节,所以线程名在vsnprintf写入comm时又被截断成了15字节。ps和crash读取的都是comm,所以只能展示这么多。