• /dev/mem同步写不能使用msync的MS_SYNC选项探究


    问题

    做了个测试板子的程序,里面有一项写铁电的功能,要求写入之后立即断电,重启后校验数据准确性;铁电设计是通过内存地址直接映射的,于是,使用mmap直接映射了/dev/mem文件,自然地写入之后使用msync进行同步,最后使用munmap解映射;

    然而,当我运行这段程序的时候,发现msync的MS_SYNC选项进行同步的时候会返回错误,错误码是EINVAL;这就奇怪了;

    查原因

    1. 查看MAN手册,如下:只有当地址不是页大小(PAGESIZE)的整数倍,或者flags参数传递错误时,才会返回EINVAL;

    1 EINVAL addr  is not a multiple of PAGESIZE; or any bit other than MS_ASYNC | MS_INVALIDATE | MS_SYNC is set in flags; or both MS_SYNC
    2 and MS_ASYNC are set in flags.

    反复验证,发现地址没问题,而且将MS_SYNC换成MS_ASYNC就没问题了,所以怀疑是内核不支持这个同步选项;为了求证,查看内核代码:

    2. sys_msync这个系统调用,在校验参数时,如果不合法会返回-EINVAL,这点如上述MAN手册所描述;

     1 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
     2 {
     3     unsigned long end;
     4     struct mm_struct *mm = current->mm;
     5     struct vm_area_struct *vma;
     6     int unmapped_error = 0;
     7     int error = -EINVAL;
     8 
     9     if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
    10         goto out;
    11     if (start & ~PAGE_MASK)
    12         goto out;
    13     if ((flags & MS_ASYNC) && (flags & MS_SYNC))
    14         goto out;
    15         ....
    16 }

    3. 继续往下看代码,有这么一段:如果带有MS_SYNC标记,并且该vma有对应的文件(file非空)且是共享映射(VM_SHARED),就会执行do_fsync();do_fsync()出错时,其错误码会作为error返回;

     1 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
     2 {
     3     ...
     4         if ((flags & MS_SYNC) && file &&
     5                 (vma->vm_flags & VM_SHARED)) {
     6             get_file(file);
     7             up_read(&mm->mmap_sem);
     8             error = do_fsync(file, 0);
     9             fput(file);
    10             if (error || start >= end)
    11                 goto out;
    12             down_read(&mm->mmap_sem);
    13             vma = find_vma(mm, start);
    14         } else {
    15             if (start >= end) {
    16                 error = 0;
    17                 goto out_unlock;
    18             }
    19             vma = vma->vm_next;
    20         }
    21     }
    22 out_unlock:
    23     up_read(&mm->mmap_sem);
    24 out:
    25     return error ? : unmapped_error;
    26 }

    4. 在do_fsync函数中,会对file_operations和里面的fsync函数做校验,如果没有,则返回-EINVAL,基本上可以确定,正是因为该文件没有实现file_operations里面的fsync函数,所以返回参数错误了;

     1 long do_fsync(struct file *file, int datasync)
     2 {
     3     int ret;
     4     int err;
     5     struct address_space *mapping = file->f_mapping;
     6 
     7     if (!file->f_op || !file->f_op->fsync) {
     8         /* Why?  We can still call filemap_fdatawrite */
     9         ret = -EINVAL;
    10         goto out;
    11     }
    12 
    13     ret = filemap_fdatawrite(mapping);
    14 
    15     /*
    16      * We need to protect against concurrent writers, which could cause
    17      * livelocks in fsync_buffers_list().
    18      */
    19     mutex_lock(&mapping->host->i_mutex);
    20     err = file->f_op->fsync(file, file->f_path.dentry, datasync);
    21     if (!ret)
    22         ret = err;
    23     mutex_unlock(&mapping->host->i_mutex);
    24     err = filemap_fdatawait(mapping);
    25     if (!ret)
    26         ret = err;
    27 out:
    28     return ret;
    29 }

    5. 我们来看看内存设备是在什么时候初始化的,如下代码:chr_dev_init中先通过register_chrdev注册主设备号为MEM_MAJOR的字符设备,然后循环调用device_create,为devlist中的一系列内存设备逐个创建设备,其中包括/dev/mem;

     1 static int __init chr_dev_init(void)
     2 {
     3     int i;
     4     int err;
     5 
     6     err = bdi_init(&zero_bdi);
     7     if (err)
     8         return err;
     9 
    10     if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
    11         printk("unable to get major %d for memory devs\n", MEM_MAJOR);
    12 
    13     mem_class = class_create(THIS_MODULE, "mem");
    14     for (i = 0; i < ARRAY_SIZE(devlist); i++)
    15         device_create(mem_class, NULL,
    16                   MKDEV(MEM_MAJOR, devlist[i].minor),
    17                   devlist[i].name);
    18 
    19     return 0;
    20 }

    6. 这个/dev/mem对应着一个操作函数,如下代码中的mem_fops:

     1 static const struct {
     2     unsigned int        minor;
     3     char            *name;
     4     umode_t            mode;
     5     const struct file_operations    *fops;
     6 } devlist[] = { /* list of minor devices */
     7     {1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
     8     {2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
     9     {3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
    10 #ifdef CONFIG_DEVPORT
    11     {4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
    12 #endif
    13     {5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
    14     {7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
    15     {8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
    16     {9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
    17     {11,"kmsg",    S_IRUGO | S_IWUSR,           &kmsg_fops},
    18 #ifdef CONFIG_CRASH_DUMP
    19     {12,"oldmem",    S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
    20 #endif
    21 };

    7. 看看这个mem_fops的实现,如下,可见其并没有实现fsync函数;

    1 static const struct file_operations mem_fops = {
    2     .llseek        = memory_lseek,
    3     .read        = read_mem,
    4     .write        = write_mem,
    5     .mmap        = mmap_mem,
    6     .open        = open_mem,
    7     .get_unmapped_area = get_unmapped_area_mem,
    8 };

    到这,问题总算水落石出了;

    8. 再来看看mmap函数的实现,里面调用了这个函数phys_mem_access_prot;

     1 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
     2 {
     3     size_t size = vma->vm_end - vma->vm_start;
     4 
     5     if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
     6         return -EINVAL;
     7 
     8     if (!private_mapping_ok(vma))
     9         return -ENOSYS;
    10 
    11     vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
    12                          size,
    13                          vma->vm_page_prot);
    14 
    15     /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
    16     if (remap_pfn_range(vma,
    17                 vma->vm_start,
    18                 vma->vm_pgoff,
    19                 size,
    20                 vma->vm_page_prot))
    21         return -EAGAIN;
    22     return 0;
    23 }

    9. 上面提到的phys_mem_access_prot函数如下,其中通过uncached_access判断是否需要以非缓存方式访问,若需要则用pgprot_noncached修改页保护属性;

     1 #ifndef __HAVE_PHYS_MEM_ACCESS_PROT
     2 static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
     3                      unsigned long size, pgprot_t vma_prot)
     4 {
     5 #ifdef pgprot_noncached
     6     unsigned long offset = pfn << PAGE_SHIFT;
     7 
     8     if (uncached_access(file, offset))
     9         return pgprot_noncached(vma_prot);
    10 #endif
    11     return vma_prot;
    12 }
    13 #endif

    10. 进入uncached_access非缓存访问函数,可见在多数平台分支中,其内部根据文件的O_SYNC选项来决定是否采用非缓存方式访问(ia64分支会忽略O_SYNC,MIPS分支则交给__uncached_access处理);

     1 static inline int uncached_access(struct file *file, unsigned long addr)
     2 {
     3 #if defined(__i386__) && !defined(__arch_um__)
     4     /*
     5      * On the PPro and successors, the MTRRs are used to set
     6      * memory types for physical addresses outside main memory,
     7      * so blindly setting PCD or PWT on those pages is wrong.
     8      * For Pentiums and earlier, the surround logic should disable
     9      * caching for the high addresses through the KEN pin, but
    10      * we maintain the tradition of paranoia in this code.
    11      */
    12     if (file->f_flags & O_SYNC)
    13         return 1;
    14      return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
    15           test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
    16           test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
    17           test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) )
    18       && addr >= __pa(high_memory);
    19 #elif defined(__x86_64__) && !defined(__arch_um__)
    20     /* 
    21      * This is broken because it can generate memory type aliases,
    22      * which can cause cache corruptions
    23      * But it is only available for root and we have to be bug-to-bug
    24      * compatible with i386.
    25      */
    26     if (file->f_flags & O_SYNC)
    27         return 1;
    28     /* same behaviour as i386. PAT always set to cached and MTRRs control the
    29        caching behaviour. 
    30        Hopefully a full PAT implementation will fix that soon. */       
    31     return 0;
    32 #elif defined(CONFIG_IA64)
    33     /*
    34      * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
    35      */
    36     return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
    37 #elif defined(CONFIG_MIPS)
    38     {
    39         extern int __uncached_access(struct file *file,
    40                          unsigned long addr);
    41 
    42         return __uncached_access(file, addr);
    43     }
    44 #else
    45     /*
    46      * Accessing memory above the top the kernel knows about or through a file pointer
    47      * that was marked O_SYNC will be done non-cached.
    48      */
    49     if (file->f_flags & O_SYNC)
    50         return 1;
    51     return addr >= __pa(high_memory);
    52 #endif
    53 }

    好了,分析完毕;

    解决办法

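    在打开/dev/mem时,使用如下方式,即open增加O_SYNC选项。对普通文件而言,O_SYNC表示每次写操作都要等到数据和文件属性都同步到物理存储才返回;而对/dev/mem来说,这个选项正是上面uncached_access函数所检查的标记:带O_SYNC打开后(在定义了pgprot_noncached且不忽略O_SYNC的平台上),mmap_mem会把映射的页属性设置为非缓存,写入不经过CPU缓存而直接到达所映射的物理地址(即铁电),因此也就不再需要调用msync的MS_SYNC选项来同步;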

    1 int fd = open("/dev/mem", O_RDWR|O_SYNC);

    参考文章:

    https://blog.csdn.net/wlp600/article/details/6893636

    http://www.armadeus.org/wiki/index.php?title=FPGA_registers_access_from_Linux_userspace

    https://stackoverflow.com/questions/20750176/how-to-get-writes-via-an-mmap-mapped-memory-pointer-to-flush-immediately

    https://blog.csdn.net/tiantao2012/article/details/52168383?locationNum=2&fps=1

  • 相关阅读:
    sql 行转列
    wm_concat函数 用法
    PL/SQL如何调试Oracle存储过程
    Oracle&SQLServer中实现跨库查询
    Oracle 中 decode 函数用法
    Oracle中给用户赋予debug权限
    Oracle中的NVL函数
    oracle 触发器 pragma autonomous_transaction
    ORACLE中%TYPE和%ROWTYPE的使用
    A complete example using RAISE_APPLICATION_ERROR : raise_application_error
  • 原文地址:https://www.cnblogs.com/wanpengcoder/p/11767208.html
Copyright © 2020-2023  润新知