Day8中并没有代码的更新,只是介绍了一下struct file
,以及open
等系统调用关联文件系统中的很多操作。struct file
保存内核看到的文件的特征信息。
Day9中实现了支持page cache的文件操作,重点涉及file_operations
的实现以及页缓存的相关知识。
file_operations
file_operations
的定义如下:
/*
 * struct file_operations - table of function pointers for the operations
 * that can be performed on an open file, as quoted from the kernel headers.
 * Only the members discussed in the accompanying article are annotated here.
 */
struct file_operations {
struct module *owner;
/* Move the file's current read/write position. */
loff_t (*llseek) (struct file *, loff_t, int);
/*
 * Read/write data: file, user-space buffer, byte count, pointer to the
 * file offset. The article notes ext2/ext4 leave these two unset.
 */
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
/* Called aio_read/aio_write in the older kernel the article ports from. */
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
/* Map the file's contents into a virtual address space. */
int (*mmap) (struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
/* Synchronise in-memory data with the data on the storage medium. */
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
/* Only present when the kernel is built without CONFIG_MMU. */
#ifndef CONFIG_MMU
unsigned (*mmap_capabilities)(struct file *);
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
u64);
int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
u64);
int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;
而下面是file.c中的实现:
/*
 * samplefs file operations as they appear in the original file.c, written
 * against an older kernel API. NOTE(review): .aio_read, .aio_write and
 * .sendfile are not members of the struct file_operations definition quoted
 * earlier in this article, so this initializer is shown for reference and
 * must be adapted as described in the text that follows.
 */
struct file_operations sfs_file_operations = {
/* The article leaves .read/.write unset in the port, as ext2/ext4 do. */
.read = do_sync_read,
/* Old name of .read_iter; the generic helper is now generic_file_read_iter. */
.aio_read = generic_file_aio_read,
.write = do_sync_write,
/* Old name of .write_iter; the generic helper is now generic_file_write_iter. */
.aio_write = generic_file_aio_write,
/* Kept unchanged in the port. */
.mmap = generic_file_mmap,
/* The article proposes a generic helper from libfs.c here instead. */
.fsync = simple_sync_file,
/* Member removed in newer kernels; dropped in the port. */
.sendfile = generic_file_sendfile,
/* generic_file_llseek is found in read_write.c. */
.llseek = generic_file_llseek,
};
read
和write
分别负责读写数据,参数分别为文件对象指针(struct file *)、用户态数据缓冲区、数据大小和指向文件偏移量的指针。这里追一下read_write.c中read系统调用的代码就会发现,do_sync_read
可用new_sync_read
替代,do_sync_write
类似。而且我们发现,实际上new_sync_read
也调用了generic_file_read_iter
;同时查看了ext2,ext4和通用块设备的file_operations
,发现他们都没有设置read
和write
成员,所以我们在实验中也不设置read
和write
。
aio_read
和aio_write
用于异步读写操作,在新版本内核中,更名为read_iter
和write_iter
,对应的generic函数在filemap.c中,更名为generic_file_read_iter
和generic_file_write_iter
。这两个函数在这里不做具体分析。
mmap
将文件内容映射到虚拟地址空间,减少了一次拷贝操作。举例来说,通常情况下,一个文件写操作需要经过以下几次拷贝:用户态->内核态->写文件,而mmap则省略了用户态到内核态的拷贝,可以直接从用户态访问具体文件。具体实现是generic_file_mmap
,没变化。
fsync
用于同步内存中和存储介质上的数据。这里不妨使用libfs.c中的generic_file_fsync
。
sendfile
这个成员函数在新版本内核中被去掉了,关于sendfile
的相关知识,可以查看这篇文章。总体上来讲,就是绕过用户态与内核态之间的切换和数据复制,直接用DMA把数据从一个内核缓冲区传到另一个内核缓冲区。由于该成员已被去掉,在本次实验中也直接去掉。
llseek
很简单,就是调整文件描述符到指定的位置,改变当前的读写位置。generic_file_llseek
在read_write.c中,具体代码在这里不细说了。
address_space
个人理解是:地址空间是内核针对缓存的一个统一抽象。
地址空间是内核中最关键的数据结构之一,可以认为是内核最基本的抽象机制之一,其重要性堪比进程、文件等抽象结构。
address_space
定义如下:
/*
 * struct address_space - the set of cached pages belonging to one owner,
 * as quoted from the kernel headers. The accompanying article concentrates
 * on host, i_pages, nrpages and a_ops; the remaining members are not
 * discussed there.
 */
struct address_space {
struct inode *host; /* owner: inode, block_device */
struct radix_tree_root i_pages; /* cached pages (root of the radix tree holding them) */
atomic_t i_mmap_writable;/* count VM_SHARED mappings */
struct rb_root_cached i_mmap; /* tree of private and shared mappings */
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by the i_pages lock */
unsigned long nrpages; /* number of total pages */
/* number of shadow or DAX exceptional entries */
unsigned long nrexceptional;
pgoff_t writeback_index;/* writeback starts here */
const struct address_space_operations *a_ops; /* methods */
unsigned long flags; /* error bits */
spinlock_t private_lock; /* for use by the address_space */
gfp_t gfp_mask; /* implicit gfp mask for allocations */
struct list_head private_list; /* for use by the address_space */
void *private_data; /* ditto */
errseq_t wb_err;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
host
保存当前地址空间的所有者,inode或者是块设备。
i_pages
指向一个基数树的根,这个基数树列出了当前地址空间中所有的物理内存页。
nrpages
保存缓存页的总数。
a_ops
指向关于address_space
的操作结构,定义了一些处理地址空间的特定操作。下文详述。
其他的内容我们目前不关注。必须指出的是,新内核中移除了backing_dev_info
这个成员。该成员原本保存后备存储器的设备信息,而后备存储器指定了地址空间中页的数据的来源;这部分功能在新内核中是怎样实现的还不清楚。
address_space_operations
address_space_operations
定义如下:
/*
 * struct address_space_operations - method table attached to an
 * address_space through its a_ops pointer, as quoted from the kernel
 * headers. samplefs fills in only readpage, write_begin and write_end.
 */
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
/* Read one page from the backing store into a page frame. */
int (*readpage)(struct file *, struct page *);
/* Write back some dirty pages from this mapping. */
int (*writepages)(struct address_space *, struct writeback_control *);
/* Set a page dirty. Return true if this dirtied it */
int (*set_page_dirty)(struct page *page);
/*
 * Reads in the requested pages. Unlike ->readpage(), this is
 * PURELY used for read-ahead!.
 */
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
/*
 * write_begin/write_end carry out a write triggered by the write system
 * call and must be used as a correctly ordered pair. The article notes
 * they were named prepare_write/commit_write in the older code.
 */
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
int (*write_end)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
sector_t (*bmap)(struct address_space *, sector_t);
void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, gfp_t);
void (*freepage)(struct page *);
ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
/*
 * migrate the contents of a page to the specified target. If
 * migrate_mode is MIGRATE_ASYNC, it must not block.
 */
int (*migratepage) (struct address_space *,
struct page *, struct page *, enum migrate_mode);
bool (*isolate_page)(struct page *, isolate_mode_t);
void (*putback_page)(struct page *);
int (*launder_page) (struct page *);
int (*is_partially_uptodate) (struct page *, unsigned long,
unsigned long);
void (*is_dirty_writeback) (struct page *, bool *, bool *);
int (*error_remove_page)(struct address_space *, struct page *);
/* swapfile support */
int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
sector_t *span);
void (*swap_deactivate)(struct file *file);
};
在file.c中,具体实现如下:
/*
 * samplefs address-space operations from file.c: only page read-in and the
 * write_begin/write_end pair are provided, each wired to a simple_* helper.
 */
struct address_space_operations sfs_aops = {
/* Reads one page from the backing store into a page frame. */
.readpage = simple_readpage,
/* Paired hooks used for writes triggered by the write system call. */
.write_begin = simple_write_begin,
.write_end = simple_write_end
};
readpage
从存储器将一页读入页帧。
write_begin
和write_end
执行由write系统调用触发的写操作,begin将事务数据存储到日志,end将执行实际的写操作,在写入时,内核必须保证两个函数成对使用,并且顺序正确,否则日志机制会失效。当前将写操作划分为两部分已经成为一个约定俗成的传统。
注意,原来的老版本代码中write_begin
和write_end
分别命名为:prepare_write
和commit_write
。
实验结果
可见,基本的文件夹创建、文件读写操作目前都已经支持。
Day10 11
samplefs整体已经完成构建,在day10和11中也只是对address_space_operations
中的readpages
和writepages
进行了介绍,以及在inode_operations
中加入了对硬链接和软链接的处理。