1. 预备内容
在开始这个 lab 前需要先了解 Sv39 RISC-V 的虚拟地址转换物理地址流程。
1.1 Sv39 RISC-V
xv6 为 64 位的操作系统,在其采用的 Sv39 RISC-V 机制下,该操作系统的虚拟地址位长只有 39 位(也可选择为 48 位),剩余的 25 位为扩展位,默认情况下不使用。其物理地址由 44 位的物理页号 PPN 和 12 位的页内偏移组成(页大小为 4 KB)。
1.2 虚拟地址翻译
首先,需要在 satp 寄存器中设置 MODE 字段开启分页,并且将根页表(页目录)的物理页号载入该寄存器的 PPN 字段,该寄存器结构如下:
MODE 字段取值范围如下,xv6 中该值为 8,即开启 39 bit 虚拟地址:
加载完页表开启分页后,其虚拟地址翻译流程大致如下:
可以看到分为 3 级页表(假如 MODE 设置为 9,开启 48 bit 的虚拟地址,则为 4 级页表,39 ~ 47 位为 4 级页表的索引位置)。翻译时首先忽略低 12 位的页内偏移,接下来的 27 位拆分为 3 组,每组 9 位,每一组的值依次对应在每一级页表中的索引位置。
假如以伪代码的形式,模拟翻译流程,不考虑页目录未创建的情况,则流程大致如下:
// Pseudo-code: Sv39 translation of a virtual address, assuming every
// intermediate page table already exists.
uint64 vaddr; // virtual address to translate
pagetable_t pagetable = get_pagetable(); // root page table
// Step down to the second-level page table.
int L2 = get_va_L2_value(vaddr); // high 9 bits of the 27 index bits
pagetable = pagetable[L2]; // index the root table with L2 to reach the next level
// Step down to the last-level page table.
int L1 = get_va_L1_value(vaddr); // middle 9 bits of the 27 index bits
pagetable = pagetable[L1]; // last-level page table
// The last-level table has been reached; look up the target page.
int L0 = get_va_L0_value(vaddr); // low 9 bits of the 27 index bits
pte_t* pte = &pagetable[L0];
// Shift right by 10 to drop the attribute bits, then shift left by 12 to
// leave room for the page offset; the result is the physical page base.
uint64 ppn = *pte >> 10 << 12;
// Add the low 12 bits of the virtual address (the page offset, hence the
// 0xfff mask) to obtain the final physical address.
uint64 paddr= ppn + (vaddr & 0xfff);
xv6 内核的虚拟地址空间与物理地址空间映射图
2. Speed up system calls(easy)
2.1 要求
Some operating systems (e.g., Linux) speed up certain system calls by sharing data in a read-only region between userspace and the kernel. This eliminates the need for kernel crossings when performing these system calls. To help you learn how to insert mappings into a page table, your first task is to implement this optimization for the getpid() system call in xv6.
When each process is created, map one read-only page at USYSCALL (a VA defined in memlayout.h). At the start of this page, store a struct usyscall (also defined in memlayout.h), and initialize it to store the PID of the current process. For this lab, ugetpid() has been provided on the userspace side and will automatically use the USYSCALL mapping. You will receive full credit for this part of the lab if the ugetpid test case passes when running pgtbltest.
简单来说就是提速一些系统调用:内核把调用结果保存到一个 struct usyscall 结构体中,然后在创建用户进程时,将该结构体所在的物理页映射到用户地址空间的 USYSCALL 虚拟地址处,并设置为只读。
以 getpid() 系统调用为例,将进程 pid 保存到 struct usyscall 中,然后用户态通过 ugetpid() 直接从 USYSCALL 处读取 pid,这样可以避免用户态与内核态的切换消耗。
2.2 分析
这次实验主要需要修改的地方如下:
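- 保存 struct usyscall,并初始化其内容。由于它的内容与进程息息相关,故在 struct proc 中保存一个指向它的指针
- 将 struct usyscall 所在的内核物理页映射到用户地址空间的 USYSCALL 处
- 进程销毁时需要做对应的清理:在 proc_freepagetable 中解除 USYSCALL 的映射,并在 freeproc 中释放这一页,否则释放页表时会 panic,并且这一页内存会泄漏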
2.3 实现
- USYSCALL 保存
给其分配一页,方便后续做页映射
// Per-process state
struct proc {
// Points to this process's struct usyscall; the page holding it is the one
// mapped read-only into user space at USYSCALL.
struct usyscall* usyscall;
// some code ...
};
// Data placed at the start of the page mapped at USYSCALL, so user code can
// read it without entering the kernel.
struct usyscall {
int pid; // Process ID
};
// Look in the process table for an UNUSED proc.
// If found, initialize state required to run in the kernel,
// and return with p->lock held.
// If there are no free procs, or a memory allocation fails, return 0.
static struct proc* allocproc(void)
{
struct proc *p;
// some code ...
// NOTE(review): p is presumably pointed at the chosen proc (and p->pid set)
// by the elided code above -- confirm against the full function.
// Allocate a usyscall page.
// A whole page is taken from kalloc() so it can later be mapped on its own.
if((p->usyscall = (struct usyscall *)kalloc()) == 0){
// Allocation failed: tear down the partially built proc and drop its lock.
freeproc(p);
release(&p->lock);
return 0;
}
// Publish the pid in the shared page.
p->usyscall->pid = p->pid;
// some code ...
return p;
}
- 映射 USYSCALL
其核心接口为 mappages 和 walk。mappages 为物理页和虚拟页建立映射,并设置页属性;walk 根据虚拟地址找到其对应的页表项(PTE)位置,其流程类似前文 1.2 虚拟地址翻译。
由于 struct usyscall 所在的页是内核分配的,要让用户态能够访问,设置页属性时必须带上 PTE_U;又因为只允许用户读取,所以权限只设置 PTE_R,即 PTE_R | PTE_U:用户空间可访问且只有读权限。
// Virtual address of the usyscall page: exactly one page below TRAPFRAME.
#define USYSCALL (TRAPFRAME - PGSIZE)
// Create a user page table for a given process,
// with no user memory, but with trampoline pages.
// Also maps the process's usyscall page at USYSCALL.
// Returns the new page table, or 0 if the USYSCALL mapping fails.
pagetable_t proc_pagetable(struct proc *p)
{
pagetable_t pagetable;
// some code ...
// Map the page behind p->usyscall at USYSCALL with PTE_R | PTE_U:
// readable from user mode, not writable.
// On failure, remove the TRAMPOLINE and TRAPFRAME mappings (presumably
// installed by the elided code above -- confirm) without freeing their
// physical pages, then free the page table itself.
if(mappages(pagetable, USYSCALL, PGSIZE,
(uint64)(p->usyscall), PTE_R | PTE_U) < 0){
uvmunmap(pagetable, TRAMPOLINE, 1, 0);
uvmunmap(pagetable, TRAPFRAME, 1, 0);
uvmfree(pagetable, 0);
return 0;
}
return pagetable;
}
// Install PTEs mapping the virtual range [va, va + size) onto physical
// memory starting at pa, one page at a time, with permission bits perm.
// va and size need not be page-aligned; the range is widened to the pages
// that contain its first and last byte.
// Returns 0 on success, -1 if walk() could not supply a PTE slot.
// Panics on a zero size or when a page in the range is already mapped.
int mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{
  if(size == 0)
    panic("mappages: size");

  uint64 page_va = PGROUNDDOWN(va);
  uint64 final_va = PGROUNDDOWN(va + size - 1);

  for(;; page_va += PGSIZE, pa += PGSIZE){
    // Ask walk() to create intermediate tables as needed.
    pte_t *slot = walk(pagetable, page_va, 1);
    if(slot == 0)
      return -1;
    if(*slot & PTE_V)
      panic("mappages: remap");
    *slot = PA2PTE(pa) | perm | PTE_V;
    // Compare before advancing so the last page is mapped exactly once.
    if(page_va == final_va)
      break;
  }
  return 0;
}
// Return the address of the level-0 PTE in pagetable that corresponds to
// virtual address va. When alloc is non-zero, missing intermediate page
// tables are allocated and zeroed on the way down.
// Returns 0 if an intermediate table is missing and either alloc is zero
// or kalloc() fails. Panics if va >= MAXVA.
pte_t * walk(pagetable_t pagetable, uint64 va, int alloc)
{
  if(va >= MAXVA)
    panic("walk");

  int level = 2;
  while(level > 0){
    pte_t *entry = &pagetable[PX(level, va)];
    if(*entry & PTE_V){
      // Follow the existing link to the next-level table.
      pagetable = (pagetable_t)PTE2PA(*entry);
    } else {
      if(!alloc)
        return 0;
      pagetable = (pde_t*)kalloc();
      if(pagetable == 0)
        return 0;
      // A fresh table must start with every entry invalid.
      memset(pagetable, 0, PGSIZE);
      *entry = PA2PTE(pagetable) | PTE_V;
    }
    level--;
  }
  return &pagetable[PX(0, va)];
}
3. Print a page table(easy)
3.1 要求
Define a function called vmprint(). It should take a pagetable_t argument, and print that pagetable in the format described below. Insert if(p->pid==1) vmprint(p->pagetable) in exec.c just before the return argc, to print the first process's page table. You receive full credit for this part of the lab if you pass the pte printout test of make grade.
简单来说就是给一个页表,打印其页表结构。格式如下:
page table 0x0000000087f6e000
..0: pte 0x0000000021fda801 pa 0x0000000087f6a000
.. ..0: pte 0x0000000021fda401 pa 0x0000000087f69000
.. .. ..0: pte 0x0000000021fdac1f pa 0x0000000087f6b000
.. .. ..1: pte 0x0000000021fda00f pa 0x0000000087f68000
.. .. ..2: pte 0x0000000021fd9c1f pa 0x0000000087f67000
..255: pte 0x0000000021fdb401 pa 0x0000000087f6d000
.. ..511: pte 0x0000000021fdb001 pa 0x0000000087f6c000
.. .. ..509: pte 0x0000000021fdd813 pa 0x0000000087f76000
.. .. ..510: pte 0x0000000021fddc07 pa 0x0000000087f77000
.. .. ..511: pte 0x0000000020001c0b pa 0x0000000080007000
3.2 实现
这里流程与地址翻译有些类似,找到 3 级页表,依次递归打印。
// Indentation printed before an entry, indexed by page-table level:
// level 2 (root) gets the shortest prefix, level 0 the longest.
char* prefix[] = {".. .. ..", ".. ..", ".."};

// Print every valid entry of the 512-entry table `pagetable`, which sits at
// the given level (2 = root, 0 = last level), and recurse into the table
// each valid entry points to while level is above 0.
void _vmprint(pagetable_t pagetable, int level)
{
  for(int i = 0; i < 512; i++){
    pte_t *slot = &pagetable[i];
    if(*slot & PTE_V){
      printf("%s%d: pte %p pa %p\n", prefix[level], i, *slot, PTE2PA(*slot));
      // Above level 0 the entry refers to a lower-level table: descend.
      if(level != 0)
        _vmprint((pagetable_t)PTE2PA(*slot), level - 1);
    }
  }
}
// Print the address of the given page table followed by all of its valid
// entries, starting the recursive dump at the root level (2).
void vmprint(pagetable_t pagetable)
{
  const int root_level = 2;

  printf("page table %p\n", pagetable);
  _vmprint(pagetable, root_level);
}
4. Detecting which pages have been accessed (hard)
4.1 要求
Your job is to implement pgaccess(), a system call that reports which pages have been accessed. The system call takes three arguments. First, it takes the starting virtual address of the first user page to check. Second, it takes the number of pages to check. Finally, it takes a user address to a buffer to store the results into a bitmask (a datastructure that uses one bit per page and where the first page corresponds to the least significant bit). You will receive full credit for this part of the lab if the pgaccess test case passes when running pgtbltest.
实现接口 int pgaccess(void* base, int len, void* mask),其功能为:检查以 base 为起始地址的连续 len 页是否被访问过,即页表项的 PTE_A 属性是否为 1,并将结果通过 mask 返回。mask 为 32 位的位图,第 i 位(从最低位第 0 位开始)为 1 表示第 i + 1 页近期被访问过。
页属性可参考下图:
这里需要注意的是,每次执行 pgaccess 的时候,需要清空被检查页的 PTE_A 属性,因为该位由硬件在访问页面时置 1,但硬件不会将其清零;如果不手动清除,就无法判断该页在上次检查之后是否又被访问过。
4.2 分析
难点主要有 2 个
- 获取被检查的虚拟地址页的 pte,这里可以仿照前文所说的 walk 接口
- 返回 mask 结果,这里参考 copyout 接口,该接口主要将内核空间的数据传回用户空间
4.3 实现
// Read-only lookup: return the address of the level-0 PTE for va in
// pagetable, or 0 if an intermediate page table on the way is not valid.
// Never allocates. The returned PTE itself is not checked for validity.
pte_t * _walk(pagetable_t pagetable, uint64 va)
{
  int level = 2;
  while(level > 0){
    pte_t *entry = &pagetable[PX(level, va)];
    if((*entry & PTE_V) == 0)
      return 0;
    pagetable = (pagetable_t)PTE2PA(*entry);
    level--;
  }
  return &pagetable[PX(0, va)];
}
int
sys_pgaccess(void)
{
// lab pgtbl: your code here.
int page_cnt;
uint64 start_addr;
uint64 out_result;
if (argaddr(0, &start_addr) < 0)
return -1;
if (argint(1, &page_cnt) < 0)
return -1;
if (argaddr(2, &out_result) < 0)
return -1;
//printf("start addr %p \n ", start_addr);
uint32 scan_result = 0;
pagetable_t pagetable = myproc()->pagetable;
for (int i = 0; i < page_cnt; i++)
{
pte_t* pte = _walk(pagetable, start_addr);
if (pte == 0)
continue;
//printf("found pte %p no.%d\n", *pte, i);
if ((*pte & PTE_A))
{
scan_result |= (1 << i);
*pte &= (~PTE_A); // need to clear bit , becasue if set PTE_A , will exists forever
//printf("scan %d is valid \n", i);
}
start_addr += PGSIZE;
}
if(copyout(pagetable, out_result, (char *)&scan_result, sizeof(uint32)) < 0)
return -1;
return 0;
}