MIT 6.828 - 9. Lab 09: mmap

Tags: MIT 6.828

实验总结

  1. 本次实验用时约 20 个小时。
  2. 收获是写了一些和 mmap 相关的代码,尝试实现了一个 mmap 的子集,并且通过了课程测试。

实验结束后的全部代码在:https://github.com/monkey2000/xv6-riscv/tree/mmap/

测试结果:

1
2
3
4
5
6
7
8
9
10
running mmaptest: 
$ make qemu-gdb
(2.3s)
mmaptest: mmap_test: OK
mmaptest: fork_test: OK
usertests:
$ make qemu-gdb
OK (25.5s)
time: OK
Score: 100/100

0. 实验准备

实验指导链接

上来直接:

1
2
$ cd xv6-riscv-fall19
$ git checkout mmap

1. 实现

Mmap 细节好多…

首先我在这里详述一下都需要实现哪些功能:

  1. 实现 mmap 和 munmap 两个系统调用
  2. 懒加载,只有触发 page fault 时才真正加载
  3. 支持 fork ,和子进程应该共享 Physical Pages (这块还要加引用计数,我从之前 cow 的里面抄了一个)
  4. 实现写回,即实现 MAP_SHARED/MAP_PRIVATE
  5. 实现 PROT_READ/PROT_WRITE

系统调用(以及写回):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
uint64 sys_mmap(void) {
struct proc *p = myproc();
uint64 addr;
int length, prot, flags, fd, offset;

if(argaddr(0, &addr) < 0)
return -1;

if(argint(1, &length) < 0 || argint(2, &prot) < 0 || argint(3, &flags) < 0
|| argint(4, &fd) < 0 || argint(5, &offset) < 0)
return -1;

// assumption: addr and offset shall all be zero
if(addr != 0 || offset != 0)
return -1;

if(p->ofile[fd] == 0 || p->ofile[fd]->type != FD_INODE)
return -1;

struct file *f = p->ofile[fd];
if((flags & MAP_SHARED) && (prot & PROT_WRITE) && !f->writable)
return -1;

struct map_info *map;
if((map = map_alloc()) == 0)
return -1;

map->vend = PGROUNDUP(map->vstart + length);
map->length = length;
map->prot = prot;
map->flags = flags;
map->file = f;
map->offset = offset;
map->used = 1;

filedup(map->file);

return map->vstart;
}

uint64 sys_munmap(void) {
struct proc *p = myproc();
uint64 addr;
int length;

if(argaddr(0, &addr) < 0 || argint(1, &length) < 0)
return -1;

if(addr < MMAP_VSTART || addr > MMAP_VEND)
return -1;

uint64 vpage_base = PGROUNDDOWN(addr);
int map_sel = (vpage_base - MMAP_VSTART) / MMAP_SIZE;
struct map_info *map = &p->minfo[map_sel];

if(map->used == 0)
return -1;

static pte_t *pte;
uint64 pa;
for(uint64 va = vpage_base; va < addr + length; va += PGSIZE) {
if((pte = walk(p->pagetable, va, 0)) && (*pte & PTE_V)) {
pa = PTE2PA(*pte);
if((map->flags & MAP_SHARED) && (*pte & PTE_D)) { // dirty page
// printf("dirty!\n");
uint64 file_start = va - vpage_base;
uint64 write_length = PGSIZE;
if(write_length > map->length - file_start)
write_length = map->length - file_start;
struct file *f = map->file;

begin_op(f->ip->dev);
ilock(f->ip);
writei(f->ip, 0, pa, file_start, write_length);
// printf("%d bytes wrote, start = %d, length = %d\n", p, file_start, write_length);
iunlock(f->ip);
end_op(f->ip->dev);
}
uvmunmap(p->pagetable, va, PGSIZE, 0);
kderef((void*)pa);
}
}

if(map->vstart == addr && map->length == length) {
fileclose(map->file);
map->used = 0;
} else {
if(map->vstart == addr)
map->vstart = addr + length;
if(map->vend == addr + length)
map->vend = addr;
}

return 0;
}

// For every page of mapping m that is present (PTE_V) in pagetable, take one
// more reference on the backing physical page via kref().
// Pages of the mapping that are not present are skipped.
void mmap_dup(pagetable_t pagetable, struct map_info *m) {
  for (uint64 va = m->vstart; va < m->vend; va += PGSIZE) {
    // Plain local on purpose: a function-level static would be shared by all
    // CPUs and could be overwritten between the walk() and the dereference.
    pte_t *pte = walk(pagetable, va, 0);
    if (pte && (*pte & PTE_V)) {
      uint64 pa = PTE2PA(*pte);
      kref((void *)pa);
    }
  }
}

// For every page of mapping m that is present (PTE_V) in pagetable, remove
// the page-table entry (without freeing through uvmunmap) and drop one
// reference on the backing physical page via kderef().
// Pages of the mapping that are not present are skipped.
void mmap_dedup(pagetable_t pagetable, struct map_info *m) {
  for (uint64 va = m->vstart; va < m->vend; va += PGSIZE) {
    // Plain local on purpose: a function-level static would be shared by all
    // CPUs and could be overwritten between the walk() and the dereference.
    pte_t *pte = walk(pagetable, va, 0);
    if (pte && (*pte & PTE_V)) {
      uint64 pa = PTE2PA(*pte);
      uvmunmap(pagetable, va, PGSIZE, 0);
      kderef((void *)pa);
    }
  }
}

懒加载(在 kernel/trap.c > usertrap() 里):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// Lazy loading for mmap: on a load/store page fault inside the mmap window,
// allocate a page, fill it from the mapped file and install it with the
// permissions requested at mmap time. Any other page fault kills the process.
if(r_scause() == 13 || r_scause() == 15) {
  // 13 for load page fault, 15 for store page fault
  uint64 fault_vaddr = r_stval();
  if(fault_vaddr >= MMAP_VSTART && fault_vaddr < MMAP_VEND) {
    pagetable_t pagetable = p->pagetable;
    uint64 vpage_base = PGROUNDDOWN(fault_vaddr);
    int map_sel = (vpage_base - MMAP_VSTART) / MMAP_SIZE;
    struct map_info *map = &p->minfo[map_sel];

    // check for invalid mapping & access
    int bad = map->used == 0 || vpage_base < map->vstart || fault_vaddr >= map->vend;
    if(!bad) {
      // The access type must be allowed by the mapping's protection.
      if(r_scause() == 15)
        bad = !(map->prot & PROT_WRITE);
      else
        bad = !(map->prot & PROT_READ);
    }
    // A fault on a page that is already present is a protection violation,
    // not a missing page. Installing it again would hit mappages() on a valid
    // PTE (stock xv6 mappages panics on a remap), so treat it as a segfault.
    if(!bad && walkaddr(pagetable, vpage_base) != 0)
      bad = 1;
    if(bad) {
      printf("usertrap(): segfault pid=%d epc=%p\n", p->pid, r_sepc());
      p->killed = 1;
      goto end;
    }

    // Bytes of this page that are backed by the mapping; the rest stays zero.
    uint64 file_start = vpage_base - map->vstart;
    uint64 read_length = 0;
    if(file_start < (uint64)map->length) {
      read_length = (uint64)map->length - file_start;
      if(read_length > PGSIZE)
        read_length = PGSIZE;
    }

    struct file *f = map->file;
    int perm = ((map->prot & PROT_READ) ? PTE_R : 0) | ((map->prot & PROT_WRITE) ? PTE_W : 0);

    char *mem = kalloc();
    if(mem == 0) {
      printf("usertrap(): segfault: no more physical page available, killing process due to a OOM\n");
      p->killed = 1;
      goto end;
    }

    memset(mem, 0, PGSIZE);
    if(read_length > 0) {
      // File position = file offset of the mapping + distance from its start.
      ilock(f->ip);
      readi(f->ip, 0, (uint64)mem, map->offset + file_start, read_length);
      iunlock(f->ip);
    }

    if(mappages(pagetable, vpage_base, PGSIZE, (uint64)mem, perm|PTE_U) != 0){
      printf("usertrap(): segfault: cannot map a page\n");
      kfree(mem);
      p->killed = 1;
      goto end;
    }
  } else {
    printf("usertrap(): unexpected scause %p (%s) pid=%d\n", r_scause(), scause_desc(r_scause()), p->pid);
    printf(" sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }
}

支持 fork 和 exit:

1
2
3
4
5
6
7
8
// kernel/proc.c > fork()
// share mmap with parent: copy every in-use mapping descriptor into the child
// and take a reference on each physical page of that mapping that is present
// in the child's page table.
// NOTE(review): the descriptor copy also copies map->file, but no filedup()
// is done for the child, while sys_munmap() calls fileclose() on a full
// unmap. Adding filedup() here needs a matching fileclose() in exit() --
// change both together.
for(i = 0; i < MMAP_NUM; i++) {
  if(p->minfo[i].used) {
    np->minfo[i] = p->minfo[i];
    // Pass the i-th mapping; np->minfo alone always refers to slot 0.
    mmap_dup(np->pagetable, &np->minfo[i]);
  }
}
1
2
3
4
5
6
7
8
9
// kernel/proc.c > exit()
// Release every live mmap region of the exiting process: mark the slot free,
// then drop the region's present pages from the page table via mmap_dedup().
// NOTE(review): this path neither calls fileclose() on the region's file nor
// writes dirty MAP_SHARED pages back, unlike sys_munmap() -- confirm whether
// that is intended.
for(int slot = 0; slot < MMAP_NUM; slot++) {
  struct map_info *region = &p->minfo[slot];
  if(!region->used)
    continue;
  region->used = 0;
  mmap_dedup(p->pagetable, region);
}


知识共享许可协议 2000 - 2099 MonKey's Blog | 自豪地采用 Hexo + Pandoc + KaTeX + Highlight.js