MIT 6.828 - 4. Lab 04: Xv6 lazy page allocation

实验总结

本次实验用时约八个小时。
收获是对线性地址的理解更深入了。

遇到的困难包括：

懒。
xv6-riscv 默认开了 kpti（内核和用户态页表分离），故需要在各种系统调用头部手动模拟 traverse 页表的过程，以及模拟处理缺页异常。（我现在觉得这不是一个很好的设计）

测试结果：

$ make grade
running lazytests: 
(3.7s) 
  lazy: pte printout: OK 
  lazy: map: OK 
  lazy: unmap: OK 
usertests: 
(95.8s) 
  usertests: pgbug: OK 
  usertests: sbrkbugs: OK 
  usertests: argptest: OK 
  usertests: sbrkmuch: OK 
  usertests: sbrkfail: OK 
  usertests: sbrkarg: OK 
  usertests: stacktest: OK 
  usertests: all tests: OK 
Score: 100/100

0. 实验准备

实验指导链接

上来直接：

`$ cd xv6-riscv-fall19`
`$ git checkout lazy`

实验分为四个子任务（实际更多个）：

设计一个输出页表的调试程序 vmprint(pagetable_t) 。
实现不立即分配内存的 sbrk 调用。
冒烟。
各种修复。

1. vmprint

// Recursively print every valid entry of the page table `pagetable`.
// Each printed line is prefixed with one " .." per unit of `depth`,
// followed by the entry index, the raw PTE and the physical address
// it refers to. A valid entry with none of the R/W/X bits set is
// treated as a pointer to a lower-level table and is walked with
// depth + 1.
void printwalk(pagetable_t pagetable, int depth)
{
  // a page table holds 2^9 = 512 PTEs.
  for (int idx = 0; idx < 512; idx++)
  {
    pte_t entry = pagetable[idx];

    // invalid entries are neither printed nor followed.
    if ((entry & PTE_V) == 0)
      continue;

    int indent = depth;
    while (indent-- > 0)
      printf(" ..");
    printf("%d: pte %p pa %p\n", idx, entry, PTE2PA(entry));

    // an entry carrying any permission bit is a leaf: nothing below it.
    if (entry & (PTE_R | PTE_W | PTE_X))
      continue;

    // otherwise it points to a lower-level page table.
    printwalk((pagetable_t)PTE2PA(entry), depth + 1);
  }
}

// Print the address of page table `t` on a header line, then dump all
// of its valid entries via printwalk(), starting at indentation depth 1.
void vmprint(pagetable_t t)
{
  const int top_depth = 1;

  printf("page table %p\n", t);
  printwalk(t, top_depth);
}

2. 实现 lazy sbrk

非常好改，但通过 sbrk 传入负数来归还线性空间时，需要及时 dealloc（调用 uvmdealloc 释放对应的页）。

uint64
sys_sbrk(void)
{
  int addr;
  int n;

  if(argint(0, &n) < 0)
    return -1;
  addr = myproc()->sz;
  // if(growproc(n) < 0)
  //  return -1;

  myproc()->sz += n;
  if(n < 0)
    uvmdealloc(myproc()->pagetable, addr, myproc()->sz);
  
  return addr;
}

3 & 4. 冒烟 & 修复

首先需要正确处理缺页异常，完成必要的检查（访问非法的线性地址和栈溢出）：

//
// handle an interrupt, exception, or system call from user space.
// called from trampoline.S
//
void
usertrap(void)
{
  int which_dev = 0;

  if((r_sstatus() & SSTATUS_SPP) != 0)
    panic("usertrap: not from user mode");

  // send interrupts and exceptions to kerneltrap(),
  // since we're now in the kernel.
  w_stvec((uint64)kernelvec);

  struct proc *p = myproc();
  
  // save user program counter.
  p->tf->epc = r_sepc();
  
  if(r_scause() == 8){
    // system call

    if(p->killed)
      exit(-1);

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->tf->epc += 4;

    // an interrupt will change sstatus &c registers,
    // so don't enable until done with those registers.
    intr_on();

    syscall();
  } else if((which_dev = devintr()) != 0){
    // ok
  } else if(r_scause() == 13 || r_scause() == 15) {
    // ------------------------------------- here -------------------------------------
    // 13: Page load fault, 15: Page store fault
    // printf("usertrap(): page fault, scause %p pid=%d\n", r_scause(), p->pid);
    // printf("            sepc=%p vaddr=%p\n", r_sepc(), r_stval());

    struct proc *p = myproc();
    pagetable_t pagetable = p->pagetable;
    uint64 fault_vaddr = (uint64) r_stval();
    uint64 vpage_base = PGROUNDDOWN(fault_vaddr);

    if(fault_vaddr >= p->sz) {
      // printf("usertrap(): page fault: invalid memory access to vaddr %p\n", fault_vaddr);
      p->killed = 1;
      goto end;
    }

    if(fault_vaddr < p->ustack) {
      // printf("usertrap(): page fault: segfault on vaddr %p below stack %p\n", fault_vaddr, p->ustack);
      p->killed = 1;
      goto end;
    }

    char *mem = kalloc();
    if(mem == 0) {
      // printf("usertrap(): page fault: no more physical page available, killing process due to a OOM\n");
      p->killed = 1;
      goto end;
    }

    memset(mem, 0, PGSIZE);

    if(mappages(pagetable, vpage_base, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U) != 0){
      // printf("usertrap(): page fault: cannot map a page");
      kfree(mem);
      p->killed = 1;
      goto end;
    }
  } else {
    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }

end:
  if(p->killed)
    exit(-1);

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2)
    yield();

  usertrapret();
}

注意到为了能够正确找到栈空间顶部的位置，我给 proc 数据结构加了一个新的 entry 叫做 ustack 。对应需要修改 fork 和 exec 的代码，因为在这里有新 proc 结构的生成，故需要维护这个属性。

模拟页表和缺页的检查代码实现如下：

（注意到需要特判 pid = 1 的情况，是因为 xv6 的 initcode 中引用了大于用户线性空间最大值 proc->sz（这个属性由 sbrk 维护）的内存地址，会触发一个误判，此为特例）

经过与万呆呆讨论，无需特判，但我很懒我不改了。

// Pre-fault the user range [addr, addr + size) in `pagetable`: every
// page of the range that has no valid PTE yet gets a fresh zero-filled
// physical page mapped read/write/execute for user mode.
//
// For processes with pid > 1 the start address is validated first: it
// must be below p->sz and must not fall in the page directly below
// p->ustack (the stack guard).
//
// Returns 0 on success (including size == 0, which touches nothing),
// -1 if the start address is rejected, the range wraps around the
// address space, no physical page is available, or a mapping cannot
// be installed.
int uvmchkaddr(pagetable_t pagetable, uint64 addr, uint64 size) {
  struct proc* p = myproc();
  char *mem;
  pte_t *pte;
  uint64 a, end;

  if(p->pid > 1) {
    if(addr >= p->sz) {
      printf("uvmchkaddr(): page fault: invalid memory access to vaddr %p\n", addr);
      return -1;
    }

    if(addr < p->ustack && addr >= p->ustack - PGSIZE) {
      printf("uvmchkaddr(): page fault: segfault on vaddr %p on stack guard\n", addr);
      return -1;
    }
  }

  // a range that wraps around the address space can never be valid.
  if(addr + size < addr)
    return -1;

  // an empty range touches no page.
  if(size == 0)
    return 0;

  a = PGROUNDDOWN(addr);
  // round up the real end of the range. Rounding a + size instead would
  // miss the last page whenever an unaligned range crosses a page
  // boundary (e.g. addr = 0x1fff, size = 2).
  end = PGROUNDUP(addr + size);

  // addresses at or above p->sz are not part of the process (the start
  // address check above rejects them), so never allocate pages for them.
  if(p->pid > 1 && end > PGROUNDUP(p->sz))
    end = PGROUNDUP(p->sz);

  for(; a < end; a += PGSIZE) {
    // already backed by a valid mapping: nothing to do for this page.
    if((pte = walk(pagetable, a, 1)) != 0 && (*pte & PTE_V))
      continue;

    mem = kalloc();
    if(mem == 0)
      return -1;
    memset(mem, 0, PGSIZE);
    if(mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U) != 0){
      kfree(mem);
      return -1;
    }
  }

  return 0;
}

2000 - 2099 MonKey's Blog | 自豪地采用 Hexo + Pandoc + KaTeX + Highlight.js