【Linux】Linux下查看cpu信息指令（top/mpstat/iostat/pidstat）说明-CFANZ编程社区

//
// File-system system calls.
// Mostly argument checking, since we don't trust
// user code, and calls into file.c and fs.c.
//

#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "stat.h"
#include "spinlock.h"
#include "proc.h"
#include "fs.h"
#include "sleeplock.h"
#include "file.h"
#include "fcntl.h"

// Read system call argument n as a file descriptor and check it against
// the calling process's open-file table.
// On success returns 0 and stores the descriptor in *pfd and the open
// file in *pf; either output pointer may be 0 to skip that result.
// Returns -1 if the descriptor is out of range or the slot is empty.
static int
argfd(int n, int *pfd, struct file **pf)
{
  int fd;
  struct file *f;

  argint(n, &fd);
  if(fd < 0 || fd >= NOFILE)
    return -1;

  f = myproc()->ofile[fd];
  if(f == 0)
    return -1;

  if(pfd != 0)
    *pfd = fd;
  if(pf != 0)
    *pf = f;
  return 0;
}

// Place f in the lowest-numbered empty slot of the current process's
// open-file table and return that slot's index.
// Returns -1 when all NOFILE slots are occupied.
// On success the table entry takes over the caller's reference to f.
static int
fdalloc(struct file *f)
{
  struct proc *p = myproc();
  int fd = 0;

  while(fd < NOFILE){
    if(p->ofile[fd] == 0){
      p->ofile[fd] = f;
      return fd;
    }
    fd++;
  }
  return -1;
}

// dup(fd): install the file behind descriptor argument 0 in a second
// descriptor slot and return the new descriptor.
// Returns -1 if the argument is not an open descriptor or no slot is free.
uint64
sys_dup(void)
{
  struct file *f;
  int newfd;

  if(argfd(0, 0, &f) < 0)
    return -1;

  newfd = fdalloc(f);
  if(newfd < 0)
    return -1;

  // Called only once the new slot exists; presumably this takes the
  // extra reference that the second table entry needs.
  filedup(f);
  return newfd;
}

// read(fd, buf, n): validate fd, then pass the user buffer address and
// byte count to fileread() and return its result.
// Returns -1 if fd is not an open descriptor.
uint64
sys_read(void)
{
  uint64 dst;   // user address of the destination buffer (argument 1)
  int count;    // requested byte count (argument 2)
  struct file *f;

  argaddr(1, &dst);
  argint(2, &count);
  if(argfd(0, 0, &f) >= 0)
    return fileread(f, dst, count);
  return -1;
}

// write(fd, buf, n): validate fd, then pass the user buffer address and
// byte count to filewrite() and return its result.
// Returns -1 if fd is not an open descriptor.
uint64
sys_write(void)
{
  uint64 src;   // user address of the source buffer (argument 1)
  int count;    // requested byte count (argument 2)
  struct file *f;

  argaddr(1, &src);
  argint(2, &count);
  if(argfd(0, 0, &f) >= 0)
    return filewrite(f, src, count);
  return -1;
}

uint64
sys_close(void)
{
  int fd;
  struct file *f;

  if(argfd(0, &fd, &f) < 0)
    return -1;
  myproc()->ofile[fd] = 0;
  fileclose(f);
  return 0;
}

// fstat(fd, st): validate fd, then let filestat() fill the user-space
// struct stat whose address is argument 1; its result is returned.
// Returns -1 if fd is not an open descriptor.
uint64
sys_fstat(void)
{
  uint64 ust;   // user pointer to struct stat
  struct file *f;

  argaddr(1, &ust);
  if(argfd(0, 0, &f) >= 0)
    return filestat(f, ust);
  return -1;
}

// Create the path new as a link to the same inode as old.
// System call arguments: 0 = old path, 1 = new path (user strings).
// Returns 0 on success; -1 if an argument cannot be fetched, old does
// not resolve, old is a directory, new's parent cannot be found, the
// parent is on a different device, or the entry cannot be added.
uint64
sys_link(void)
{
  char name[DIRSIZ], new[MAXPATH], old[MAXPATH];
  struct inode *dp, *ip;

  if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0)
    return -1;

  // Every exit path below pairs this begin_op() with an end_op().
  begin_op();
  if((ip = namei(old)) == 0){
    end_op();
    return -1;
  }

  ilock(ip);
  // Directories may not be linked.
  if(ip->type == T_DIR){
    iunlockput(ip);
    end_op();
    return -1;
  }

  // Bump the link count before creating the directory entry; the bad:
  // path undoes this if the entry cannot be created.
  ip->nlink++;
  iupdate(ip);
  iunlock(ip);

  // nameiparent() returns the parent directory of new and fills name,
  // which is used below as the name of the entry to add.
  if((dp = nameiparent(new, name)) == 0)
    goto bad;
  ilock(dp);
  // The new entry must live on the same device as the inode it names.
  if(dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0){
    iunlockput(dp);
    goto bad;
  }
  iunlockput(dp);
  iput(ip);

  end_op();

  return 0;

bad:
  // Roll back the earlier nlink++ and release ip.
  ilock(ip);
  ip->nlink--;
  iupdate(ip);
  iunlockput(ip);
  end_op();
  return -1;
}

// Report whether directory dp holds nothing besides its first two
// entries ("." and ".."): returns 1 if every later entry has inum 0,
// otherwise 0.  Panics if a directory entry cannot be read in full.
static int
isdirempty(struct inode *dp)
{
  struct dirent de;
  int off = 2*sizeof(de);   // skip the first two entries

  while(off < dp->size){
    if(readi(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
      panic("isdirempty: readi");
    if(de.inum != 0)
      return 0;
    off += sizeof(de);
  }
  return 1;
}

// unlink(path): remove the directory entry named by path (argument 0)
// and decrement the link count of the inode it referred to.
// Returns 0 on success; -1 if the path cannot be fetched, its parent
// does not resolve, the final element is "." or "..", the entry does
// not exist, or it names a non-empty directory.
uint64
sys_unlink(void)
{
  struct inode *ip, *dp;
  struct dirent de;
  char name[DIRSIZ], path[MAXPATH];
  uint off;

  if(argstr(0, path, MAXPATH) < 0)
    return -1;

  // Every exit path below pairs this begin_op() with an end_op().
  begin_op();
  // dp is the parent directory; name is filled in and used below as
  // the entry to look up.
  if((dp = nameiparent(path, name)) == 0){
    end_op();
    return -1;
  }

  ilock(dp);

  // Cannot unlink "." or "..".
  if(namecmp(name, ".") == 0 || namecmp(name, "..") == 0)
    goto bad;

  // off receives the entry's position in dp; it is the offset that
  // gets overwritten further down.
  if((ip = dirlookup(dp, name, &off)) == 0)
    goto bad;
  ilock(ip);

  if(ip->nlink < 1)
    panic("unlink: nlink < 1");
  // A directory may only be removed when it is empty.
  if(ip->type == T_DIR && !isdirempty(ip)){
    iunlockput(ip);
    goto bad;
  }

  // Erase the directory entry by overwriting it with zeros.
  memset(&de, 0, sizeof(de));
  if(writei(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de))
    panic("unlink: writei");
  // Undo the parent link count that create() adds for a child
  // directory's ".." entry.
  if(ip->type == T_DIR){
    dp->nlink--;
    iupdate(dp);
  }
  iunlockput(dp);

  ip->nlink--;
  iupdate(ip);
  iunlockput(ip);

  end_op();

  return 0;

bad:
  // Only dp is still held on this path.
  iunlockput(dp);
  end_op();
  return -1;
}

// TODO: complete mmap()
// Until it is implemented the call must report failure: an all-ones
// return value is what user code sees as (void *)-1 (MAP_FAILED),
// whereas 0 would read as a successful mapping at address 0.
uint64
sys_mmap(void)
{
  return -1;
}

// TODO: complete munmap()
// Until it is implemented the call reports failure (-1) instead of
// claiming that a region was unmapped.
uint64
sys_munmap(void)
{
  return -1;
}


// Create a new inode of the given type at path, or, for T_FILE only,
// return the file or device inode that already exists under that name.
//   path         - path of the entry to create
//   type         - inode type; callers in this file pass T_FILE, T_DIR
//                  or T_DEVICE
//   major, minor - stored in a newly allocated inode
// Returns the inode still locked (no success path unlocks ip), or 0 if
// the parent cannot be found, an incompatible entry already exists, no
// inode can be allocated, or a directory entry cannot be written.
static struct inode*
create(char *path, short type, short major, short minor)
{
  struct inode *ip, *dp;
  char name[DIRSIZ];

  // dp is the parent directory; name is filled in and used below as
  // the entry name.
  if((dp = nameiparent(path, name)) == 0)
    return 0;

  ilock(dp);

  // An entry with this name already exists: release the parent, then
  // decide whether the existing inode is acceptable.
  if((ip = dirlookup(dp, name, 0)) != 0){
    iunlockput(dp);
    ilock(ip);
    // Only a T_FILE request may reuse an existing file or device inode.
    if(type == T_FILE && (ip->type == T_FILE || ip->type == T_DEVICE))
      return ip;
    iunlockput(ip);
    return 0;
  }

  // Allocate the new inode on the parent's device.
  if((ip = ialloc(dp->dev, type)) == 0){
    iunlockput(dp);
    return 0;
  }

  ilock(ip);
  ip->major = major;
  ip->minor = minor;
  ip->nlink = 1;
  iupdate(ip);

  if(type == T_DIR){  // Create . and .. entries.
    // No ip->nlink++ for ".": avoid cyclic ref count.
    if(dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0)
      goto fail;
  }

  // Enter the new inode in its parent directory.
  if(dirlink(dp, name, ip->inum) < 0)
    goto fail;

  if(type == T_DIR){
    // now that success is guaranteed:
    dp->nlink++;  // for ".."
    iupdate(dp);
  }

  // The parent is released; ip is returned still locked.
  iunlockput(dp);

  return ip;

 fail:
  // something went wrong. de-allocate ip.
  ip->nlink = 0;
  iupdate(ip);
  iunlockput(ip);
  iunlockput(dp);
  return 0;
}

// open(path, omode): open (and with O_CREATE, create if necessary) the
// file named by argument 0 using the mode flags in argument 1.
// Returns the new file descriptor, or -1 if the path cannot be fetched
// or resolved/created, a directory is opened with a mode other than
// O_RDONLY, a device inode has an out-of-range major number, or no
// struct file / descriptor slot is available.
uint64
sys_open(void)
{
  char path[MAXPATH];
  int fd, omode;
  struct file *f;
  struct inode *ip;
  int n;

  argint(1, &omode);
  if((n = argstr(0, path, MAXPATH)) < 0)
    return -1;

  // Every exit path below pairs this begin_op() with an end_op().
  begin_op();

  if(omode & O_CREATE){
    // create() hands back ip already locked.
    ip = create(path, T_FILE, 0, 0);
    if(ip == 0){
      end_op();
      return -1;
    }
  } else {
    if((ip = namei(path)) == 0){
      end_op();
      return -1;
    }
    ilock(ip);
    // Directories may only be opened with omode exactly O_RDONLY.
    if(ip->type == T_DIR && omode != O_RDONLY){
      iunlockput(ip);
      end_op();
      return -1;
    }
  }

  // Reject device inodes whose major number is outside [0, NDEV).
  if(ip->type == T_DEVICE && (ip->major < 0 || ip->major >= NDEV)){
    iunlockput(ip);
    end_op();
    return -1;
  }

  // Need both a struct file and a descriptor slot; if only the slot
  // allocation failed, give the struct file back.
  if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){
    if(f)
      fileclose(f);
    iunlockput(ip);
    end_op();
    return -1;
  }

  if(ip->type == T_DEVICE){
    f->type = FD_DEVICE;
    f->major = ip->major;
  } else {
    f->type = FD_INODE;
    f->off = 0;
  }
  f->ip = ip;
  // Readable unless O_WRONLY is set; writable with O_WRONLY or O_RDWR.
  f->readable = !(omode & O_WRONLY);
  f->writable = (omode & O_WRONLY) || (omode & O_RDWR);

  // O_TRUNC applies to regular files only.
  if((omode & O_TRUNC) && ip->type == T_FILE){
    itrunc(ip);
  }

  // iunlock(), not iunlockput(): ip stays referenced through f->ip.
  iunlock(ip);
  end_op();

  return fd;
}

// mkdir(path): create a directory at the path given by argument 0.
// Returns 0 on success, -1 if the path cannot be fetched or create()
// fails.
uint64
sys_mkdir(void)
{
  char path[MAXPATH];
  struct inode *ip = 0;

  begin_op();
  if(argstr(0, path, MAXPATH) >= 0)
    ip = create(path, T_DIR, 0, 0);
  if(ip == 0){
    end_op();
    return -1;
  }

  // create() returned the inode locked; release it again.
  iunlockput(ip);
  end_op();
  return 0;
}

// mknod(path, major, minor): create a device inode at the path given by
// argument 0 with the major/minor numbers from arguments 1 and 2.
// Returns 0 on success, -1 if the path cannot be fetched or create()
// fails.
uint64
sys_mknod(void)
{
  char path[MAXPATH];
  int major, minor;
  struct inode *ip = 0;

  begin_op();
  argint(1, &major);
  argint(2, &minor);
  if(argstr(0, path, MAXPATH) >= 0)
    ip = create(path, T_DEVICE, major, minor);
  if(ip == 0){
    end_op();
    return -1;
  }

  // create() returned the inode locked; release it again.
  iunlockput(ip);
  end_op();
  return 0;
}

// chdir(path): make the directory named by argument 0 the calling
// process's current working directory.
// Returns 0 on success, -1 if the path cannot be fetched, does not
// resolve, or does not name a directory.
uint64
sys_chdir(void)
{
  struct proc *p = myproc();
  char path[MAXPATH];
  struct inode *ip = 0;

  begin_op();
  if(argstr(0, path, MAXPATH) >= 0)
    ip = namei(path);
  if(ip == 0){
    end_op();
    return -1;
  }

  ilock(ip);
  if(ip->type == T_DIR){
    // Keep the reference to ip (unlock only), drop the old cwd, and
    // install ip in its place.
    iunlock(ip);
    iput(p->cwd);
    end_op();
    p->cwd = ip;
    return 0;
  }

  iunlockput(ip);
  end_op();
  return -1;
}

// exec(path, argv): copy the program path (argument 0) and the
// null-terminated user argv array (argument 1) into kernel memory, then
// call exec().  Each argument string gets its own kalloc() page.
// Returns exec()'s result, or -1 if the path or any argv element cannot
// be fetched, a page cannot be allocated, or argv has no terminating
// null within MAXARG entries.  All argument pages are freed before
// returning, whatever the outcome.
uint64
sys_exec(void)
{
  char path[MAXPATH], *argv[MAXARG];
  uint64 uargv, uarg;
  int ret = -1;
  int i;

  argaddr(1, &uargv);
  if(argstr(0, path, MAXPATH) < 0)
    return -1;

  // Zero the table so the cleanup loop can stop at the first unused slot.
  memset(argv, 0, sizeof(argv));
  for(i = 0; ; i++){
    if(i >= NELEM(argv))
      goto done;
    if(fetchaddr(uargv + sizeof(uint64)*i, (uint64*)&uarg) < 0)
      goto done;
    if(uarg == 0){
      // End of the user's argv array.
      argv[i] = 0;
      break;
    }
    if((argv[i] = kalloc()) == 0)
      goto done;
    if(fetchstr(uarg, argv[i], PGSIZE) < 0)
      goto done;
  }

  ret = exec(path, argv);

 done:
  for(i = 0; i < NELEM(argv) && argv[i] != 0; i++)
    kfree(argv[i]);
  return ret;
}

// pipe(fdarray): allocate a pipe, install its read and write ends in
// two descriptor slots, and copy the two descriptors (read end first)
// to the user array whose address is argument 0.
// Returns 0 on success.  Returns -1 if the pipe cannot be allocated, a
// descriptor slot is unavailable, or the copy to user space fails; in
// the latter two cases any slot already taken is cleared and both file
// ends are closed.
uint64
sys_pipe(void)
{
  uint64 fdarray; // user pointer to array of two integers
  struct file *rf, *wf;
  int fd0, fd1;
  struct proc *p = myproc();

  argaddr(0, &fdarray);
  if(pipealloc(&rf, &wf) < 0)
    return -1;

  fd1 = -1;
  fd0 = fdalloc(rf);
  if(fd0 >= 0)
    fd1 = fdalloc(wf);
  if(fd0 < 0 || fd1 < 0)
    goto fail;

  if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0)
    goto fail;
  if(copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0)
    goto fail;
  return 0;

 fail:
  // Release whichever descriptor slots were taken, then both file ends.
  if(fd0 >= 0)
    p->ofile[fd0] = 0;
  if(fd1 >= 0)
    p->ofile[fd1] = 0;
  fileclose(rf);
  fileclose(wf);
  return -1;
}

这段代码是 xv6 操作系统内核中的一部分，主要实现了与文件系统相关的系统调用（如 open、read、write、unlink 等）。每个系统调用函数从用户进程中接收参数，执行安全检查，然后调用文件系统接口（如 file.c 和 fs.c 中的函数）来完成具体的文件操作。以下是代码的概述和关键点分析：

主要功能概述

文件描述符管理
- argfd 和 fdalloc：用于从用户传递的文件描述符中提取文件信息并分配新文件描述符。
- sys_dup：实现文件描述符的复制功能。
文件操作系统调用
- sys_read 和 sys_write：分别用于从文件中读取数据和写入数据。
- sys_close：关闭文件描述符并释放文件资源。
- sys_fstat：获取文件状态信息。
文件系统操作
- sys_link 和 sys_unlink：用于创建和删除文件链接。
- sys_open 和 sys_mkdir：用于打开文件和创建目录。
- sys_mknod：用于创建设备文件。
- sys_chdir：更改当前工作目录。
- sys_exec：用于执行新程序。
- sys_pipe：创建一个管道并返回一对文件描述符。
内存映射相关系统调用
- sys_mmap 和 sys_munmap：这些函数目前是 TODO 部分，将来会实现内存映射和取消映射功能。
```
// TODO: complete mmap()
// Until it is implemented the call must report failure: an all-ones
// return value is what user code sees as (void *)-1 (MAP_FAILED),
// whereas 0 would read as a successful mapping at address 0.
uint64
sys_mmap(void)
{
  return -1;
}

// TODO: complete munmap()
// Until it is implemented the call reports failure (-1) instead of
// claiming that a region was unmapped.
uint64
sys_munmap(void)
{
  return -1;
}
```

在 sys_mmap 和 sys_munmap 中，我们将实现类似于 Unix 中的 mmap 和 munmap 系统调用。这些函数将允许用户进程将文件映射到进程的虚拟内存空间中，并在不需要时解除该映射。这种机制对于共享内存、内存映射文件等应用场景非常有用。

思路概述

sys_mmap（内存映射）
- 根据传入的参数（文件描述符、大小、权限等），在进程的地址空间中确定映射所用的虚拟地址区间（下文的示例代码为简化起见，直接使用调用者传入的 addr，而不是由内核挑选空闲区间）。
- 为映射区分配物理内存页，并将文件的内容加载到这些物理内存中。
- 在进程的虚拟地址空间中创建相应的映射区域（VMA，即虚拟内存区域），记录该映射的相关信息，包括起始地址、大小、权限等。
- 返回映射区域的起始地址，供用户进程访问该文件内容。
sys_munmap（取消内存映射）
- 找到要解除映射的虚拟地址区间。
- 将该区间内的内存页面释放，解除对文件的映射。
- 更新 VMA 列表，移除该映射记录。

思路概述

sys_mmap（内存映射）
- 根据传入的参数（文件描述符、大小、权限等），在进程的地址空间中找到合适的虚拟地址区间。
- 为映射区分配物理内存页，并将文件的内容加载到这些物理内存中。
- 在进程的虚拟地址空间中创建相应的映射区域（VMA，即虚拟内存区域），记录该映射的相关信息，包括起始地址、大小、权限等。
- 返回映射区域的起始地址，供用户进程访问该文件内容。
sys_munmap（取消内存映射）
- 找到要解除映射的虚拟地址区间。
- 将该区间内的内存页面释放，解除对文件的映射。
- 更新 VMA 列表，移除该映射记录。

代码示例

以下是实现 sys_mmap 和 sys_munmap 的基本代码框架及思路。

uint64
sys_mmap(void)
{
  struct file *f;
  uint64 addr;
  int length, prot, flags, fd;
  struct proc *p = myproc();

  // 获取 mmap 的参数
  if(argaddr(0, &addr) < 0 || argint(1, &length) < 0 || argint(2, &prot) < 0 || argint(3, &flags) < 0 || argfd(4, &fd, &f) < 0)
    return -1;

  if(length <= 0)
    return -1;

  // 找到可用的虚拟内存区域 VMA
  struct vma *vma = 0;
  for(int i = 0; i < VMASIZE; i++) {
    if(p->vma[i].used == 0) {
      vma = &p->vma[i];
      break;
    }
  }
  if(vma == 0)
    return -1; // 没有空闲 VMA

  // 设置 VMA 信息
  vma->addr = addr;
  vma->len = length;
  vma->prot = prot;
  vma->flags = flags;
  vma->f = f;
  vma->used = 1;

  // 为映射区域分配物理内存并建立页表映射
  for(int i = 0; i < length; i += PGSIZE) {
    uint64 va = addr + i;
    char *mem = kalloc();
    if(mem == 0) {
      // 如果分配失败，释放已经分配的页
      for(int j = 0; j < i; j += PGSIZE) {
        va = addr + j;
        uvmunmap(p->pagetable, va, 1, 1);
      }
      vma->used = 0;
      return -1;
    }
    memset(mem, 0, PGSIZE);

    // 将文件内容映射到内存
    if(readi(f->ip, 0, (uint64)mem, i, PGSIZE) < 0) {
      kfree(mem);
      return -1;
    }

    int perm = PTE_U;
    if(prot & PROT_READ) perm |= PTE_R;
    if(prot & PROT_WRITE) perm |= PTE_W;
    if(prot & PROT_EXEC) perm |= PTE_X;

    if(mappages(p->pagetable, va, PGSIZE, (uint64)mem, perm) != 0) {
      kfree(mem);
      return -1;
    }
  }
  
  filedup(f); // 增加文件的引用计数，保持文件句柄有效
  return addr;
}

`sys_mmap` 的解释

参数检查：首先从用户传入的参数中提取目标地址 addr、映射长度 length、权限 prot、标志 flags 和文件描述符 fd。
寻找空闲 VMA：在当前进程的 VMA 列表中寻找一个空闲的 VMA 条目，用于记录该映射的详细信息。
设置 VMA 信息：将该 VMA 的起始地址、长度、权限等信息记录下来，以便以后对该映射区进行管理。
内存分配与映射：对于映射区域的每一页：
- 使用 kalloc 分配物理内存页。
- 将文件内容加载到内存。
- 使用 mappages 将虚拟地址映射到物理地址，并设置合适的访问权限。
文件引用计数：调用 filedup 增加文件的引用计数，这样即使用户进程随后关闭了该文件描述符，映射所依赖的 struct file 在映射存续期间仍然有效。

uint64
sys_munmap(void)
{
  uint64 addr;
  int length;
  struct proc *p = myproc();

  if(argaddr(0, &addr) < 0 || argint(1, &length) < 0)
    return -1;

  struct vma *vma = 0;
  for(int i = 0; i < VMASIZE; i++) {
    if(p->vma[i].used && p->vma[i].addr == addr) {
      vma = &p->vma[i];
      break;
    }
  }
  if(vma == 0)
    return -1; // 找不到对应的 VMA

  // 遍历并释放该区域的物理内存页
  for(int i = 0; i < length; i += PGSIZE) {
    uint64 va = addr + i;
    if(va >= vma->addr && va < vma->addr + vma->len) {
      // 写回文件内容（如果是 MAP_SHARED）
      if(vma->flags & MAP_SHARED && (vma->prot & PROT_WRITE)) {
        struct file *f = vma->f;
        if(f && filewrite(f, va, PGSIZE) != PGSIZE) {
          return -1;
        }
      }
      uvmunmap(p->pagetable, va, 1, 1);
    }
  }

  vma->used = 0; // 标记 VMA 为未使用
  fileclose(vma->f); // 减少文件引用计数
  return 0;
}

`sys_munmap` 的解释

参数检查：从用户传入的参数中获取起始地址 addr 和映射长度 length。
查找对应 VMA：在 VMA 列表中找到与 addr 对应的 VMA。
释放物理内存页：
- 对于每一页，检查是否需要将内容写回文件（如果是 MAP_SHARED 且具有写权限）。
- 使用 uvmunmap 解除该页的映射，并释放物理内存。
更新 VMA 和文件引用计数：将 VMA 标记为未使用，并减少文件的引用计数。

总结

以上实现的 sys_mmap 和 sys_munmap 涉及了一些基本步骤，如查找 VMA、设置内存映射、加载文件内容、释放内存等。在实际实现中，可能需要根据具体的需求添加更多的错误处理和检查。

// One memory-mapped region of a process, as filled in by sys_mmap() and
// looked up by sys_munmap().
// `used` is the slot-occupancy flag that both of those functions test
// and set; without it they do not compile against this struct.
struct vma {
  uint64 addr;     // starting address
  int len;         // length
  int prot;        // protection level
  int flags;       // share or private or so on
  int fd;          // file descriptor
  int offset;      // presumably the file offset of the mapping's first byte -- TODO confirm
  struct file *f;  // file
  int used;        // nonzero while this slot describes a live mapping
};

在这个 vma 结构体中，我们看到一个关键的差异：len 字段的类型不同。你使用了 int，而我建议使用 uint64。这两者的选择各有优缺点，具体分析如下：

1. `int` vs `uint64` 的区别

int：通常为 32 位（在大多数 64 位架构上），因此它能表示的最大正数是 2^31 - 1（大约 2GB）。
uint64：为 64 位无符号整数，能表示的最大值是 2^64 - 1（大约 16EB），远远超过 int 的范围。

使用 `int len` 的情况

使用 int 定义 len 有以下优点和缺点：

优点：
- 更节省内存，特别是在 32 位架构下，int 占 4 字节，而 uint64 占 8 字节。
- 在部分系统调用中，使用 int 更符合参数格式，特别是对于较小的映射范围。
缺点：
- 当映射区域的长度超过 2GB 时，int 的值会溢出，不足以表示更大的内存范围。
- 如果未来扩展系统并支持更大内存空间，可能会需要修改成更大的数据类型。