在Linux中,struct vm_area_struct表示的虚拟地址区间是给进程使用的。在进程的虚拟地址空间中,给定一个地址addr,
查找结束地址高于addr的第一个区间,是经常用到的操作。
find_vma()扫描给定mm(进程地址空间)中由vm_area_struct结构形成的红黑树,查找结束地址高于addr的第一个区间,若不存在这样的区间则返回NULL:
1578 /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
1579 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1580 {
1581 struct vm_area_struct *vma = NULL; /* result; stays NULL when mm is NULL */
1582
1583 if (mm) {
1584 /* Check the cache first. */
1585 /* (Cache hit rate is typically around 35%.) */
1586 vma = mm->mmap_cache;
1587 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { /* miss: no cached VMA, or it does not contain addr */
1588 struct rb_node * rb_node;
1589
1590 rb_node = mm->mm_rb.rb_node; /* start the walk at the tree root */
1591 vma = NULL; /* discard the cached pointer that failed the check */
1592
1593 while (rb_node) {
1594 struct vm_area_struct * vma_tmp;
1595
1596 vma_tmp = rb_entry(rb_node,
1597 struct vm_area_struct, vm_rb); /* VMA embedding this tree node */
1598
1599 if (vma_tmp->vm_end > addr) {
1600 vma = vma_tmp; /* candidate: ends above addr */
1601 if (vma_tmp->vm_start <= addr)
1602 break; /* addr lies inside this VMA: stop searching */
1603 rb_node = rb_node->rb_left; /* keep looking in the left subtree */
1604 } else
1605 rb_node = rb_node->rb_right; /* ends at or below addr: go right */
1606 }
1607 if (vma)
1608 mm->mmap_cache = vma; /* remember the result for the next lookup */
1609 }
1610 }
1611 return vma;
1612 }
代码首先查看mm中缓存的vma(mm->mmap_cache)是否满足要求,即addr是否落在该vma的[vm_start, vm_end)范围内;如果满足,则直接返回。正如注释所说,缓存大约有35%的命中率。
如果缓存的vma不满足要求,则通过遍历红黑树来查找。
rb_entry跟list_entry功能一样。要注意的是这一句:
vma_tmp->vm_end > addr
因为vm_area_struct的vm_start是包含在此vm_area_struct中的,而vm_end则不包含在此vm_area_struct中,即区间为[vm_start, vm_end)。
函数find_vma_prepare()与find_vma()基本相同,它同样扫描mm的vm_area_struct结构所形成的红黑树,试图找到结束地址高于addr的第一个区间;不同之处在于它不使用mmap_cache,并且通过pprev、rb_link、rb_parent额外返回前一个vma以及遍历停止处的红黑树链接位置和父节点:
350 static struct vm_area_struct * /* returns the last VMA seen with vm_end > addr, or NULL */
351 find_vma_prepare(struct mm_struct *mm, unsigned long addr,
352 struct vm_area_struct **pprev, struct rb_node ***rb_link, /* out: *pprev, *rb_link */
353 struct rb_node ** rb_parent) /* out: *rb_parent */
354 {
355 struct vm_area_struct * vma;
356 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
357
358 __rb_link = &mm->mm_rb.rb_node; /* address of the root link */
359 rb_prev = __rb_parent = NULL;
360 vma = NULL;
361
362 while (*__rb_link) { /* descend until an empty link is reached */
363 struct vm_area_struct *vma_tmp;
364
365 __rb_parent = *__rb_link; /* node examined in this iteration */
366 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
367
368 if (vma_tmp->vm_end > addr) {
369 vma = vma_tmp; /* candidate: ends above addr */
370 if (vma_tmp->vm_start <= addr)
371 break; /* addr lies inside this VMA; __rb_link still points at its link */
372 __rb_link = &__rb_parent->rb_left; /* continue in the left subtree */
373 } else {
374 rb_prev = __rb_parent; /* last node seen that ends at or below addr */
375 __rb_link = &__rb_parent->rb_right; /* continue in the right subtree */
376 }
377 }
378
379 *pprev = NULL; /* default: no preceding VMA found */
380 if (rb_prev)
381 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb); /* VMA embedding rb_prev */
382 *rb_link = __rb_link; /* link slot where the walk stopped */
383 *rb_parent = __rb_parent; /* last node visited, NULL if the tree is empty */
384 return vma;
385 }
还有一个与此相关的函数是insert_vm_struct(),这是把vma插入到mm的VMA链表和mm的红黑树中,如果映射了文件,也进行了与文件相关的操作。
insert_vm_struct():
2283 int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) /* returns 0 on success, -ENOMEM on failure */
2284 {
2285 struct vm_area_struct * __vma, * prev;
2286 struct rb_node ** rb_link, * rb_parent;
2287
2288 /*
2289 * The vm_pgoff of a purely anonymous vma should be irrelevant
2290 * until its first write fault, when page's anon_vma and index
2291 * are set. But now set the vm_pgoff it will almost certainly
2292 * end up with (unless mremap moves it elsewhere before that
2293 * first wfault), so /proc/pid/maps tells a consistent story.
2294 *
2295 * By setting it to reflect the virtual start address of the
2296 * vma, merges and splits can happen in a seamless way, just
2297 * using the existing file pgoff checks and manipulations.
2298 * Similarly in do_mmap_pgoff and in do_brk.
2299 */
2300 if (!vma->vm_file) { /* no backing file */
2301 BUG_ON(vma->anon_vma); /* an anon_vma must not be set yet at this point */
2302 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; /* page offset derived from the start address */
2303 }
2304 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent); /* find the insertion point for vma->vm_start */
2305 if (__vma && __vma->vm_start < vma->vm_end) /* existing VMA overlaps the new range */
2306 return -ENOMEM;
2307 if ((vma->vm_flags & VM_ACCOUNT) && /* accounted mapping: run the memory check */
2308 security_vm_enough_memory_mm(mm, vma_pages(vma))) /* nonzero result is treated as failure */
2309 return -ENOMEM;
2310 vma_link(mm, vma, prev, rb_link, rb_parent); /* defined elsewhere; presumably links vma into mm's list and rb-tree */
2311 return 0;
2312 }
而struct vm_struct表示的虚拟地址是给内核使用的,它们对应的物理页面可以是不连续的。
以32位x86(用户空间3G、内核空间1G的划分)为例,struct vm_struct表示的地址空间范围为什么不是整个3G~4G呢?
原来,3G ~ (3G + 896M)范围的地址是用来映射连续的物理页面的,
这个范围的虚拟地址和对应的实际物理地址有着简单的对应关系,
即对应0~896M的物理地址空间,而(3G + 896M) ~ (3G + 896M + 8M)是安全保护区域(例如,所有指向这8M地址空间的指针都是非法的),
因此struct vm_struct使用(3G + 896M + 8M) ~ 4G地址空间来映射非连续的物理页面。