Today we continue our study of the kernel. Since we have chosen the 2.6.26 kernel as the version to study, it is best to have that source tree at hand; in places you will need to read the code for yourself. Readers with some background can range more widely, while newcomers should simply follow the steps here, because we keep a firm grip on the main line throughout. Picking up from the application program of the previous section, it contains this call:

    shmat(shmid, (void *)0, 0);

This call maps the shared memory segment we created earlier into the current process. Let us follow it into the kernel, starting once again at sys_ipc():

    case SHMAT:
        switch (version) {
        default: {
            ulong raddr;
            ret = do_shmat (first, (char __user *) ptr, second, &raddr);
            if (ret)
                return ret;
            return put_user (raddr, (ulong __user *) third);
        }
        case 1: /* iBCS2 emulator entry point */
            if (!segment_eq(get_fs(), get_ds()))
                return -EINVAL;
            /*
             * The "(ulong *) third" is valid _only_ because of
             * the kernel segment thing.
             */
            return do_shmat (first, (char __user *) ptr, second,
                             (ulong *) third);
        }

It looks involved, but only one function here matters: do_shmat(), at line 816 of ipc/shm.c. It is fairly long, so we take it in pieces:

    long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
    {
        struct shmid_kernel *shp;
        unsigned long addr;
        unsigned long size;
        struct file * file;
        int err;
        unsigned long flags;
        unsigned long prot;
        int acc_mode;
        unsigned long user_addr;
        struct ipc_namespace *ns;
        struct shm_file_data *sfd;
        struct path path;
        mode_t f_mode;
        err = -EINVAL;
        if (shmid < 0)
            goto out;
        else if ((addr = (ulong)shmaddr)) {
            if (addr & (SHMLBA-1)) {
                if (shmflg & SHM_RND)
                    addr &= ~(SHMLBA-1);    /* round down */
                else
    #ifndef __ARCH_FORCE_SHMLBA
                    if (addr & ~PAGE_MASK)
    #endif
                        goto out;
            }
            flags = MAP_SHARED | MAP_FIXED;
        } else {
            if ((shmflg & SHM_REMAP))
                goto out;

            flags = MAP_SHARED;
        }
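        /*
         * (Annotation for this walkthrough, not part of the kernel source:
         * on x86 SHMLBA equals PAGE_SIZE, so with SHM_RND an address such as
         * 0x8049234 is rounded down to 0x8049000; without SHM_RND an
         * unaligned non-NULL address simply makes do_shmat() return -EINVAL.)
         */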
        if (shmflg & SHM_RDONLY) {
            prot = PROT_READ;
            acc_mode = S_IRUGO;
            f_mode = FMODE_READ;
        } else {
            prot = PROT_READ | PROT_WRITE;
            acc_mode = S_IRUGO | S_IWUGO;
            f_mode = FMODE_READ | FMODE_WRITE;
        }
        if (shmflg & SHM_EXEC) {
            prot |= PROT_EXEC;
            acc_mode |= S_IXUGO;
        }

The first parameter is the id of the shared memory segment we created in the previous section; the second and third are the NULL pointer and the 0 that the application passed; the fourth is the pointer through which the attach address is handed back to sys_ipc(), which then copies it to user space with put_user(). The function first examines the requested mapping address: it must be aligned to SHMLBA (on most architectures the page size). An unaligned address is rounded down only if SHM_RND was requested; otherwise do_shmat() fails with -EINVAL. Our application passed a NULL pointer, so the kernel will choose an address itself later on. The rest of this fragment simply translates the caller's flags into mmap protection bits, ipc access bits and a file mode. We read on:

        /*
         * We cannot rely on the fs check since SYSV IPC does have an
         * additional creator id...
         */
        ns = current->nsproxy->ipc_ns;
        shp = shm_lock_check(ns, shmid);
        if (IS_ERR(shp)) {
            err = PTR_ERR(shp);
            goto out;
        }
        err = -EACCES;
        if (ipcperms(&shp->shm_perm, acc_mode))
            goto out_unlock;

        err = security_shm_shmat(shp, shmaddr, shmflg);
        if (err)
            goto out_unlock;

        path.dentry = dget(shp->shm_file->f_path.dentry);
        path.mnt = shp->shm_file->f_path.mnt;
        shp->shm_nattch++;
        size = i_size_read(path.dentry->d_inode);
        shm_unlock(shp);
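        /*
         * (Annotation for this walkthrough: shm_nattch is bumped while the
         * segment is still locked, so it cannot be torn down while the
         * mapping is being set up; the lock is then dropped because the
         * allocations and do_mmap() below may sleep. The out_nattch path at
         * the end drops the count again and destroys the segment if it was
         * already marked for deletion.)
         */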
In the code above, shm_lock_check() looks up the struct shmid_kernel for our segment and the result is assigned to the local variable shp. shm_lock_check() is part of the common ipc machinery, so let us glance at it:

    static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
                                                      int id)
    {
        struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
        if (IS_ERR(ipcp))
            return (struct shmid_kernel *)ipcp;

        return container_of(ipcp, struct shmid_kernel, shm_perm);
    }

Inside, it is identical to msg_lock_check() for message queues: it simply finds the shared memory segment we created and locks it. We already went through msg_lock_check() when we studied message queues, so we will not follow it again here. Back in do_shmat(), ipcperms() and security_shm_shmat() then carry out permission checks much like the ones the file system performs; we will meet them again when we look at the file system. Now comes the key part:

        err = -ENOMEM;
        sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
        if (!sfd)
            goto out_put_dentry;
        file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
        if (!file)
            goto out_free;

        file->private_data = sfd;
        file->f_mapping = shp->shm_file->f_mapping;
        sfd->id = shp->shm_perm.id;
        sfd->ns = get_ipc_ns(ns);
        sfd->file = shp->shm_file;
        sfd->vm_ops = NULL;
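        /*
         * (For reference; the definition below is quoted from memory from
         * ipc/shm.c of this kernel series, so check your own tree. It is the
         * per-attach bookkeeping structure being filled in above: the file
         * just returned by alloc_file() carries shm_file_operations and keeps
         * a pointer to the segment's underlying tmpfs file in sfd->file.
         *
         *     struct shm_file_data {
         *         int id;
         *         struct ipc_namespace *ns;
         *         struct file *file;
         *         const struct vm_operations_struct *vm_ops;
         *     };
         */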
        down_write(&current->mm->mmap_sem);
        if (addr && !(shmflg & SHM_REMAP)) {
            err = -EINVAL;
            if (find_vma_intersection(current->mm, addr, addr + size))
                goto invalid;
            /*
             * If shm segment goes below stack, make sure there is some
             * space left for the stack to grow (at least 4 pages).
             */
            if (addr < current->mm->start_stack &&
                addr > current->mm->start_stack - size - PAGE_SIZE * 5)
                goto invalid;
        }

        user_addr = do_mmap (file, addr, size, prot, flags, 0);
        *raddr = user_addr;
        err = 0;
        if (IS_ERR_VALUE(user_addr))
            err = (long)user_addr;
    invalid:
        up_write(&current->mm->mmap_sem);

        fput(file);
    out_nattch:
        down_write(&shm_ids(ns).rw_mutex);
        shp = shm_lock_down(ns, shmid);
        BUG_ON(IS_ERR(shp));
        shp->shm_nattch--;
        if (shp->shm_nattch == 0 &&
            shp->shm_perm.mode & SHM_DEST)
            shm_destroy(ns, shp);
        else
            shm_unlock(shp);
        up_write(&shm_ids(ns).rw_mutex);
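        /*
         * (Annotation for this walkthrough: SHM_DEST is set in shm_perm.mode
         * when shmctl(id, IPC_RMID, ...) is issued while processes are still
         * attached; the last detach, or a failed attach passing through here,
         * then really destroys the segment.)
         */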
    out:
        return err;

    out_unlock:
        shm_unlock(shp);
        goto out;
    out_free:
        kfree(sfd);
    out_put_dentry:
        dput(path.dentry);
        goto out_nattch;
    }

The sequence above looks complicated, but the function that does the real work is a single one, do_mmap(), which belongs to the mapping code of memory management. It is worth a preview here, if only to get a feel for how a mapping is set up. It lives at line 1098 of include/linux/mm.h and is very short:

    static inline unsigned long do_mmap(struct file *file, unsigned long addr,
            unsigned long len, unsigned long prot,
            unsigned long flag, unsigned long offset)
    {
        unsigned long ret = -EINVAL;
        if ((offset + PAGE_ALIGN(len)) < offset)
            goto out;
        if (!(offset & ~PAGE_MASK))
            ret = do_mmap_pgoff(file, addr, len, prot, flag,
                                offset >> PAGE_SHIFT);
    out:
        return ret;
    }

It hands the work over to do_mmap_pgoff(), at line 908 of mm/mmap.c. That function is also long; we will analyse it in detail in the memory management chapters, and here we only look at the part our path actually takes:

    unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
                unsigned long len, unsigned long prot,
                unsigned long flags, unsigned long pgoff)
    {
        ......
        addr = get_unmapped_area(file, addr, len, pgoff, flags);

Our mapping address is 0, so the kernel has get_unmapped_area() pick a free range of the address space automatically. We will not follow that function, since it goes straight into the details of memory management, which we put aside for now. Further down, do_mmap_pgoff() goes on to call:

        return mmap_region(file, addr, len, flags, vm_flags, pgoff,
                           accountable);

This is where the mapping is really made, so let us step in:

    unsigned long mmap_region(struct file *file, unsigned long addr,
                unsigned long len, unsigned long flags,
                unsigned int vm_flags, unsigned long pgoff,
                int accountable)
    {
        /* only the key part is shown ...... */
        if (file) {
            error = -EINVAL;
            if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
                goto free_vma;
            if (vm_flags & VM_DENYWRITE) {
                error = deny_write_access(file);
                if (error)
                    goto free_vma;
                correct_wcount = 1;
            }
            vma->vm_file = file;
            get_file(file);
            error = file->f_op->mmap(file, vma);
            if (error)
                goto unmap_and_free_vma;
            if (vm_flags & VM_EXECUTABLE)
                added_exe_file_vma(mm);
        } else if (vm_flags & VM_SHARED) {
            error = shmem_zero_setup(vma);
            if (error)
                goto free_vma;
        }

The line that really does the work here is file->f_op->mmap(file, vma). The file is the one do_shmat() just created with alloc_file(); through its private data it refers to the file that was created in the shared memory file system when we built the segment in the previous section. With that file pointer in hand, everything goes through the file system's file_operations hook functions. Why call them hooks? The members of file_operations are simply pointers, that is, addresses; we call them hooks because things are forever being hung on them: you store the address of your own function in file_operations, and when code reaches that member it jumps straight to whatever was hung on the hook. The details must wait until we discuss the file system; for now it is enough to know the mechanism exists. So where does this call take us? Into the hook functions of the shared memory code. Strictly speaking, the file we attached carries shm_file_operations, defined in ipc/shm.c (around line 322), and its .mmap handler, shm_mmap(), receives the call first; shm_mmap() then forwards it to the segment's underlying tmpfs file, whose table is shmem_file_operations in mm/shmem.c:

    static const struct file_operations shmem_file_operations = {
        .mmap           = shmem_mmap,
    #ifdef CONFIG_TMPFS
        .llseek         = generic_file_llseek,
        .read           = shmem_file_read,
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .fsync          = simple_sync_file,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
    #endif
    };
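For completeness, here is what that forwarding step looks like. This is a condensed sketch of shm_mmap() as it appears in this kernel series, reconstructed for the walkthrough rather than quoted, so check your own ipc/shm.c for the exact code:

    static int shm_mmap(struct file *file, struct vm_area_struct *vma)
    {
        struct shm_file_data *sfd = shm_file_data(file);
        int ret;

        /* sfd->file is the tmpfs file created by newseg(), so this
         * call ends up in shmem_mmap(), shown next. */
        ret = sfd->file->f_op->mmap(sfd->file, vma);
        if (ret != 0)
            return ret;
        sfd->vm_ops = vma->vm_ops;      /* remember shmem_vm_ops */
        vma->vm_ops = &shm_vm_ops;      /* shm_vm_ops.fault delegates back to it */
        shm_open(vma);                  /* bump the attach statistics */
        return ret;
    }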
Either way, execution arrives at shmem_mmap():

    static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
    {
        file_accessed(file);
        vma->vm_ops = &shmem_vm_ops;
        vma->vm_flags |= VM_CAN_NONLINEAR;
        return 0;
    }
The key point is the line vma->vm_ops = &shmem_vm_ops; — here another hook table appears, this time the one memory management consults for a virtual memory area:

    static struct vm_operations_struct shmem_vm_ops = {
        .fault          = shmem_fault,
    #ifdef CONFIG_NUMA
        .set_policy     = shmem_set_policy,
        .get_policy     = shmem_get_policy,
    #endif
    };

At this point the attach is, for our purposes, complete. You may ask how it can be complete when nothing visible has happened; remember that the kernel is a structure of interlocking parts, and from here the work is handed over to memory management, which we will analyse in its own chapters. All the attach has done is install the necessary mapping machinery: it has hung the shared memory segment on the right hooks, so that when memory management later has to map part of the segment it comes back through the mechanism set up here to allocate and fill pages. We can only sketch this now. The real mapping happens at the "page fault". What is a page fault? Memory is managed in units of pages, and a fault is taken on a page for which no mapping has been established yet; we will define the term properly later. Assume for the moment that memory management needs such a page and has entered the page fault handler: it calls through the .fault hook above (in our case by way of shm_fault() in ipc/shm.c, which delegates to the vm_ops saved earlier) and so reaches shmem_fault():

    static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
    {
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
        int error;
        int ret;

        if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
            return VM_FAULT_SIGBUS;

        error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
        if (error)
            return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);

        mark_page_accessed(vmf->page);
        return ret | VM_FAULT_LOCKED;
    }

From here shmem_getpage() looks the page up in the page cache or brings it back from swap; that is memory management territory and will be covered later. We stop at this point, having seen that attaching a shared memory segment ultimately comes down to memory management. Interested readers can follow the functions above further into the allocation and swapping machinery.
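Finally, to tie the kernel path back to user space, here is a minimal, self-contained example of the calls this and the previous section have been following. The key and the segment size are arbitrary values chosen only for illustration:

    #include <stdio.h>
    #include <string.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
        /* Create (or find) a 4 KB segment; 0x1234 is just an example key. */
        int shmid = shmget(0x1234, 4096, IPC_CREAT | 0666);
        if (shmid < 0) {
            perror("shmget");
            return 1;
        }

        /* The attach studied above: NULL address and flags 0, so the kernel
         * chooses the mapping address for us via get_unmapped_area(). */
        char *p = shmat(shmid, NULL, 0);
        if (p == (void *)-1) {
            perror("shmat");
            return 1;
        }

        strcpy(p, "hello from shared memory");
        printf("attached at %p: %s\n", (void *)p, p);

        shmdt(p);                       /* detach: shm_nattch drops again   */
        shmctl(shmid, IPC_RMID, NULL);  /* mark the segment for destruction */
        return 0;
    }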