• mm/memory


    /*
     *  linux/mm/memory.c
     *
     *  Copyright (C) 1991, 1992  Linus Torvalds
     */

    /*
     * demand-loading started 01.12.91 - seems it is high on the list of
     * things wanted, and it should be easy to implement. - Linus
     */

    /*
     * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
     * pages started 02.12.91, seems to work. - Linus.
     *
     * Tested sharing by executing about 30 /bin/sh: under the old kernel it
     * would have taken more than the 6M I have free, but it worked well as
     * far as I could see.
     *
     * Also corrected some "invalidate()"s - I wasn't doing enough of them.
     */

    /*
     * Real VM (paging to/from disk) started 18.12.91. Much more work and
     * thought has to go into this. Oh, well..
     * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
     *        Found it. Everything seems to work now.
     * 20.12.91  -  Ok, making the swap-device changeable like the root.
     */

    #include <asm/system.h>
    #include <linux/config.h>

    #include <linux/signal.h>
    #include <linux/sched.h>
    #include <linux/head.h>
    #include <linux/kernel.h>
    #include <linux/errno.h>
    #include <linux/string.h>
    #include <linux/types.h>
    #include <linux/ptrace.h>
    #include <linux/mman.h>

    /*
     * Upper limit used by the sanity checks in this file: page and page-table
     * addresses at or above this value are rejected as bad.
     */
    unsigned long high_memory = 0;

    /* Page table number 0 (defined elsewhere). */
    extern unsigned long pg0[1024];        /* page table for 0-4MB for everybody */

    /* Sound-driver memory initialisation (defined elsewhere). */
    extern void sound_mem_init(void);
    /* Kernel-error reporting helper (defined elsewhere). */
    extern void die_if_kernel(char *,struct pt_regs *,long);

    /* Number of swap pages. */
    int nr_swap_pages = 0;
    /* Number of free pages. */
    int nr_free_pages = 0;
    /* Head of the list of free pages. */
    unsigned long free_page_list = 0;
    /*
     * The secondary free_page_list is used for malloc() etc things that
     * may need pages during interrupts etc. Normal get_free_page() operations
     * don't touch it, so it stays as a kind of "panic-list", that can be
     * accessed when all other mm tricks have failed.
     */

    int nr_secondary_pages = 0;
    unsigned long secondary_page_list = 0;

    /*
     * Copy one page from 'from' to 'to': 1024 "movsl" transfers with the
     * direction flag cleared.
     *
     * The definition must be joined to the asm statement with a
     * backslash-newline; without it the macro expands to nothing and the
     * asm statement is left stranded at file scope.
     */
    #define copy_page(from,to) \
    __asm__("cld ; rep ; movsl": :"S" (from),"D" (to),"c" (1024):"cx","di","si")

    /*
     * Per-page bookkeeping, indexed with MAP_NR(address). The code in this
     * file treats each entry as a use count and also tests it against
     * MAP_PAGE_RESERVED.
     */
    unsigned short * mem_map = NULL;

    /* True when 'addr' lies below the end_code of task 'p'. */
    #define CODE_SPACE(addr,p) ((addr) < (p)->end_code)

    /*
     * oom() reports an out-of-memory condition (so that the user knows why
     * the process died) and then kills the task: the SIGKILL handler slot is
     * reset to NULL, SIGKILL is removed from the blocked mask, and the signal
     * is sent.
     */
    void oom(struct task_struct * task)
    {
        /* Signal numbers are 1-based; handler table and mask are 0-based. */
        const int kill_slot = SIGKILL-1;

        printk(" out of memory ");
        task->sigaction[kill_slot].sa_handler = NULL;
        task->blocked &= ~(1<<kill_slot);
        send_sig(SIGKILL,task,1);
    }

    /*
     * Release the page table referenced by one page-directory slot, together
     * with everything that table maps.
     *
     * The slot is cleared first. Entries that are not present or that point
     * at or above high_memory are reported and dropped; tables living in a
     * reserved page are left alone. Otherwise every non-zero entry of the
     * table is cleared and released (free_page() for present pages,
     * swap_free() for the rest), and finally the table page itself is freed.
     */
    static void free_one_table(unsigned long * page_dir)
    {
        unsigned long dir_entry = *page_dir;
        unsigned long *pte;
        int remaining;

        if (dir_entry == 0)
            return;
        /* Detach the table from the directory before touching it. */
        *page_dir = 0;
        if (!(dir_entry & PAGE_PRESENT) || dir_entry >= high_memory) {
            printk("Bad page table: [%p]=%08lx ",page_dir,dir_entry);
            return;
        }
        /* Page tables stored in reserved pages are never released. */
        if (mem_map[MAP_NR(dir_entry)] & MAP_PAGE_RESERVED)
            return;

        pte = (unsigned long *) (dir_entry & PAGE_MASK);
        for (remaining = PTRS_PER_PAGE ; remaining > 0 ; remaining--, pte++) {
            unsigned long entry = *pte;

            if (entry) {
                *pte = 0;
                if (entry & PAGE_PRESENT)
                    free_page(entry & PAGE_MASK);
                else
                    swap_free(entry);
            }
        }
        /* Finally give back the page that held the table. */
        free_page(dir_entry & PAGE_MASK);
    }

    /*
     * This function clears all user-level page tables of a process - this
     * is needed by execve(), so that old pages aren't in the way. Note that
     * unlike 'free_page_tables()', this function still leaves a valid
     * page-table-tree in memory: it just removes the user pages. The two
     * functions are similar, but there is a fundamental difference.
     */
     //清空页表---清空进程所占的用户空间页表----------------------------------------------ok
    void clear_page_tables(struct task_struct * tsk)
    {
        int i;
        unsigned long pg_dir;
        unsigned long * page_dir;

        if (!tsk)
            return;
        if (tsk == task[0])
            panic("task[0] (swapper) doesn't support exec() ");
        //获取到页表目录地址
        pg_dir = tsk->tss.cr3;
        //页表目录中第一项
        page_dir = (unsigned long *) pg_dir;
        //页表目录不正确
        if (!page_dir || page_dir == swapper_pg_dir) {
            printk("Trying to clear kernel page-directory: not good ");
            return;
        }
        //页表目录所在内存页,被多个进程所共享
        if (mem_map[MAP_NR(pg_dir)] > 1) {
            unsigned long * new_pg;
            
            //申请一页内存
            if (!(new_pg = (unsigned long*) get_free_page(GFP_KERNEL))) {
                oom(tsk);
                return;
            }
            //遍历后面的256项,循环拷贝
            for (i = 768 ; i < 1024 ; i++)
                new_pg[i] = page_dir[i];
            //释放页目录表
            free_page(pg_dir);
            //重新指向页目录表
            tsk->tss.cr3 = (unsigned long) new_pg;
            return;
        }
        //遍历释放虚拟低地址空间的0~3G,768项,非共享
        for (i = 0 ; i < 768 ; i++,page_dir++)
            free_one_table(page_dir);
        invalidate();
        return;
    }

    /*
     * This function frees up all page tables of a process when it exits.
     */
     //释放页表---进程退出,释放所有页表--------------------------------------------------ok
    void free_page_tables(struct task_struct * tsk)
    {
        int i;
        unsigned long pg_dir;
        unsigned long * page_dir;

        if (!tsk)
            return;
        if (tsk == task[0]) {
            printk("task[0] (swapper) killed: unable to recover ");
            panic("Trying to free up swapper memory space");
        }
        //页表目录地址
        pg_dir = tsk->tss.cr3;
        if (!pg_dir || pg_dir == (unsigned long) swapper_pg_dir) {
            printk("Trying to free kernel page-directory: not good ");
            return;
        }
        //交换页目录地址
        tsk->tss.cr3 = (unsigned long) swapper_pg_dir;
        //当前任务
        if (tsk == current)
            __asm__ __volatile__("movl %0,%%cr3": :"a" (tsk->tss.cr3));
        //共享则调用free_page处理
        if (mem_map[MAP_NR(pg_dir)] > 1) {
            free_page(pg_dir);
            return;
        }
        //非共享,循环遍历释放页表
        page_dir = (unsigned long *) pg_dir;
        for (i = 0 ; i < PTRS_PER_PAGE ; i++,page_dir++)
            free_one_table(page_dir);
        free_page(pg_dir);
        invalidate();
    }

    /*
     * clone_page_tables() makes 'tsk' use the very same page directory as
     * the current task - both processes will have the exact same pages in
     * memory. The directory page's use count is raised by one. There are
     * probably races in the memory management with cloning, but we'll
     * see..
     *
     * Always returns 0.
     */
    int clone_page_tables(struct task_struct * tsk)
    {
        unsigned long shared_dir = current->tss.cr3;

        tsk->tss.cr3 = shared_dir;
        mem_map[MAP_NR(shared_dir)]++;
        return 0;
    }

    /*
     * copy_page_tables() just copies the whole process memory range:
     * note the special handling of RESERVED (ie kernel) pages, which
     * means that they are always shared by all processes.
     *
     * A new page directory is allocated for 'tsk' and filled from the
     * current task's directory. Every non-reserved page table gets a fresh
     * copy; the pages behind the entries are shared, not copied, and their
     * use counts are raised.
     *
     * Returns 0 on success or -ENOMEM when a directory or table page cannot
     * be allocated (in the latter case whatever was built for 'tsk' is torn
     * down again with free_page_tables()).
     */
    int copy_page_tables(struct task_struct * tsk)
    {
        int i;
        unsigned long old_pg_dir, *old_page_dir;// source (current task) page directory
        unsigned long new_pg_dir, *new_page_dir;// destination (tsk) page directory

        if (!(new_pg_dir = get_free_page(GFP_KERNEL))) // one page for the new directory
            return -ENOMEM;
        // the directory being copied is the current task's
        old_pg_dir = current->tss.cr3;
        tsk->tss.cr3 = new_pg_dir;
        old_page_dir = (unsigned long *) old_pg_dir;
        new_page_dir = (unsigned long *) new_pg_dir;
        for (i = 0 ; i < PTRS_PER_PAGE ; i++,old_page_dir++,new_page_dir++) {
            int j;
            unsigned long old_pg_table, *old_page_table;
            unsigned long new_pg_table, *new_page_table;

            old_pg_table = *old_page_dir;
            if (!old_pg_table)
                continue;
            /* A non-present or out-of-range entry is dropped from the source too. */
            if (old_pg_table >= high_memory || !(old_pg_table & PAGE_PRESENT)) {
                printk("copy_page_tables: bad page table: "
                    "probable memory corruption");
                *old_page_dir = 0;
                continue;
            }
            /* Tables in reserved pages are shared as-is, not duplicated. */
            if (mem_map[MAP_NR(old_pg_table)] & MAP_PAGE_RESERVED) {
                *new_page_dir = old_pg_table;
                continue;
            }
            if (!(new_pg_table = get_free_page(GFP_KERNEL))) {
                free_page_tables(tsk);
                return -ENOMEM;
            }
            old_page_table = (unsigned long *) (PAGE_MASK & old_pg_table);
            new_page_table = (unsigned long *) (PAGE_MASK & new_pg_table);
            for (j = 0 ; j < PTRS_PER_PAGE ; j++,old_page_table++,new_page_table++) {
                unsigned long pg;
                pg = *old_page_table;
                if (!pg)
                    continue;
                /* Non-present, non-zero entries are handed to swap_duplicate(). */
                if (!(pg & PAGE_PRESENT)) {
                    *new_page_table = swap_duplicate(pg);
                    continue;
                }
                /* Writable COW pages lose PAGE_RW in the copied entry. */
                if ((pg & (PAGE_RW | PAGE_COW)) == (PAGE_RW | PAGE_COW))
                    pg &= ~PAGE_RW;
                *new_page_table = pg;
                /* Reserved pages: no write-back to the source, no count change. */
                if (mem_map[MAP_NR(pg)] & MAP_PAGE_RESERVED)
                    continue;
                /* Store the (possibly write-protected) entry in the source as well. */
                *old_page_table = pg;
                mem_map[MAP_NR(pg)]++;
            }
            /* Hook the finished table into the new directory. */
            *new_page_dir = new_pg_table | PAGE_TABLE;
        }
        invalidate();
        return 0;
    }

    /*
     * a more complete version of free_page_tables which performs with page
     * granularity.
     */
     //释放一定范围的页表
    int unmap_page_range(unsigned long from, unsigned long size)
    {
        unsigned long page, page_dir;
        unsigned long *page_table, *dir;
        unsigned long poff, pcnt, pc;

        if (from & ~PAGE_MASK) {
            printk("unmap_page_range called with wrong alignment ");
            return -EINVAL;
        }
        //计算size有多少页
        size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
        //计算开始地址所在的页目录项的位置
        dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
        //计算偏移
        poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
        //计算页数
        if ((pcnt = PTRS_PER_PAGE - poff) > size)
            pcnt = size;

        //
        for ( ; size > 0; ++dir, size -= pcnt,
             pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size)) {
            if (!(page_dir = *dir))    {
                poff = 0;
                continue;
            }
            if (!(page_dir & PAGE_PRESENT)) {
                printk("unmap_page_range: bad page directory.");
                continue;
            }
            //找到页表
            page_table = (unsigned long *)(PAGE_MASK & page_dir);
            if (poff) {
                page_table += poff;
                poff = 0;
            }
            //遍历释放
            for (pc = pcnt; pc--; page_table++) {
                if ((page = *page_table) != 0) {
                    *page_table = 0;
                    if (1 & page) {
                        if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
                            if (current->rss > 0)
                                --current->rss;
                        free_page(PAGE_MASK & page);
                    } else
                        swap_free(page);
                }
            }
            if (pcnt == PTRS_PER_PAGE) {
                *dir = 0;
                free_page(PAGE_MASK & page_dir);
            }
        }
        invalidate();
        return 0;
    }
    /*
     * Replace every mapping of the current task in [from, from + size) with
     * the same entry value: 0 when 'mask' is 0, otherwise 'mask' ORed with
     * ZERO_PAGE.
     *
     * A non-zero 'mask' must contain PAGE_PRESENT and no bits inside
     * PAGE_MASK; 'from' must be page aligned; 'size' is in bytes and is
     * rounded up to whole pages. Existing entries are released first
     * (free_page() for present pages, with current->rss decremented for
     * non-reserved ones; swap_free() otherwise). Missing page tables are
     * allocated on the way.
     *
     * Returns 0 on success, -EINVAL for a bad 'mask' or 'from', and -ENOMEM
     * when a page table cannot be allocated.
     */
    int zeromap_page_range(unsigned long from, unsigned long size, int mask)
    {
        unsigned long *page_table, *dir;
        unsigned long poff, pcnt;
        unsigned long page;

        if (mask) {
            if ((mask & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT) {
                printk("zeromap_page_range: mask = %08x ",mask);
                return -EINVAL;
            }
            mask |= ZERO_PAGE;
        }
        if (from & ~PAGE_MASK) {
            printk("zeromap_page_range: from = %08lx ",from);
            return -EINVAL;
        }
        /* Directory slot of the start address. */
        dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
        /* Page count, rounded up. */
        size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
        /* Entry index inside the first page table. */
        poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
        /* Entries to handle in the first page table. */
        if ((pcnt = PTRS_PER_PAGE - poff) > size)
            pcnt = size;

        while (size > 0) {
            if (!(PAGE_PRESENT & *dir)) {
                    /* clear page needed here?  SRB. */
                if (!(page_table = (unsigned long*) get_free_page(GFP_KERNEL))) {
                    invalidate();
                    return -ENOMEM;
                }
                /*
                 * The slot is tested again after the allocation
                 * (presumably because get_free_page() may block - TODO
                 * confirm); if a table appeared meanwhile, the new page
                 * is given back.
                 */
                if (PAGE_PRESENT & *dir) {
                    free_page((unsigned long) page_table);
                    page_table = (unsigned long *)(PAGE_MASK & *dir++);
                } else
                    *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
            } else
                page_table = (unsigned long *)(PAGE_MASK & *dir++);
            /* The start offset only applies to the first table. */
            page_table += poff;
            poff = 0;
            /* 'size' is reduced up front; the loop itself consumes pcnt. */
            for (size -= pcnt; pcnt-- ;) {
                if ((page = *page_table) != 0) {
                    *page_table = 0;
                    if (page & PAGE_PRESENT) {
                        if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
                            if (current->rss > 0)
                                --current->rss;
                        free_page(PAGE_MASK & page);
                    } else
                        swap_free(page);
                }
                *page_table++ = mask;
            }
            /* Entries to handle in the next page table. */
            pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size);
        }
        invalidate();
        return 0;
    }

    /*
     * maps a range of physical memory into the requested pages. the old
     * mappings are removed. any references to nonexistent pages results
     * in null mappings (currently treated as "copy-on-access")
     */
     //映射一段物理内存到请求页,旧的映射被移除,任何引用到不存在的页的源于
     //空映射
     //重新映射
    int remap_page_range(unsigned long from, unsigned long to, unsigned long size, int mask)
    {
        unsigned long *page_table, *dir;
        unsigned long poff, pcnt;
        unsigned long page;

        if (mask) {
            if ((mask & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT) {
                printk("remap_page_range: mask = %08x ",mask);
                return -EINVAL;
            }
        }
        if ((from & ~PAGE_MASK) || (to & ~PAGE_MASK)) {
            printk("remap_page_range: from = %08lx, to=%08lx ",from,to);
            return -EINVAL;
        }
        dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
        size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
        poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
        if ((pcnt = PTRS_PER_PAGE - poff) > size)
            pcnt = size;

        while (size > 0) {
            if (!(PAGE_PRESENT & *dir)) {
                /* clearing page here, needed?  SRB. */
                if (!(page_table = (unsigned long*) get_free_page(GFP_KERNEL))) {
                    invalidate();
                    return -1;
                }
                *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
            }
            else
                page_table = (unsigned long *)(PAGE_MASK & *dir++);
            if (poff) {
                page_table += poff;
                poff = 0;
            }

            for (size -= pcnt; pcnt-- ;) {
                if ((page = *page_table) != 0) {
                    *page_table = 0;
                    if (PAGE_PRESENT & page) {
                        if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
                            if (current->rss > 0)
                                --current->rss;
                        free_page(PAGE_MASK & page);
                    } else
                        swap_free(page);
                }

                /*
                 * the first condition should return an invalid access
                 * when the page is referenced. current assumptions
                 * cause it to be treated as demand allocation in some
                 * cases.
                 */
                if (!mask)
                    *page_table++ = 0;    /* not present */
                else if (to >= high_memory)
                    *page_table++ = (to | mask);
                else if (!mem_map[MAP_NR(to)])
                    *page_table++ = 0;    /* not present */
                else {
                    *page_table++ = (to | mask);
                    if (!(mem_map[MAP_NR(to)] & MAP_PAGE_RESERVED)) {
                        ++current->rss;
                        mem_map[MAP_NR(to)]++;
                    }
                }
                to += PAGE_SIZE;
            }
            pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size);
        }
        invalidate();
        return 0;
    }

    /*
     * This function puts a page in memory at the wanted address.
     * It returns the physical address of the page gotten, 0 if
     * out of memory (either when trying to access page-table or
     * page.)
     */
     //放置一页
    unsigned long put_page(struct task_struct * tsk,unsigned long page,
        unsigned long address,int prot)
    {
        unsigned long *page_table;

        if ((prot & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT)
            printk("put_page: prot = %08x ",prot);
        if (page >= high_memory) {
            printk("put_page: trying to put page %08lx at %08lx ",page,address);
            return 0;
        }
        page_table = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
        if ((*page_table) & PAGE_PRESENT)
            page_table = (unsigned long *) (PAGE_MASK & *page_table);
        else {
            printk("put_page: bad page directory entry ");
            oom(tsk);
            *page_table = BAD_PAGETABLE | PAGE_TABLE;
            return 0;
        }
        page_table += (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
        if (*page_table) {
            printk("put_page: page already exists ");
            *page_table = 0;
            invalidate();
        }
        *page_table = page | prot;
    /* no need for invalidate */
        return page;
    }

    /*
     * The previous function doesn't work very well if you also want to mark
     * the page dirty: exec.c wants this, as it has earlier changed the page,
     * and we want the dirty-status to be correct (for VM). Thus the same
     * routine, but this time we mark it dirty too.
     */
     //放置脏页
    unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
    {
        unsigned long tmp, *page_table;

        if (page >= high_memory)
            printk("put_dirty_page: trying to put page %08lx at %08lx ",page,address);
        if (mem_map[MAP_NR(page)] != 1)
            printk("mem_map disagrees with %08lx at %08lx ",page,address);
        page_table = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
        if (PAGE_PRESENT & *page_table)
            page_table = (unsigned long *) (PAGE_MASK & *page_table);
        else {
            if (!(tmp = get_free_page(GFP_KERNEL)))
                return 0;
            if (PAGE_PRESENT & *page_table) {
                free_page(tmp);
                page_table = (unsigned long *) (PAGE_MASK & *page_table);
            } else {
                *page_table = tmp | PAGE_TABLE;
                page_table = (unsigned long *) tmp;
            }
        }
        page_table += (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
        if (*page_table) {
            printk("put_dirty_page: page already exists ");
            *page_table = 0;
            invalidate();
        }
        *page_table = page | (PAGE_DIRTY | PAGE_PRIVATE);
    /* no need for invalidate */
        return page;
    }

    /*
     * This routine handles present pages, when users try to write
     * to a shared page. It is done by copying the page to a new address
     * and decrementing the shared-page counter for the old page.
     *
     * Note that we do many checks twice (look at do_wp_page()), as
     * we have to be careful about race-conditions.
     *
     * Goto-purists beware: the only reason for goto's here is that it results
     * in better assembly code.. The "default" path will see no jumps at all.
     *
     * 'error_code' and 'user_esp' are accepted but not used in this body.
     * A spare page is requested before anything is looked at; every path
     * that does not end up installing it gives it back.
     */
    static void __do_wp_page(unsigned long error_code, unsigned long address,
        struct task_struct * tsk, unsigned long user_esp)
    {
        unsigned long *pde, pte, old_page, prot;
        unsigned long new_page;

        new_page = __get_free_page(GFP_KERNEL);
        pde = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
        /* 'pte' first holds the directory entry itself ... */
        pte = *pde;
        if (!(pte & PAGE_PRESENT))
            goto end_wp_page;
        if ((pte & PAGE_TABLE) != PAGE_TABLE || pte >= high_memory)
            goto bad_wp_pagetable;
        /* ... and from here on the address of the page-table entry. */
        pte &= PAGE_MASK;
        pte += PAGE_PTR(address);
        old_page = *(unsigned long *) pte;
        if (!(old_page & PAGE_PRESENT))
            goto end_wp_page;
        if (old_page >= high_memory)
            goto bad_wp_page;
        /* Already writable: nothing left to do. */
        if (old_page & PAGE_RW)
            goto end_wp_page;
        tsk->min_flt++;
        /* Keep the old flag bits and add write permission. */
        prot = (old_page & ~PAGE_MASK) | PAGE_RW;
        old_page &= PAGE_MASK;
        /* Anything but a use count of exactly 1 requires a private copy. */
        if (mem_map[MAP_NR(old_page)] != 1) {
            if (new_page) {
                /* rss is raised when the page being replaced was reserved. */
                if (mem_map[MAP_NR(old_page)] & MAP_PAGE_RESERVED)
                    ++tsk->rss;
                copy_page(old_page,new_page);
                *(unsigned long *) pte = new_page | prot;
                free_page(old_page);
                invalidate();
                return;
            }
            /* No spare page: drop the old one, kill the task, map BAD_PAGE. */
            free_page(old_page);
            oom(tsk);
            *(unsigned long *) pte = BAD_PAGE | prot;
            invalidate();
            return;
        }
        /* Sole user: simply make the existing page writable. */
        *(unsigned long *) pte |= PAGE_RW;
        invalidate();
        if (new_page)
            free_page(new_page);
        return;
    bad_wp_page:
        printk("do_wp_page: bogus page at address %08lx (%08lx) ",address,old_page);
        *(unsigned long *) pte = BAD_PAGE | PAGE_SHARED;
        send_sig(SIGKILL, tsk, 1);
        goto end_wp_page;
    bad_wp_pagetable:
        printk("do_wp_page: bogus page-table at address %08lx (%08lx) ",address,pte);
        *pde = BAD_PAGETABLE | PAGE_TABLE;
        send_sig(SIGKILL, tsk, 1);
    end_wp_page:
        /* Common exit: return the spare page if one was obtained. */
        if (new_page)
            free_page(new_page);
        return;
    }

    /*
     * check that a page table change is actually needed, and call
     * the low-level function only in that case..
     */
     //写保护
    void do_wp_page(unsigned long error_code, unsigned long address,
        struct task_struct * tsk, unsigned long user_esp)
    {
        unsigned long page;
        unsigned long * pg_table;

        pg_table = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
        page = *pg_table;
        if (!page)
            return;
        if ((page & PAGE_PRESENT) && page < high_memory) {
            pg_table = (unsigned long *) ((page & PAGE_MASK) + PAGE_PTR(address));
            page = *pg_table;
            if (!(page & PAGE_PRESENT))
                return;
            if (page & PAGE_RW)
                return;
            if (!(page & PAGE_COW)) {
                if (user_esp && tsk == current) {
                    current->tss.cr2 = address;
                    current->tss.error_code = error_code;
                    current->tss.trap_no = 14;
                    send_sig(SIGSEGV, tsk, 1);
                    return;
                }
            }
            if (mem_map[MAP_NR(page)] == 1) {
                *pg_table |= PAGE_RW | PAGE_DIRTY;
                invalidate();
                return;
            }
            __do_wp_page(error_code, address, tsk, user_esp);
            return;
        }
        printk("bad page directory entry %08lx ",page);
        *pg_table = 0;
    }
    /*
     * Run do_wp_page() for the current task on every page touched by the
     * 'size' bytes starting at 'start', so that each of them gets the
     * write-fault treatment before it is actually written to.
     *
     * An empty range touches no page. Always returns 0.
     */
    int __verify_write(unsigned long start, unsigned long size)
    {
        /*
         * Without this guard the decrement below would wrap around for a
         * zero size and the loop would then walk a huge number of pages.
         */
        if (!size)
            return 0;
        /* Turn 'size' into the number of pages after the first one. */
        size--;
        size += start & ~PAGE_MASK;
        size >>= PAGE_SHIFT;
        start &= PAGE_MASK;
        do {
            do_wp_page(1,start,current,0);
            start += PAGE_SIZE;
        } while (size--);
        return 0;
    }

    /*
     * Obtain a page and map it PAGE_PRIVATE at 'address' in 'tsk'.
     *
     * When no page is available the task goes through oom() and BAD_PAGE is
     * mapped instead. If put_page() refuses the mapping, the page is handed
     * to free_page().
     */
    static inline void get_empty_page(struct task_struct * tsk, unsigned long address)
    {
        unsigned long new_page = get_free_page(GFP_KERNEL);

        if (!new_page) {
            oom(tsk);
            new_page = BAD_PAGE;
        }
        if (put_page(tsk,new_page,address,PAGE_PRIVATE))
            return;
        free_page(new_page);
    }

    /*
     * try_to_share() checks the page at address "address" in the task "p",
     * to see if it exists, and if it is clean. If so, share it with the
     * task "tsk".
     *
     * NOTE! This assumes we have checked that p != current, and that they
     * share the same executable or library.
     *
     * We may want to fix this to allow page sharing for PIC pages at different
     * addresses so that ELF will really perform properly. As long as the vast
     * majority of sharable libraries load at fixed addresses this is not a
     * big concern. Any sharing of pages between the buffer cache and the
     * code space reduces the need for this as well.  - ERY
     *
     * 'newpage' is a page supplied by the caller (0 if none). It receives
     * the copy when 'error_code' has PAGE_RW set; otherwise it is freed
     * once sharing succeeds.
     *
     * Returns 1 when 'tsk' now has a mapping at 'address', 0 when nothing
     * was changed.
     */
    static int try_to_share(unsigned long address, struct task_struct * tsk,
        struct task_struct * p, unsigned long error_code, unsigned long newpage)
    {
        unsigned long from;
        unsigned long to;
        unsigned long from_page;
        unsigned long to_page;

        /* Addresses of the two directory entries covering 'address'. */
        from_page = (unsigned long)PAGE_DIR_OFFSET(p->tss.cr3,address);
        to_page = (unsigned long)PAGE_DIR_OFFSET(tsk->tss.cr3,address);
    /* is there a page-directory at from? */
        from = *(unsigned long *) from_page;
        if (!(from & PAGE_PRESENT))
            return 0;
        from &= PAGE_MASK;
        /* from_page now addresses the source page-table entry. */
        from_page = from + PAGE_PTR(address);
        from = *(unsigned long *) from_page;
    /* is the page clean and present? */
        if ((from & (PAGE_PRESENT | PAGE_DIRTY)) != PAGE_PRESENT)
            return 0;
        if (from >= high_memory)
            return 0;
        if (mem_map[MAP_NR(from)] & MAP_PAGE_RESERVED)
            return 0;
    /* is the destination ok? */
        to = *(unsigned long *) to_page;
        if (!(to & PAGE_PRESENT))
            return 0;
        to &= PAGE_MASK;
        /* to_page now addresses the destination page-table entry. */
        to_page = to + PAGE_PTR(address);
        /* The destination entry has to be completely empty. */
        if (*(unsigned long *) to_page)
            return 0;
    /* share them if read - do COW immediately otherwise */
        if (error_code & PAGE_RW) {
            if(!newpage)    /* did the page exist?  SRB. */
                return 0;
            copy_page((from & PAGE_MASK),newpage);
            to = newpage | PAGE_PRIVATE;
        } else {
            /* Share the page itself: one more user, no write access. */
            mem_map[MAP_NR(from)]++;
            from &= ~PAGE_RW;
            to = from;
            if(newpage)    /* only if it existed. SRB. */
                free_page(newpage);
        }
        /* Both entries are written back (the source one may be unchanged). */
        *(unsigned long *) from_page = from;
        *(unsigned long *) to_page = to;
        invalidate();
        return 1;
    }

    /*
     * share_page() tries to find a process that could share a page with
     * the current one. Address is the address of the wanted page relative
     * to the current data space.
     *
     * We first check if it is at all feasible by checking executable->i_count.
     * It should be >1 if there are other tasks sharing this inode.
     *
     * Every other task is a candidate when its executable is 'inode', or
     * when one of its memory areas has the same vm_ops and the same inode
     * (i_ino and i_dev) as 'area' and that area's share() operation
     * accepts 'address'. Candidates are handed to try_to_share().
     *
     * Returns 1 as soon as a page has been shared, 0 otherwise.
     */
    int share_page(struct vm_area_struct * area, struct task_struct * tsk,
        struct inode * inode,
        unsigned long address, unsigned long error_code, unsigned long newpage)
    {
        struct task_struct ** p;

        if (!inode || inode->i_count < 2)
            return 0;
        /*
         * 'area' must be tested before it is dereferenced. The loop below
         * already copes with a NULL 'area' by only considering tasks that
         * run the same executable, so only the vm_ops requirement is
         * skipped for it.
         */
        if (area && !area->vm_ops)
            return 0;
        for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
            if (!*p)
                continue;
            if (tsk == *p)
                continue;
            if (inode != (*p)->executable) {
                struct vm_area_struct * mpnt;

                if (!area)
                    continue;
                /* Now see if there is something in the VMM that
                   we can share pages with */
                for (mpnt = (*p)->mmap; mpnt; mpnt = mpnt->vm_next) {
                    if (mpnt->vm_ops == area->vm_ops &&
                        mpnt->vm_inode->i_ino == area->vm_inode->i_ino &&
                        mpnt->vm_inode->i_dev == area->vm_inode->i_dev) {
                        if (mpnt->vm_ops->share(mpnt, area, address))
                            break;
                    }
                }
                if (!mpnt)
                    continue;  /* Nope.  Nuthin here */
            }
            if (try_to_share(address,tsk,*p,error_code,newpage))
                return 1;
        }
        return 0;
    }

    /*
     * fill in an empty page-table if none exists.
     *
     * Returns the page-directory entry (flag bits included) that covers
     * 'address' in 'tsk', allocating and installing a page table when the
     * entry is not present. Returns 0 when no page could be allocated; in
     * that case oom() is called and the entry is pointed at BAD_PAGETABLE.
     *
     * The directory entry is examined both before and after the
     * allocation, and the second look wins.
     */
    static inline unsigned long get_empty_pgtable(struct task_struct * tsk,unsigned long address)
    {
        unsigned long page;
        unsigned long *p;

        p = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
        if (PAGE_PRESENT & *p)
            return *p;
        /* Non-zero but not present: report it and wipe the entry. */
        if (*p) {
            printk("get_empty_pgtable: bad page-directory entry ");
            *p = 0;
        }
        page = get_free_page(GFP_KERNEL);
        /* Look the entry up again now that the allocation is done. */
        p = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
        if (PAGE_PRESENT & *p) {
            /* A table showed up meanwhile: the new page is not needed. */
            free_page(page);
            return *p;
        }
        if (*p) {
            printk("get_empty_pgtable: bad page-directory entry ");
            *p = 0;
        }
        if (page) {
            *p = page | PAGE_TABLE;
            return *p;
        }
        /* NOTE(review): oom() is given 'current' here, not 'tsk' - confirm intent. */
        oom(current);
        *p = BAD_PAGETABLE | PAGE_TABLE;
        return 0;
    }

    /*
     * Handle a fault on an address of 'tsk' that has no present page.
     *
     * In order: make sure a page table exists; bring the page back with
     * swap_in() when the entry is non-zero; otherwise look for a memory
     * area of the task that contains the address and let its nopage()
     * operation (or, lacking one, a fresh empty page) satisfy the fault.
     * An address outside every area gets an empty page when the task is
     * not the current one, when it lies between end_data and brk, or when
     * it qualifies as growth of the stack area; anything else raises
     * SIGSEGV.
     *
     * 'user_esp' is accepted but not used in this body.
     */
    void do_no_page(unsigned long error_code, unsigned long address,
        struct task_struct *tsk, unsigned long user_esp)
    {
        unsigned long tmp;
        unsigned long page;
        struct vm_area_struct * mpnt;

        page = get_empty_pgtable(tsk,address);
        if (!page)
            return;
        /* Turn the directory entry into the address of the page-table entry. */
        page &= PAGE_MASK;
        page += PAGE_PTR(address);
        tmp = *(unsigned long *) page;
        /* Present after all: nothing to do. */
        if (tmp & PAGE_PRESENT)
            return;
        ++tsk->rss;
        /* A non-zero, non-present entry is passed to swap_in(). */
        if (tmp) {
            ++tsk->maj_flt;
            swap_in((unsigned long *) page);
            return;
        }
        /* Round the address down (mask 0xfffff000 clears the low 12 bits). */
        address &= 0xfffff000;
        /* 'tmp' tracks the end of the last area lying below the address. */
        tmp = 0;
        /* The early 'break' presumably relies on the list being sorted by address - TODO confirm. */
        for (mpnt = tsk->mmap; mpnt != NULL; mpnt = mpnt->vm_next) {
            if (address < mpnt->vm_start)
                break;
            if (address >= mpnt->vm_end) {
                tmp = mpnt->vm_end;
                continue;
            }
            /* The address is inside this area. */
            if (!mpnt->vm_ops || !mpnt->vm_ops->nopage) {
                ++tsk->min_flt;
                get_empty_page(tsk,address);
                return;
            }
            mpnt->vm_ops->nopage(error_code, mpnt, address);
            return;
        }
        /* No area contains the address; 'mpnt' is NULL or the next area above it. */
        if (tsk != current)
            goto ok_no_page;
        if (address >= tsk->end_data && address < tsk->brk)
            goto ok_no_page;
        /*
         * Stack growth: the area above must be the stack area, the address
         * must be closer to it than to the end of the area below, and the
         * resulting size must stay under the RLIMIT_STACK soft limit.
         */
        if (mpnt && mpnt == tsk->stk_vma &&
            address - tmp > mpnt->vm_start - address &&
            tsk->rlim[RLIMIT_STACK].rlim_cur > mpnt->vm_end - address) {
            mpnt->vm_start = address;
            goto ok_no_page;
        }
        /* Reached only with tsk == current (see the test above). */
        tsk->tss.cr2 = address;
        current->tss.error_code = error_code;
        current->tss.trap_no = 14;
        send_sig(SIGSEGV,tsk,1);
        if (error_code & 4)    /* user level access? */
            return;
        /* Otherwise fall through and supply a page anyway. */
    ok_no_page:
        ++tsk->min_flt;
        get_empty_page(tsk,address);
    }

    /*
     * This routine handles page faults.  It determines the address,
     * and the problem, and then passes it off to one of the appropriate
     * routines.
     */
     //页保护
    asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
    {
        unsigned long address;
        unsigned long user_esp = 0;
        unsigned int bit;

        /* get the address */
        __asm__("movl %%cr2,%0":"=r" (address));
        if (address < TASK_SIZE) {
            if (error_code & 4) {    /* user mode access? */
                if (regs->eflags & VM_MASK) {
                    bit = (address - 0xA0000) >> PAGE_SHIFT;
                    if (bit < 32)
                        current->screen_bitmap |= 1 << bit;
                } else
                    user_esp = regs->esp;
            }
            if (error_code & 1)
                do_wp_page(error_code, address, current, user_esp);
            else
                do_no_page(error_code, address, current, user_esp);
            return;
        }
        address -= TASK_SIZE;
        if (wp_works_ok < 0 && address == 0 && (error_code & PAGE_PRESENT)) {
            wp_works_ok = 1;
            pg0[0] = PAGE_SHARED;
            printk("This processor honours the WP bit even when in supervisor mode. Good. ");
            return;
        }
        if (address < PAGE_SIZE) {
            printk("Unable to handle kernel NULL pointer dereference");
            pg0[0] = PAGE_SHARED;
        } else
            printk("Unable to handle kernel paging request");
        printk(" at address %08lx ",address);
        die_if_kernel("Oops", regs, error_code);
        do_exit(SIGKILL);
    }
    //----------------------------------------------------------------------------------------------------------
    /*
     * BAD_PAGE is the page that is used for page faults when linux
     * is out-of-memory. Older versions of linux just did a
     * do_exit(), but using this instead means there is less risk
     * for a process dying in kernel mode, possibly leaving a inode
     * unused etc..
     *
     * BAD_PAGETABLE is the accompanying page-table: it is initialized
     * to point to BAD_PAGE entries.
     *
     * ZERO_PAGE is a special page that is used for zero-initialized
     * data and COW.
     */
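     // In short: BAD_PAGE is the page used for page faults when linux is
     // out of memory (older versions simply exited); using it lowers the
     // risk of a process dying in kernel mode.  BAD_PAGETABLE is the
     // matching page table, initialized to point at BAD_PAGE entries.
     // ZERO_PAGE is a special page used for zero-initialized data and COW
     // (copy-on-write).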
     
     // Fill empty_bad_page_table with PTRS_PER_PAGE copies of the value
     // BAD_PAGE + PAGE_TABLE and return the table's address.  The table is
     // rewritten on every call.
    unsigned long __bad_pagetable(void)
    {
        extern char empty_bad_page_table[PAGE_SIZE];

        __asm__ __volatile__("cld ; rep ; stosl":                     // cld, then rep stosl: store EAX at [EDI] ECX times, EDI moving up by 4 each time
            :"a" (BAD_PAGE + PAGE_TABLE),                             // EAX = the value written into every entry
             "D" ((long) empty_bad_page_table),                       // EDI = start of empty_bad_page_table
             "c" (PTRS_PER_PAGE)                                      // ECX = number of entries to write
            :"di","cx");
        return (unsigned long) empty_bad_page_table;
    }

    /*
     * Zero-fill empty_bad_page and return its address.  The page is
     * cleared again on every call.
     *
     * The fill is PTRS_PER_PAGE 32-bit stores of 0 (rep stosl).
     * NOTE(review): this covers the whole PAGE_SIZE array only if
     * PTRS_PER_PAGE * 4 == PAGE_SIZE; the macro is defined elsewhere.
     */
    unsigned long __bad_page(void)
    {
        extern char empty_bad_page[PAGE_SIZE];

        /* EAX = 0 (fill value), EDI = destination, ECX = store count */
        __asm__ __volatile__("cld ; rep ; stosl":
            :"a" (0),
             "D" ((long) empty_bad_page),
             "c" (PTRS_PER_PAGE)
            :"di","cx");
        return (unsigned long) empty_bad_page;
    }

    /*
     * Zero-fill empty_zero_page and return its address.  The page is
     * cleared again on every call.
     *
     * The fill is PTRS_PER_PAGE 32-bit stores of 0 (rep stosl).
     * NOTE(review): this covers the whole PAGE_SIZE array only if
     * PTRS_PER_PAGE * 4 == PAGE_SIZE; the macro is defined elsewhere.
     */
    unsigned long __zero_page(void)
    {
        extern char empty_zero_page[PAGE_SIZE];

        /* EAX = 0 (fill value), EDI = destination, ECX = store count */
        __asm__ __volatile__("cld ; rep ; stosl":
            :"a" (0),
             "D" ((long) empty_zero_page),
             "c" (PTRS_PER_PAGE)
            :"di","cx");
        return (unsigned long) empty_zero_page;
    }

    /*
     * show_mem() prints a memory usage report through printk(), one item
     * per line:
     *   - the free, secondary and free-swap page counters, converted to kB;
     *   - totals gathered by walking mem_map from the page below
     *     high_memory down to page 0: every page counts towards "total";
     *     an entry with MAP_PAGE_RESERVED set counts as reserved, a zero
     *     entry as free, and any other entry adds (entry - 1) to the
     *     shared count;
     *   - finally show_buffers() is called.
     */
    void show_mem(void)
    {
        int i,free = 0,total = 0,reserved = 0;
        int shared = 0;

        printk("Mem-info:\n");
        /* a page count shifted by (PAGE_SHIFT-10) is a size in kB */
        printk("Free pages:      %6dkB\n",nr_free_pages<<(PAGE_SHIFT-10));
        printk("Secondary pages: %6dkB\n",nr_secondary_pages<<(PAGE_SHIFT-10));
        printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
        i = high_memory >> PAGE_SHIFT;
        while (i-- > 0) {
            total++;
            if (mem_map[i] & MAP_PAGE_RESERVED)
                reserved++;
            else if (!mem_map[i])
                free++;
            else
                shared += mem_map[i]-1;
        }
        printk("%d pages of RAM\n",total);
        printk("%d free pages\n",free);
        printk("%d reserved pages\n",reserved);
        printk("%d pages shared\n",shared);
        show_buffers();
    }

    /*
     * paging_init() sets up the page tables - note that the first 4MB are
     * already mapped by head.S.
     *
     * This routines also unmaps the page at virtual kernel address 0, so
     * that we can trap those pesky NULL-reference errors in the kernel.
     */
     //初始化页表
    unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
    {
        unsigned long * pg_dir;
        unsigned long * pg_table;
        unsigned long tmp;
        unsigned long address;

    /*
     * Physical page 0 is special; it's not touched by Linux since BIOS
     * and SMM (for laptops with [34]86/SL chips) may need it.  It is read
     * and write protected to detect null pointer references in the
     * kernel.
     */
    #if 0
        memset((void *) 0, 0, PAGE_SIZE);
    #endif
        //开始内存调整位置
        start_mem = PAGE_ALIGN(start_mem);
        address = 0;
        //交换分区页目标表,指向页目标表
        pg_dir = swapper_pg_dir;
        //遍历所有核心内存
        while (address < end_mem) {
            //指向第768项内容,第0页
            tmp = *(pg_dir + 768);        /* at virtual addr 0xC0000000 */
            //如果为空
            if (!tmp) {
                //设置为表属性
                tmp = start_mem | PAGE_TABLE;
                //填充该项
                *(pg_dir + 768) = tmp;
                //调整开始内存为下一页
                start_mem += PAGE_SIZE;
            }
            //此时不空
            *pg_dir = tmp;            /* also map it in at 0x0000000 for init */
            //下一项
            pg_dir++;
            //页表地址
            pg_table = (unsigned long *) (tmp & PAGE_MASK);
            //初始化新创建的页表
            for (tmp = 0 ; tmp < PTRS_PER_PAGE ; tmp++,pg_table++) {
                //如果地址没有超出最高内存,则页表中的项
                if (address < end_mem)
                    *pg_table = address | PAGE_SHARED;
                else
                    *pg_table = 0;
                //下一页
                address += PAGE_SIZE;
            }
        }
        invalidate();
        return start_mem;
    }

    //内存初始化              可用内存低初始值
    void mem_init(unsigned long start_low_mem,
                //可用内存初始值          可用内存结束值
              unsigned long start_mem, unsigned long end_mem)
    {
        //代码页
        int codepages = 0;
        //保留页
        int reservedpages = 0;
        //数据页
        int datapages = 0;
        //
        unsigned long tmp;
        unsigned short * p;
        extern int etext;

        //关中断
        cli();
        //处理高地址
        //调整内存,按页对齐
        end_mem &= PAGE_MASK;
        //内存高地址设置为处理后的内存结尾处
        high_memory = end_mem;
        //处理低地址,调整内存开始地址
        start_mem +=  0x0000000f;
        start_mem &= ~0x0000000f;
        //计算内存的页数,mm头文件中定义的宏,全部页数
        tmp = MAP_NR(end_mem);
        //mem_map第一项指向主存开始地址,即在此时的start_mem内存开始处,放置一个数组mem_map,
        //统一管理从start_mem到end_mem得内存,一页,映射一项
        mem_map = (unsigned short *) start_mem;
        //p指向mem_map最后一项
        p = mem_map + tmp;
        //取最后一项为开始项
        start_mem = (unsigned long) p;
        //遍历设置属性
        while (p > mem_map)
            *--p = MAP_PAGE_RESERVED;
        //确定内存的开始地址
        start_low_mem = PAGE_ALIGN(start_low_mem);
        start_mem = PAGE_ALIGN(start_mem);
        while (start_low_mem < 0xA0000) {
            mem_map[MAP_NR(start_low_mem)] = 0;
            start_low_mem += PAGE_SIZE;
        }
        while (start_mem < end_mem) {
            mem_map[MAP_NR(start_mem)] = 0;
            start_mem += PAGE_SIZE;
        }
    #ifdef CONFIG_SOUND
        sound_mem_init();
    #endif
        free_page_list = 0;
        nr_free_pages = 0;
        //统计内存信息
        for (tmp = 0 ; tmp < end_mem ; tmp += PAGE_SIZE) {
            if (mem_map[MAP_NR(tmp)]) {
                if (tmp >= 0xA0000 && tmp < 0x100000)
                    reservedpages++;
                else if (tmp < (unsigned long) &etext)
                    codepages++;
                else
                    datapages++;
                continue;
            }
            //空闲的内存页面
            *(unsigned long *) tmp = free_page_list;
            free_page_list = tmp;
            nr_free_pages++;
        }
        //
        tmp = nr_free_pages << PAGE_SHIFT;
        printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data) ",
            tmp >> 10,
            end_mem >> 10,
            codepages << (PAGE_SHIFT-10),
            reservedpages << (PAGE_SHIFT-10),
            datapages << (PAGE_SHIFT-10));
    /* test if the WP bit is honoured in supervisor mode */
        wp_works_ok = -1;
        pg0[0] = PAGE_READONLY;
        invalidate();
        __asm__ __volatile__("movb 0,%%al ; movb %%al,0": : :"ax", "memory");
        pg0[0] = 0;
        invalidate();
        if (wp_works_ok < 0)
            wp_works_ok = 0;
        return;
    }

    /*
     * si_meminfo() fills in the memory fields of *val.
     *
     * bufferram is copied from buffermem.  The other three fields are
     * gathered by walking mem_map from the page just below high_memory down
     * to page 0, skipping entries that have MAP_PAGE_RESERVED set:
     *   totalram  - number of non-reserved pages
     *   freeram   - non-reserved pages whose entry is 0
     *   sharedram - sum of (entry - 1) over the remaining pages
     * The three counts are converted from pages to bytes before returning.
     */
    void si_meminfo(struct sysinfo *val)
    {
        int page = high_memory >> PAGE_SHIFT;    /* one past the last page */

        val->bufferram = buffermem;
        val->totalram = 0;
        val->freeram = 0;
        val->sharedram = 0;

        while (--page >= 0) {
            if (mem_map[page] & MAP_PAGE_RESERVED)
                continue;
            val->totalram++;
            if (mem_map[page])
                val->sharedram += mem_map[page]-1;
            else
                val->freeram++;
        }

        /* pages -> bytes */
        val->totalram <<= PAGE_SHIFT;
        val->freeram <<= PAGE_SHIFT;
        val->sharedram <<= PAGE_SHIFT;
    }


    /* This handles a generic mmap of a disk file */
    //管理通用硬盘文件的mmap
    void file_mmap_nopage(int error_code, struct vm_area_struct * area, unsigned long address)
    {
        struct inode * inode = area->vm_inode;
        unsigned int block;
        unsigned long page;
        int nr[8];
        int i, j;
        int prot = area->vm_page_prot;

        address &= PAGE_MASK;
        block = address - area->vm_start + area->vm_offset;
        block >>= inode->i_sb->s_blocksize_bits;

        page = get_free_page(GFP_KERNEL);
        if (share_page(area, area->vm_task, inode, address, error_code, page)) {
            ++area->vm_task->min_flt;
            return;
        }

        ++area->vm_task->maj_flt;
        if (!page) {
            oom(current);
            put_page(area->vm_task, BAD_PAGE, address, PAGE_PRIVATE);
            return;
        }
        for (i=0, j=0; i< PAGE_SIZE ; j++, block++, i += inode->i_sb->s_blocksize)
            nr[j] = bmap(inode,block);
        if (error_code & PAGE_RW)
            prot |= PAGE_RW | PAGE_DIRTY;
        page = bread_page(page, inode->i_dev, nr, inode->i_sb->s_blocksize, prot);

        if (!(prot & PAGE_RW)) {
            if (share_page(area, area->vm_task, inode, address, error_code, page))
                return;
        }
        if (put_page(area->vm_task,page,address,prot))
            return;
        free_page(page);
        oom(current);
    }

    /*
     * close handler installed in the file_mmap operations table: releases
     * the area's inode with iput() when the area has one.
     */
    void file_mmap_free(struct vm_area_struct * area)
    {
        struct inode *ino = area->vm_inode;

        if (!ino)
            return;
        iput(ino);
    #if 0
        printk("Free inode %x:%d (%d) ",ino->i_dev,
                 ino->i_ino, ino->i_count);
    #endif
    }

    /*
     * Compare the contents of the mmap entries, and decide if we are allowed to
     * share the pages.
     *
     * Returns 1 when both areas have the same inode, start address, end
     * address, file offset and page protection, and 0 otherwise.
     * "address" is not used in the comparison.
     */
    int file_mmap_share(struct vm_area_struct * area1,
                struct vm_area_struct * area2,
                unsigned long address)
    {
        return area1->vm_inode == area2->vm_inode &&
               area1->vm_start == area2->vm_start &&
               area1->vm_end == area2->vm_end &&
               area1->vm_offset == area2->vm_offset &&
               area1->vm_page_prot == area2->vm_page_prot;
    }

    /*
     * Operations table for mappings of disk files.  The initializer is
     * positional; the slot names below come from the per-line comments
     * (the struct definition is not visible in this file).  Only the
     * close, nopage and share slots have handlers; the rest are NULL.
     */
    struct vm_operations_struct file_mmap = {
        NULL,            /* open */
        file_mmap_free,        /* close */
        file_mmap_nopage,    /* nopage */
        NULL,            /* wppage */
        file_mmap_share,    /* share */
        NULL,            /* unmap */
    };

  • 相关阅读:
    MVC思想-程序的控制流程-Struts2和SpringMVC黑马流程图
    代理模式
    显卡
    感悟:Java新手一点想法
    java企业级开发的实质就是前台后台如何交互的-各个对象之间如何交互,通信的-程序执行的流程是怎样的
    $.ajax()方法详解--极快瑞中的阿贾克斯函数
    初学者必读之AJAX简单实例2
    初学者必读原生AJAX-异步的javaScript和XML
    c#输入方法名来调用方法(反射)
    unity接入讯飞教程
  • 原文地址:https://www.cnblogs.com/xiaofengwei/p/3773990.html
Copyright © 2020-2023  润新知