• Analysis of the pageset member of the memory zone


        struct per_cpu_pageset __percpu *pageset;
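
    For reference, the per-CPU structures reached through zone->pageset look roughly like this in kernels of this era (a sketch; the exact field set varies between versions):

        struct per_cpu_pages {
            int count;      /* number of pages currently on the lists */
            int high;       /* high watermark: drain back to the buddy allocator */
            int batch;      /* chunk size used when moving pages to/from buddy lists */

            /* one list per migrate type kept on the pcp lists */
            struct list_head lists[MIGRATE_PCPTYPES];
        };

        struct per_cpu_pageset {
            struct per_cpu_pages pcp;
        #ifdef CONFIG_NUMA
            s8 expire;
        #endif
        #ifdef CONFIG_SMP
            s8 stat_threshold;
            s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
        #endif
        };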

    First, let's analyze the function __free_pages. It is an API exposed by the Buddy System for freeing a previously allocated group of pages (how many pages, 2^order, depends on order).

        void __free_pages(struct page *page, unsigned int order)
        {
            if (put_page_testzero(page)) {
                if (order == 0)
                    free_hot_cold_page(page, 0);
                else
                    __free_pages_ok(page, order);
            }
        }

    First, put_page_testzero is called to check whether the page still has other references (the _count field in struct page).

    That is, the current reference is dropped first (the count is decremented by 1), and the result is then checked to see whether the reference count has fallen to 0.

        /*
         * Drop a ref, return true if the refcount fell to zero (the page has no users)
         */
        static inline int put_page_testzero(struct page *page)
        {
            VM_BUG_ON(atomic_read(&page->_count) == 0);
            return atomic_dec_and_test(&page->_count);
        }

    Here, atomic_xxx are the atomic-operation primitives provided by the kernel; they are worth a closer look if you are interested.
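
    As a hypothetical, minimal illustration (not code from the kernel source) of the reference-counting pattern that _count follows, atomic_inc and atomic_dec_and_test are typically used like this:

        #include <linux/atomic.h>   /* or <asm/atomic.h> on older kernels */

        static atomic_t refcnt = ATOMIC_INIT(1);   /* object starts with one reference */

        static void get_ref(void)
        {
            atomic_inc(&refcnt);                   /* take an additional reference */
        }

        static void put_ref(void)
        {
            /* atomically decrement; returns true only for the final put */
            if (atomic_dec_and_test(&refcnt))
                pr_info("last reference dropped, object can be freed\n");
        }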

    Then, if order is 0, meaning only a single page needs to be freed, free_hot_cold_page is called; otherwise __free_pages_ok handles the higher-order case.

        /*
         * Free a 0-order page
         * cold == 1 ? free a cold page : free a hot page
         */
        void free_hot_cold_page(struct page *page, int cold)
        {
            struct zone *zone = page_zone(page);
            struct per_cpu_pages *pcp;
            unsigned long flags;
            int migratetype;
            int wasMlocked = __TestClearPageMlocked(page);

            if (!free_pages_prepare(page, 0))
                return;

            migratetype = get_pageblock_migratetype(page);
            set_page_private(page, migratetype);
            local_irq_save(flags);
            if (unlikely(wasMlocked))
                free_page_mlock(page);
            __count_vm_event(PGFREE);

            /*
             * We only track unmovable, reclaimable and movable on pcp lists.
             * Free ISOLATE pages back to the allocator because they are being
             * offlined but treat RESERVE as movable pages so we can get those
             * areas back if necessary. Otherwise, we may have to free
             * excessively into the page allocator
             */
            if (migratetype >= MIGRATE_PCPTYPES) {
                if (unlikely(migratetype == MIGRATE_ISOLATE)) {
                    free_one_page(zone, page, 0, migratetype);
                    goto out;
                }
                migratetype = MIGRATE_MOVABLE;
            }

            pcp = &this_cpu_ptr(zone->pageset)->pcp;
            if (cold)
                list_add_tail(&page->lru, &pcp->lists[migratetype]);
            else
                list_add(&page->lru, &pcp->lists[migratetype]);
            pcp->count++;
            if (pcp->count >= pcp->high) {
                free_pcppages_bulk(zone, pcp->batch, pcp);
                pcp->count -= pcp->batch;
            }

        out:
            local_irq_restore(flags);
        }

    page_zone is the function that finds the zone a page belongs to. Its implementation relies on bits in page->flags that record which zone the page was allocated from.
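
    A simplified sketch of that lookup, mirroring the bit layout used by set_page_zone/set_page_node shown further below (the real helpers live in include/linux/mm.h):

        static inline enum zone_type page_zonenum(const struct page *page)
        {
            /* extract the zone index bits stored in page->flags */
            return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
        }

        static inline struct zone *page_zone(const struct page *page)
        {
            /* index into the owning node's zone array with that zone index */
            return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
        }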

    So at what point does page->flags start carrying this information?

    First of all, every page structure lives in the block of memory pointed to by the node_mem_map member of pglist_data.
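
    In the simplest (FLATMEM) memory model this array is mem_map, and pfn_to_page/page_to_pfn are essentially array indexing (a sketch based on include/asm-generic/memory_model.h; the SPARSEMEM variants are more involved):

        /* FLATMEM: the struct page array starts at mem_map, offset by the first valid pfn */
        #define __pfn_to_page(pfn)  (mem_map + ((pfn) - ARCH_PFN_OFFSET))
        #define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + ARCH_PFN_OFFSET)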

        /*
         * Initially all pages are reserved - free ones are freed
         * up by free_all_bootmem() once the early boot process is
         * done. Non-atomic initialization, single-pass.
         */
        void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                unsigned long start_pfn, enum memmap_context context)
        {
            struct page *page;
            unsigned long end_pfn = start_pfn + size;
            unsigned long pfn;
            struct zone *z;

            if (highest_memmap_pfn < end_pfn - 1)
                highest_memmap_pfn = end_pfn - 1;

            z = &NODE_DATA(nid)->node_zones[zone];
            for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                /*
                 * There can be holes in boot-time mem_map[]s
                 * handed to this function.  They do not
                 * exist on hotplugged memory.
                 */
                if (context == MEMMAP_EARLY) {
                    if (!early_pfn_valid(pfn))
                        continue;
                    if (!early_pfn_in_nid(pfn, nid))
                        continue;
                }
                page = pfn_to_page(pfn);
                set_page_links(page, zone, nid, pfn);
                mminit_verify_page_links(page, zone, nid, pfn);
                init_page_count(page);
                reset_page_mapcount(page);
                SetPageReserved(page);
                /*
                 * Mark the block movable so that blocks are reserved for
                 * movable at startup. This will force kernel allocations
                 * to reserve their blocks rather than leaking throughout
                 * the address space during boot when many long-lived
                 * kernel allocations are made. Later some blocks near
                 * the start are marked MIGRATE_RESERVE by
                 * setup_zone_migrate_reserve()
                 *
                 * bitmap is created for zone's valid pfn range. but memmap
                 * can be created for invalid pages (for alignment)
                 * check here not to call set_pageblock_migratetype() against
                 * pfn out of zone.
                 */
                if ((z->zone_start_pfn <= pfn)
                    && (pfn < z->zone_start_pfn + z->spanned_pages)
                    && !(pfn & (pageblock_nr_pages - 1)))
                    set_pageblock_migratetype(page, MIGRATE_MOVABLE);

                INIT_LIST_HEAD(&page->lru);
        #ifdef WANT_PAGE_VIRTUAL
                /* The shift won't overflow because ZONE_NORMAL is below 4G. */
                if (!is_highmem_idx(zone))
                    set_page_address(page, __va(pfn << PAGE_SHIFT));
        #endif
            }
        }

    During Buddy System initialization, memmap_init_zone is called. It walks every page structure belonging to the zone and invokes set_page_links on each one to establish the association between the page and its zone (and node).

        static inline void set_page_zone(struct page *page, enum zone_type zone)
        {
            page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
            page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
        }

        static inline void set_page_node(struct page *page, unsigned long node)
        {
            page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
            page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
        }

        static inline void set_page_links(struct page *page, enum zone_type zone,
            unsigned long node, unsigned long pfn)
        {
            set_page_zone(page, zone);
            set_page_node(page, node);
        #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
            set_page_section(page, pfn_to_section_nr(pfn));
        #endif
        }

    Memory initialization during the boot process

        void __init setup_arch(char **cmdline_p)
        {
            ......
            /* max_pfn_mapped is updated here */
            max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
            max_pfn_mapped = max_low_pfn_mapped;
            ......
            paging_init();
            ......
        }

    setup_arch calls init_memory_mapping:

        /*
         * Setup the direct mapping of the physical memory at PAGE_OFFSET.
         * This runs before bootmem is initialized and gets pages directly from
         * the physical memory. To access them they are temporarily mapped.
         */
        unsigned long __init_refok init_memory_mapping(unsigned long start,
                                   unsigned long end)
        {
            ......

            for (i = 0; i < nr_range; i++)
                ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
                                   mr[i].page_size_mask);

            ......
        }

    init_memory_mapping in turn calls kernel_physical_mapping_init:

        /*
         * This maps the physical memory to kernel virtual address space, a total
         * of max_low_pfn pages, by creating page tables starting from address
         * PAGE_OFFSET:
         */
        unsigned long __init
        kernel_physical_mapping_init(unsigned long start,
                         unsigned long end,
                         unsigned long page_size_mask)
        {
            int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
            unsigned long last_map_addr = end;
            unsigned long start_pfn, end_pfn;
            pgd_t *pgd_base = swapper_pg_dir;
            int pgd_idx, pmd_idx, pte_ofs;
            unsigned long pfn;
            pgd_t *pgd;
            pmd_t *pmd;
            pte_t *pte;
            unsigned pages_2m, pages_4k;
            int mapping_iter;

            start_pfn = start >> PAGE_SHIFT;
            end_pfn = end >> PAGE_SHIFT;

            /*
             * First iteration will setup identity mapping using large/small pages
             * based on use_pse, with other attributes same as set by
             * the early code in head_32.S
             *
             * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
             * as desired for the kernel identity mapping.
             *
             * This two pass mechanism conforms to the TLB app note which says:
             *
             *     "Software should not write to a paging-structure entry in a way
             *      that would change, for any linear address, both the page size
             *      and either the page frame or attributes."
             */
            mapping_iter = 1;

            if (!cpu_has_pse)
                use_pse = 0;

        repeat:
            pages_2m = pages_4k = 0;
            pfn = start_pfn;
            pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
            pgd = pgd_base + pgd_idx;
            for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
                pmd = one_md_table_init(pgd);

                if (pfn >= end_pfn)
                    continue;
        #ifdef CONFIG_X86_PAE
                pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
                pmd += pmd_idx;
        #else
                pmd_idx = 0;
        #endif
                for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
                     pmd++, pmd_idx++) {
                    unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;

                    /*
                     * Map with big pages if possible, otherwise
                     * create normal page tables:
                     */
                    if (use_pse) {
                        unsigned int addr2;
                        pgprot_t prot = PAGE_KERNEL_LARGE;
                        /*
                         * first pass will use the same initial
                         * identity mapping attribute + _PAGE_PSE.
                         */
                        pgprot_t init_prot =
                            __pgprot(PTE_IDENT_ATTR |
                                 _PAGE_PSE);

                        addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
                            PAGE_OFFSET + PAGE_SIZE-1;

                        if (is_kernel_text(addr) ||
                            is_kernel_text(addr2))
                            prot = PAGE_KERNEL_LARGE_EXEC;

                        pages_2m++;
                        if (mapping_iter == 1)
                            set_pmd(pmd, pfn_pmd(pfn, init_prot));
                        else
                            set_pmd(pmd, pfn_pmd(pfn, prot));

                        pfn += PTRS_PER_PTE;
                        continue;
                    }
                    pte = one_page_table_init(pmd);

                    pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
                    pte += pte_ofs;
                    for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
                         pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
                        pgprot_t prot = PAGE_KERNEL;
                        /*
                         * first pass will use the same initial
                         * identity mapping attribute.
                         */
                        pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);

                        if (is_kernel_text(addr))
                            prot = PAGE_KERNEL_EXEC;

                        pages_4k++;
                        if (mapping_iter == 1) {
                            set_pte(pte, pfn_pte(pfn, init_prot));
                            last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
                        } else
                            set_pte(pte, pfn_pte(pfn, prot));
                    }
                }
            }
            if (mapping_iter == 1) {
                /*
                 * update direct mapping page count only in the first
                 * iteration.
                 */
                update_page_count(PG_LEVEL_2M, pages_2m);
                update_page_count(PG_LEVEL_4K, pages_4k);

                /*
                 * local global flush tlb, which will flush the previous
                 * mappings present in both small and large page TLB's.
                 */
                __flush_tlb_all();

                /*
                 * Second iteration will set the actual desired PTE attributes.
                 */
                mapping_iter = 2;
                goto repeat;
            }
            return last_map_addr;
        }

    Here swapper_pg_dir serves as the pgd_t (page directory) pointer: the function fills in the page-directory entries it points to, establishing the direct mappings for the Normal (lowmem) region in that page directory.
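
    Because this is the kernel's direct (linear) mapping, the relationship between a page frame number and its kernel virtual address is a fixed offset; on 32-bit x86 the conversion helpers boil down to roughly the following (a simplified sketch of the usual __pa/__va definitions):

        /* physical <-> kernel-virtual conversion within the direct mapping */
        #define __pa(x)  ((unsigned long)(x) - PAGE_OFFSET)
        #define __va(x)  ((void *)((unsigned long)(x) + PAGE_OFFSET))

        /* e.g. the virtual address mapped for a given pfn by the loop above: */
        /* vaddr = (pfn << PAGE_SHIFT) + PAGE_OFFSET */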

    Then paging_init calls pagetable_init:

        static void __init pagetable_init(void)
        {
            pgd_t *pgd_base = swapper_pg_dir;

            permanent_kmaps_init(pgd_base);
        }
  • Original article: https://www.cnblogs.com/long123king/p/3492856.html