• <Linux内核源码>内存管理模型


    题外语:本人对linux内核的了解尚浅,如果有差池欢迎指正,也欢迎提问交流!

    首先要理解一下每一个进程是如何维护自己独立的寻址空间的,我的电脑里呢是8G内存空间。了解过的朋友应该都知道这是虚拟内存技术解决的这个问题,然而在linux中具体是怎样的模型解决了操作系统的这个设计需求的呢,让我们从linux源码的片段开始看吧!(以下内核源码均来自fedora21 64位系统的fc-3.19.3版本内核)

    <include/linux/mm_types.h>中对于物理页面的定义struct page,也就是我们常说的页描述符(page descriptor,注意它描述的是物理页框本身,并不是页表),关于这里的结构体的每个变量/位的操作函数大部分在<include/linux/mm.h>中。

      1 struct page {
      2     /* First double word block */
      3     unsigned long flags;        /* Atomic flags, some possibly
      4                      * updated asynchronously */
      5     union {
      6         struct address_space *mapping;    /* If low bit clear, points to
      7                          * inode address_space, or NULL.
      8                          * If page mapped as anonymous
      9                          * memory, low bit is set, and
     10                          * it points to anon_vma object:
     11                          * see PAGE_MAPPING_ANON below.
     12                          */
     13         void *s_mem;            /* slab first object */
     14     };
     15 
     16     /* Second double word */
     17     struct {
     18         union {
     19             pgoff_t index;        /* Our offset within mapping. */
     20             void *freelist;        /* sl[aou]b first free object */
     21             bool pfmemalloc;    /* If set by the page allocator,
     22                          * ALLOC_NO_WATERMARKS was set
     23                          * and the low watermark was not
     24                          * met implying that the system
     25                          * is under some pressure. The
     26                          * caller should try ensure
     27                          * this page is only used to
     28                          * free other pages.
     29                          */
     30         };
     31 
     32         union {
     33 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
     34     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
     35             /* Used for cmpxchg_double in slub */
     36             unsigned long counters;
     37 #else
     38             /*
     39              * Keep _count separate from slub cmpxchg_double data.
     40              * As the rest of the double word is protected by
     41              * slab_lock but _count is not.
     42              */
     43             unsigned counters;
     44 #endif
     45 
     46             struct {
     47 
     48                 union {
     49                     /*
     50                      * Count of ptes mapped in
     51                      * mms, to show when page is
     52                      * mapped & limit reverse map
     53                      * searches.
     54                      *
     55                      * Used also for tail pages
     56                      * refcounting instead of
     57                      * _count. Tail pages cannot
     58                      * be mapped and keeping the
     59                      * tail page _count zero at
     60                      * all times guarantees
     61                      * get_page_unless_zero() will
     62                      * never succeed on tail
     63                      * pages.
     64                      */
     65                     atomic_t _mapcount;
     66 
     67                     struct { /* SLUB */
     68                         unsigned inuse:16;
     69                         unsigned objects:15;
     70                         unsigned frozen:1;
     71                     };
     72                     int units;    /* SLOB */
     73                 };
     74                 atomic_t _count;        /* Usage count, see below. */
     75             };
     76             unsigned int active;    /* SLAB */
     77         };
     78     };
     79 
     80     /* Third double word block */
     81     union {
     82         struct list_head lru;    /* Pageout list, eg. active_list
     83                      * protected by zone->lru_lock !
     84                      * Can be used as a generic list
     85                      * by the page owner.
     86                      */
     87         struct {        /* slub per cpu partial pages */
     88             struct page *next;    /* Next partial slab */
     89 #ifdef CONFIG_64BIT
     90             int pages;    /* Nr of partial slabs left */
     91             int pobjects;    /* Approximate # of objects */
     92 #else
     93             short int pages;
     94             short int pobjects;
     95 #endif
     96         };
     97 
     98         struct slab *slab_page; /* slab fields */
     99         struct rcu_head rcu_head;    /* Used by SLAB
    100                          * when destroying via RCU
    101                          */
    102 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
    103         pgtable_t pmd_huge_pte; /* protected by page->ptl */
    104 #endif
    105     };
    106 
    107     /* Remainder is not double word aligned */
    108     union {
    109         unsigned long private;        /* Mapping-private opaque data:
    110                           * usually used for buffer_heads
    111                          * if PagePrivate set; used for
    112                          * swp_entry_t if PageSwapCache;
    113                          * indicates order in the buddy
    114                          * system if PG_buddy is set.
    115                          */
    116 #if USE_SPLIT_PTE_PTLOCKS
    117 #if ALLOC_SPLIT_PTLOCKS
    118         spinlock_t *ptl;
    119 #else
    120         spinlock_t ptl;
    121 #endif
    122 #endif
    123         struct kmem_cache *slab_cache;    /* SL[AU]B: Pointer to slab */
    124         struct page *first_page;    /* Compound tail pages */
    125     };
    126 
    127 #ifdef CONFIG_MEMCG
    128     struct mem_cgroup *mem_cgroup;
    129 #endif
    130 
    131     /*
    132      * On machines where all RAM is mapped into kernel address space,
    133      * we can simply calculate the virtual address. On machines with
    134      * highmem some memory is mapped into kernel virtual memory
    135      * dynamically, so we need a place to store that address.
    136      * Note that this field could be 16 bits on x86 ... ;)
    137      *
    138      * Architectures with slow multiplication can define
    139      * WANT_PAGE_VIRTUAL in asm/page.h
    140      */
    141 #if defined(WANT_PAGE_VIRTUAL)
    142     void *virtual;            /* Kernel virtual address (NULL if
    143                        not kmapped, ie. highmem) */
    144 #endif /* WANT_PAGE_VIRTUAL */
    145 
    146 #ifdef CONFIG_KMEMCHECK
    147     /*
    148      * kmemcheck wants to track the status of each byte in a page; this
    149      * is a pointer to such a status block. NULL if not tracked.
    150      */
    151     void *shadow;
    152 #endif
    153 
    154 #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
    155     int _last_cpupid;
    156 #endif
    157 }
    View Code

    在整个struct page的定义里面的注释对每个位都作了详尽的解释,但我还是觉得有几个重要的定义要重复一下:

    (1)void*virtual:页的虚拟地址(由于在64位系统之中C语言里的void*指针的长度最长为64bit,寻址空间有2^64之大,远远超出了当前主流微机的硬件内存RAM的大小(8GB,16GB左右),这也就给虚拟空间寻址、交换技术提供了可能性)。对virtual中的虚拟地址进行映射需要通过四级页表来进行。

    (2)pgoff_t index:这个变量和freelist被定义在同一个union中,index变量被内存管理子系统中的多个模块使用,比如高速缓存。

    (3)unsigned long flags:flag变量很少有设成long的,可见里面的信息量比较大。这里是用来存放页的状态,比如锁/未锁,换出(虚拟内存用),激活等等。

    再继续说内存管理机制之前,有一点非常重要,就是linux中关于进程和内存之间的对应关系。

    linux中的每一个进程维护一个PCB,而这个PCB就是/include/linux/sched.h中定义的task_struct,在这个结构体的定义之中有定义变量:

    struct mm_struct *mm, *active_mm;

    这也就是进程和内存管理的桥梁之一,由此也可见进程和内存块/页之间的关系是一对多的(考虑进程共享的内存的话是多对多)。进程在装入内存的时候,操作系统的工作的实质是将task_struct中相关的内存数据部分映射到物理内存之中,而对于并没有映射的页就采取交换技术来解决。和windows系统中的程序装入过程相比较,windows中的程序装入过程都是靠loader完成的,loader的工作就是针对PE格式的可执行文件通过二进制的分析(比如IDT,IAT等等)进行装入,很多情况下一个进程都会被装入到同一个虚拟地址0x00400000(PE可执行文件默认的ImageBase,90%都是装入这里)。而linux之中,我们的进程是根据调度算法来安排其在虚拟地址之中的分布情况,buddy算法可以将进程使用的页尽可能整齐地装入(其实这里我有些不是很清楚的地方,linux如果这么动态分配内存那么该如何处理一些动态加载的库的问题,像windows中的dll文件都是通过计算偏移来重定位,而linux会怎么做呢?)。进程在已经装入物理内存的页的基础之上开始执行指令,跳转到并未被装入物理内存的页的虚拟地址的时候,会触发一个缺页中断,缺页中断触发页的交换的过程,从而帮助程序继续执行,这也就是虚拟内存的过程。

      1 struct task_struct {
      2     volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
      3     void *stack;
      4     atomic_t usage;
      5     unsigned int flags;    /* per process flags, defined below */
      6     unsigned int ptrace;
      7 
      8 #ifdef CONFIG_SMP
      9     struct llist_node wake_entry;
     10     int on_cpu;
     11     struct task_struct *last_wakee;
     12     unsigned long wakee_flips;
     13     unsigned long wakee_flip_decay_ts;
     14 
     15     int wake_cpu;
     16 #endif
     17     int on_rq;
     18 
     19     int prio, static_prio, normal_prio;
     20     unsigned int rt_priority;
     21     const struct sched_class *sched_class;
     22     struct sched_entity se;
     23     struct sched_rt_entity rt;
     24 #ifdef CONFIG_CGROUP_SCHED
     25     struct task_group *sched_task_group;
     26 #endif
     27     struct sched_dl_entity dl;
     28 
     29 #ifdef CONFIG_PREEMPT_NOTIFIERS
     30     /* list of struct preempt_notifier: */
     31     struct hlist_head preempt_notifiers;
     32 #endif
     33 
     34 #ifdef CONFIG_BLK_DEV_IO_TRACE
     35     unsigned int btrace_seq;
     36 #endif
     37 
     38     unsigned int policy;
     39     int nr_cpus_allowed;
     40     cpumask_t cpus_allowed;
     41 
     42 #ifdef CONFIG_PREEMPT_RCU
     43     int rcu_read_lock_nesting;
     44     union rcu_special rcu_read_unlock_special;
     45     struct list_head rcu_node_entry;
     46 #endif /* #ifdef CONFIG_PREEMPT_RCU */
     47 #ifdef CONFIG_PREEMPT_RCU
     48     struct rcu_node *rcu_blocked_node;
     49 #endif /* #ifdef CONFIG_PREEMPT_RCU */
     50 #ifdef CONFIG_TASKS_RCU
     51     unsigned long rcu_tasks_nvcsw;
     52     bool rcu_tasks_holdout;
     53     struct list_head rcu_tasks_holdout_list;
     54     int rcu_tasks_idle_cpu;
     55 #endif /* #ifdef CONFIG_TASKS_RCU */
     56 
     57 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
     58     struct sched_info sched_info;
     59 #endif
     60 
     61     struct list_head tasks;
     62 #ifdef CONFIG_SMP
     63     struct plist_node pushable_tasks;
     64     struct rb_node pushable_dl_tasks;
     65 #endif
     66 
     67     struct mm_struct *mm, *active_mm;
     68 #ifdef CONFIG_COMPAT_BRK
     69     unsigned brk_randomized:1;
     70 #endif
     71     /* per-thread vma caching */
     72     u32 vmacache_seqnum;
     73     struct vm_area_struct *vmacache[VMACACHE_SIZE];
     74 #if defined(SPLIT_RSS_COUNTING)
     75     struct task_rss_stat    rss_stat;
     76 #endif
     77 /* task state */
     78     int exit_state;
     79     int exit_code, exit_signal;
     80     int pdeath_signal;  /*  The signal sent when the parent dies  */
     81     unsigned int jobctl;    /* JOBCTL_*, siglock protected */
     82 
     83     /* Used for emulating ABI behavior of previous Linux versions */
     84     unsigned int personality;
     85 
     86     unsigned in_execve:1;    /* Tell the LSMs that the process is doing an
     87                  * execve */
     88     unsigned in_iowait:1;
     89 
     90     /* Revert to default priority/policy when forking */
     91     unsigned sched_reset_on_fork:1;
     92     unsigned sched_contributes_to_load:1;
     93 
     94 #ifdef CONFIG_MEMCG_KMEM
     95     unsigned memcg_kmem_skip_account:1;
     96 #endif
     97 
     98     unsigned long atomic_flags; /* Flags needing atomic access. */
     99 
    100     pid_t pid;
    101     pid_t tgid;
    102 
    103 #ifdef CONFIG_CC_STACKPROTECTOR
    104     /* Canary value for the -fstack-protector gcc feature */
    105     unsigned long stack_canary;
    106 #endif
    107     /*
    108      * pointers to (original) parent process, youngest child, younger sibling,
    109      * older sibling, respectively.  (p->father can be replaced with
    110      * p->real_parent->pid)
    111      */
    112     struct task_struct __rcu *real_parent; /* real parent process */
    113     struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
    114     /*
    115      * children/sibling forms the list of my natural children
    116      */
    117     struct list_head children;    /* list of my children */
    118     struct list_head sibling;    /* linkage in my parent's children list */
    119     struct task_struct *group_leader;    /* threadgroup leader */
    120 
    121     /*
    122      * ptraced is the list of tasks this task is using ptrace on.
    123      * This includes both natural children and PTRACE_ATTACH targets.
    124      * p->ptrace_entry is p's link on the p->parent->ptraced list.
    125      */
    126     struct list_head ptraced;
    127     struct list_head ptrace_entry;
    128 
    129     /* PID/PID hash table linkage. */
    130     struct pid_link pids[PIDTYPE_MAX];
    131     struct list_head thread_group;
    132     struct list_head thread_node;
    133 
    134     struct completion *vfork_done;        /* for vfork() */
    135     int __user *set_child_tid;        /* CLONE_CHILD_SETTID */
    136     int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */
    137 
    138     cputime_t utime, stime, utimescaled, stimescaled;
    139     cputime_t gtime;
    140 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
    141     struct cputime prev_cputime;
    142 #endif
    143 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
    144     seqlock_t vtime_seqlock;
    145     unsigned long long vtime_snap;
    146     enum {
    147         VTIME_SLEEPING = 0,
    148         VTIME_USER,
    149         VTIME_SYS,
    150     } vtime_snap_whence;
    151 #endif
    152     unsigned long nvcsw, nivcsw; /* context switch counts */
    153     u64 start_time;        /* monotonic time in nsec */
    154     u64 real_start_time;    /* boot based time in nsec */
    155 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    156     unsigned long min_flt, maj_flt;
    157 
    158     struct task_cputime cputime_expires;
    159     struct list_head cpu_timers[3];
    160 
    161 /* process credentials */
    162     const struct cred __rcu *real_cred; /* objective and real subjective task
    163                      * credentials (COW) */
    164     const struct cred __rcu *cred;    /* effective (overridable) subjective task
    165                      * credentials (COW) */
    166     char comm[TASK_COMM_LEN]; /* executable name excluding path
    167                      - access with [gs]et_task_comm (which lock
    168                        it with task_lock())
    169                      - initialized normally by setup_new_exec */
    170 /* file system info */
    171     int link_count, total_link_count;
    172 #ifdef CONFIG_SYSVIPC
    173 /* ipc stuff */
    174     struct sysv_sem sysvsem;
    175     struct sysv_shm sysvshm;
    176 #endif
    177 #ifdef CONFIG_DETECT_HUNG_TASK
    178 /* hung task detection */
    179     unsigned long last_switch_count;
    180 #endif
    181 /* CPU-specific state of this task */
    182     struct thread_struct thread;
    183 /* filesystem information */
    184     struct fs_struct *fs;
    185 /* open file information */
    186     struct files_struct *files;
    187 /* namespaces */
    188     struct nsproxy *nsproxy;
    189 /* signal handlers */
    190     struct signal_struct *signal;
    191     struct sighand_struct *sighand;
    192 
    193     sigset_t blocked, real_blocked;
    194     sigset_t saved_sigmask;    /* restored if set_restore_sigmask() was used */
    195     struct sigpending pending;
    196 
    197     unsigned long sas_ss_sp;
    198     size_t sas_ss_size;
    199     int (*notifier)(void *priv);
    200     void *notifier_data;
    201     sigset_t *notifier_mask;
    202     struct callback_head *task_works;
    203 
    204     struct audit_context *audit_context;
    205 #ifdef CONFIG_AUDITSYSCALL
    206     kuid_t loginuid;
    207     unsigned int sessionid;
    208 #endif
    209     struct seccomp seccomp;
    210 
    211 /* Thread group tracking */
    212        u32 parent_exec_id;
    213        u32 self_exec_id;
    214 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
    215  * mempolicy */
    216     spinlock_t alloc_lock;
    217 
    218     /* Protection of the PI data structures: */
    219     raw_spinlock_t pi_lock;
    220 
    221 #ifdef CONFIG_RT_MUTEXES
    222     /* PI waiters blocked on a rt_mutex held by this task */
    223     struct rb_root pi_waiters;
    224     struct rb_node *pi_waiters_leftmost;
    225     /* Deadlock detection and priority inheritance handling */
    226     struct rt_mutex_waiter *pi_blocked_on;
    227 #endif
    228 
    229 #ifdef CONFIG_DEBUG_MUTEXES
    230     /* mutex deadlock detection */
    231     struct mutex_waiter *blocked_on;
    232 #endif
    233 #ifdef CONFIG_TRACE_IRQFLAGS
    234     unsigned int irq_events;
    235     unsigned long hardirq_enable_ip;
    236     unsigned long hardirq_disable_ip;
    237     unsigned int hardirq_enable_event;
    238     unsigned int hardirq_disable_event;
    239     int hardirqs_enabled;
    240     int hardirq_context;
    241     unsigned long softirq_disable_ip;
    242     unsigned long softirq_enable_ip;
    243     unsigned int softirq_disable_event;
    244     unsigned int softirq_enable_event;
    245     int softirqs_enabled;
    246     int softirq_context;
    247 #endif
    248 #ifdef CONFIG_LOCKDEP
    249 # define MAX_LOCK_DEPTH 48UL
    250     u64 curr_chain_key;
    251     int lockdep_depth;
    252     unsigned int lockdep_recursion;
    253     struct held_lock held_locks[MAX_LOCK_DEPTH];
    254     gfp_t lockdep_reclaim_gfp;
    255 #endif
    256 
    257 /* journalling filesystem info */
    258     void *journal_info;
    259 
    260 /* stacked block device info */
    261     struct bio_list *bio_list;
    262 
    263 #ifdef CONFIG_BLOCK
    264 /* stack plugging */
    265     struct blk_plug *plug;
    266 #endif
    267 
    268 /* VM state */
    269     struct reclaim_state *reclaim_state;
    270 
    271     struct backing_dev_info *backing_dev_info;
    272 
    273     struct io_context *io_context;
    274 
    275     unsigned long ptrace_message;
    276     siginfo_t *last_siginfo; /* For ptrace use.  */
    277     struct task_io_accounting ioac;
    278 #if defined(CONFIG_TASK_XACCT)
    279     u64 acct_rss_mem1;    /* accumulated rss usage */
    280     u64 acct_vm_mem1;    /* accumulated virtual memory usage */
    281     cputime_t acct_timexpd;    /* stime + utime since last update */
    282 #endif
    283 #ifdef CONFIG_CPUSETS
    284     nodemask_t mems_allowed;    /* Protected by alloc_lock */
    285     seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
    286     int cpuset_mem_spread_rotor;
    287     int cpuset_slab_spread_rotor;
    288 #endif
    289 #ifdef CONFIG_CGROUPS
    290     /* Control Group info protected by css_set_lock */
    291     struct css_set __rcu *cgroups;
    292     /* cg_list protected by css_set_lock and tsk->alloc_lock */
    293     struct list_head cg_list;
    294 #endif
    295 #ifdef CONFIG_FUTEX
    296     struct robust_list_head __user *robust_list;
    297 #ifdef CONFIG_COMPAT
    298     struct compat_robust_list_head __user *compat_robust_list;
    299 #endif
    300     struct list_head pi_state_list;
    301     struct futex_pi_state *pi_state_cache;
    302 #endif
    303 #ifdef CONFIG_PERF_EVENTS
    304     struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
    305     struct mutex perf_event_mutex;
    306     struct list_head perf_event_list;
    307 #endif
    308 #ifdef CONFIG_DEBUG_PREEMPT
    309     unsigned long preempt_disable_ip;
    310 #endif
    311 #ifdef CONFIG_NUMA
    312     struct mempolicy *mempolicy;    /* Protected by alloc_lock */
    313     short il_next;
    314     short pref_node_fork;
    315 #endif
    316 #ifdef CONFIG_NUMA_BALANCING
    317     int numa_scan_seq;
    318     unsigned int numa_scan_period;
    319     unsigned int numa_scan_period_max;
    320     int numa_preferred_nid;
    321     unsigned long numa_migrate_retry;
    322     u64 node_stamp;            /* migration stamp  */
    323     u64 last_task_numa_placement;
    324     u64 last_sum_exec_runtime;
    325     struct callback_head numa_work;
    326 
    327     struct list_head numa_entry;
    328     struct numa_group *numa_group;
    329 
    330     /*
    331      * numa_faults is an array split into four regions:
    332      * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
    333      * in this precise order.
    334      *
    335      * faults_memory: Exponential decaying average of faults on a per-node
    336      * basis. Scheduling placement decisions are made based on these
    337      * counts. The values remain static for the duration of a PTE scan.
    338      * faults_cpu: Track the nodes the process was running on when a NUMA
    339      * hinting fault was incurred.
    340      * faults_memory_buffer and faults_cpu_buffer: Record faults per node
    341      * during the current scan window. When the scan completes, the counts
    342      * in faults_memory and faults_cpu decay and these values are copied.
    343      */
    344     unsigned long *numa_faults;
    345     unsigned long total_numa_faults;
    346 
    347     /*
    348      * numa_faults_locality tracks if faults recorded during the last
    349      * scan window were remote/local. The task scan period is adapted
    350      * based on the locality of the faults with different weights
    351      * depending on whether they were shared or private faults
    352      */
    353     unsigned long numa_faults_locality[2];
    354 
    355     unsigned long numa_pages_migrated;
    356 #endif /* CONFIG_NUMA_BALANCING */
    357 
    358     struct rcu_head rcu;
    359 
    360     /*
    361      * cache last used pipe for splice
    362      */
    363     struct pipe_inode_info *splice_pipe;
    364 
    365     struct page_frag task_frag;
    366 
    367 #ifdef    CONFIG_TASK_DELAY_ACCT
    368     struct task_delay_info *delays;
    369 #endif
    370 #ifdef CONFIG_FAULT_INJECTION
    371     int make_it_fail;
    372 #endif
    373     /*
    374      * when (nr_dirtied >= nr_dirtied_pause), it's time to call
    375      * balance_dirty_pages() for some dirty throttling pause
    376      */
    377     int nr_dirtied;
    378     int nr_dirtied_pause;
    379     unsigned long dirty_paused_when; /* start of a write-and-pause period */
    380 
    381 #ifdef CONFIG_LATENCYTOP
    382     int latency_record_count;
    383     struct latency_record latency_record[LT_SAVECOUNT];
    384 #endif
    385     /*
    386      * time slack values; these are used to round up poll() and
    387      * select() etc timeout values. These are in nanoseconds.
    388      */
    389     unsigned long timer_slack_ns;
    390     unsigned long default_timer_slack_ns;
    391 
    392 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
    393     /* Index of current stored address in ret_stack */
    394     int curr_ret_stack;
    395     /* Stack of return addresses for return function tracing */
    396     struct ftrace_ret_stack    *ret_stack;
    397     /* time stamp for last schedule */
    398     unsigned long long ftrace_timestamp;
    399     /*
    400      * Number of functions that haven't been traced
    401      * because of depth overrun.
    402      */
    403     atomic_t trace_overrun;
    404     /* Pause for the tracing */
    405     atomic_t tracing_graph_pause;
    406 #endif
    407 #ifdef CONFIG_TRACING
    408     /* state flags for use by tracers */
    409     unsigned long trace;
    410     /* bitmask and counter of trace recursion */
    411     unsigned long trace_recursion;
    412 #endif /* CONFIG_TRACING */
    413 #ifdef CONFIG_MEMCG
    414     struct memcg_oom_info {
    415         struct mem_cgroup *memcg;
    416         gfp_t gfp_mask;
    417         int order;
    418         unsigned int may_oom:1;
    419     } memcg_oom;
    420 #endif
    421 #ifdef CONFIG_UPROBES
    422     struct uprobe_task *utask;
    423 #endif
    424 #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
    425     unsigned int    sequential_io;
    426     unsigned int    sequential_io_avg;
    427 #endif
    428 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
    429     unsigned long    task_state_change;
    430 #endif
    431 };
    View Code

    愚蠢的问题1:

    MMU是由硬件实现的专门为解决虚拟地址和物理地址映射问题而设计的部件,那么为什么要在linux的源代码中体现呢?为什么要在软件中再描述一次呢?

    虚拟地址到物理地址的映射,(目前而讲)需要4级页表索引的访问来完成。在mm_struct结构体的定义之中有一个pgd_t类型的指针名叫pgd(PageGlobalDirectory),由此出发继续向下级访问有pud(PageUpperDirectory)、pmd(PageMiddleDirectory)、pte(PageTableEntry),最后一级是具体的页表。很遗憾的是,我暂时没有在3.19内核的源码中找到关于pte_t的定义,但是根据书籍上的描述应该是一个指向struct page数组的指针。

    于是我们可以这样总结,程序在执行的过程中会有大量的跳转,而每次的跳转需要一个操作数即地址,这个地址是一个虚拟地址,然后根据该虚拟地址进行MMU的操作,过程中得到一个页表,首先根据页表判断该页是否已经存在于物理内存中,如果不是的话则进行一次交换的操作,上文已经阐述过该过程,页交换完成之后,寻址过程就得以继续进行了,此时使用相同的虚拟地址访问到的是另一个物理页面,即交换进入的物理页面。

    愚蠢的问题2:

    虚拟内存的机制像是把物理内存和外部存储容量共同地址编码,这个共同的编码就是虚拟地址,所谓“编码”过程不一定是顺序一对一的,但是虚拟地址和页表的索引之间一定是个满射关系。

    这是我最初对于虚拟内存机制的理解,表面看起来没有什么问题,可是当考虑每个进程的寻址空间独立性的时候就会发现问题,相同的地址在两个进程中映射的外部地址应该可以是不相同的,可是一旦将他们看作共同地址编码,就不会有相同的逻辑地址映射到不同的物理地址这回事了。

    其实答案很简单一句话:每个进程维护一个页表 !

     

    最后一张大图概括一下上文

     

  • 相关阅读:
    Problem C: 爬楼梯
    Problem E: 倒水(Water)
    Problem H: tmk买礼物
    HDU 1078 FatMouse and Cheese
    POJ 3186 Treats for the Cows
    POJ 1661 Help Jimmy
    POJ 1458 Common Subsequence
    2018-软工机试-D-定西
    2018-软工机试-F-庙会
    2018-软工机试-C-和你在一起
  • 原文地址:https://www.cnblogs.com/guguli/p/4489272.html
Copyright © 2020-2023  润新知