• Linux内核学习第六周 进程描述与进程创建


    1.task_struct的数据结构

    1235struct task_struct {
    1236    volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
    1237    void *stack;
    1238    atomic_t usage;
    1239    unsigned int flags;    /* per process flags, defined below */
    1240    unsigned int ptrace;
    1241
    1242#ifdef CONFIG_SMP
    1243    struct llist_node wake_entry;
    1244    int on_cpu;
    1245    struct task_struct *last_wakee;
    1246    unsigned long wakee_flips;
    1247    unsigned long wakee_flip_decay_ts;
    1248
    1249    int wake_cpu;
    1250#endif
    1251    int on_rq;
    1252
    1253    int prio, static_prio, normal_prio;
    1254    unsigned int rt_priority;
    1255    const struct sched_class *sched_class;
    1256    struct sched_entity se;
    1257    struct sched_rt_entity rt;
    1258#ifdef CONFIG_CGROUP_SCHED
    1259    struct task_group *sched_task_group;
    1260#endif
    1261    struct sched_dl_entity dl;
    1262
    1263#ifdef CONFIG_PREEMPT_NOTIFIERS
    1264    /* list of struct preempt_notifier: */
    1265    struct hlist_head preempt_notifiers;
    1266#endif
    1267
    1268#ifdef CONFIG_BLK_DEV_IO_TRACE
    1269    unsigned int btrace_seq;
    1270#endif
    1271
    1272    unsigned int policy;
    1273    int nr_cpus_allowed;
    1274    cpumask_t cpus_allowed;
    1275
    1276#ifdef CONFIG_PREEMPT_RCU
    1277    int rcu_read_lock_nesting;
    1278    union rcu_special rcu_read_unlock_special;
    1279    struct list_head rcu_node_entry;
    1280#endif /* #ifdef CONFIG_PREEMPT_RCU */
    1281#ifdef CONFIG_TREE_PREEMPT_RCU
    1282    struct rcu_node *rcu_blocked_node;
    1283#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
    1284#ifdef CONFIG_TASKS_RCU
    1285    unsigned long rcu_tasks_nvcsw;
    1286    bool rcu_tasks_holdout;
    1287    struct list_head rcu_tasks_holdout_list;
    1288    int rcu_tasks_idle_cpu;
    1289#endif /* #ifdef CONFIG_TASKS_RCU */
    1290
    1291#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    1292    struct sched_info sched_info;
    1293#endif
    1294
    1295    struct list_head tasks;
    1296#ifdef CONFIG_SMP
    1297    struct plist_node pushable_tasks;
    1298    struct rb_node pushable_dl_tasks;
    1299#endif
    1300
    1301    struct mm_struct *mm, *active_mm;
    1302#ifdef CONFIG_COMPAT_BRK
    1303    unsigned brk_randomized:1;
    1304#endif
    1305    /* per-thread vma caching */
    1306    u32 vmacache_seqnum;
    1307    struct vm_area_struct *vmacache[VMACACHE_SIZE];
    1308#if defined(SPLIT_RSS_COUNTING)
    1309    struct task_rss_stat    rss_stat;
    1310#endif
    1311/* task state */
    1312    int exit_state;
    1313    int exit_code, exit_signal;
    1314    int pdeath_signal;  /*  The signal sent when the parent dies  */
    1315    unsigned int jobctl;    /* JOBCTL_*, siglock protected */
    1316
    1317    /* Used for emulating ABI behavior of previous Linux versions */
    1318    unsigned int personality;
    1319
    1320    unsigned in_execve:1;    /* Tell the LSMs that the process is doing an
    1321                 * execve */
    1322    unsigned in_iowait:1;
    1323
    1324    /* Revert to default priority/policy when forking */
    1325    unsigned sched_reset_on_fork:1;
    1326    unsigned sched_contributes_to_load:1;
    1327
    1328    unsigned long atomic_flags; /* Flags needing atomic access. */
    1329
    1330    pid_t pid;
    1331    pid_t tgid;
    1332
    1333#ifdef CONFIG_CC_STACKPROTECTOR
    1334    /* Canary value for the -fstack-protector gcc feature */
    1335    unsigned long stack_canary;
    1336#endif
    1337    /*
    1338     * pointers to (original) parent process, youngest child, younger sibling,
    1339     * older sibling, respectively.  (p->father can be replaced with
    1340     * p->real_parent->pid)
    1341     */
    1342    struct task_struct __rcu *real_parent; /* real parent process */
    1343    struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
    1344    /*
    1345     * children/sibling forms the list of my natural children
    1346     */
    1347    struct list_head children;    /* list of my children */
    1348    struct list_head sibling;    /* linkage in my parent's children list */
    1349    struct task_struct *group_leader;    /* threadgroup leader */
    1350
    1351    /*
    1352     * ptraced is the list of tasks this task is using ptrace on.
    1353     * This includes both natural children and PTRACE_ATTACH targets.
    1354     * p->ptrace_entry is p's link on the p->parent->ptraced list.
    1355     */
    1356    struct list_head ptraced;
    1357    struct list_head ptrace_entry;
    1358
    1359    /* PID/PID hash table linkage. */
    1360    struct pid_link pids[PIDTYPE_MAX];
    1361    struct list_head thread_group;
    1362    struct list_head thread_node;
    1363
    1364    struct completion *vfork_done;        /* for vfork() */
    1365    int __user *set_child_tid;        /* CLONE_CHILD_SETTID */
    1366    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */
    1367
    1368    cputime_t utime, stime, utimescaled, stimescaled;
    1369    cputime_t gtime;
    1370#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
    1371    struct cputime prev_cputime;
    1372#endif
    1373#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
    1374    seqlock_t vtime_seqlock;
    1375    unsigned long long vtime_snap;
    1376    enum {
    1377        VTIME_SLEEPING = 0,
    1378        VTIME_USER,
    1379        VTIME_SYS,
    1380    } vtime_snap_whence;
    1381#endif
    1382    unsigned long nvcsw, nivcsw; /* context switch counts */
    1383    u64 start_time;        /* monotonic time in nsec */
    1384    u64 real_start_time;    /* boot based time in nsec */
    1385/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    1386    unsigned long min_flt, maj_flt;
    1387
    1388    struct task_cputime cputime_expires;
    1389    struct list_head cpu_timers[3];
    1390
    1391/* process credentials */
    1392    const struct cred __rcu *real_cred; /* objective and real subjective task
    1393                     * credentials (COW) */
    1394    const struct cred __rcu *cred;    /* effective (overridable) subjective task
    1395                     * credentials (COW) */
    1396    char comm[TASK_COMM_LEN]; /* executable name excluding path
    1397                     - access with [gs]et_task_comm (which lock
    1398                       it with task_lock())
    1399                     - initialized normally by setup_new_exec */
    1400/* file system info */
    1401    int link_count, total_link_count;
    1402#ifdef CONFIG_SYSVIPC
    1403/* ipc stuff */
    1404    struct sysv_sem sysvsem;
    1405    struct sysv_shm sysvshm;
    1406#endif
    1407#ifdef CONFIG_DETECT_HUNG_TASK
    1408/* hung task detection */
    1409    unsigned long last_switch_count;
    1410#endif
    1411/* CPU-specific state of this task */
    1412    struct thread_struct thread;
    1413/* filesystem information */
    1414    struct fs_struct *fs;
    1415/* open file information */
    1416    struct files_struct *files;
    1417/* namespaces */
    1418    struct nsproxy *nsproxy;
    1419/* signal handlers */
    1420    struct signal_struct *signal;
    1421    struct sighand_struct *sighand;
    1422
    1423    sigset_t blocked, real_blocked;
    1424    sigset_t saved_sigmask;    /* restored if set_restore_sigmask() was used */
    1425    struct sigpending pending;
    1426
    1427    unsigned long sas_ss_sp;
    1428    size_t sas_ss_size;
    1429    int (*notifier)(void *priv);
    1430    void *notifier_data;
    1431    sigset_t *notifier_mask;
    1432    struct callback_head *task_works;
    1433
    1434    struct audit_context *audit_context;
    1435#ifdef CONFIG_AUDITSYSCALL
    1436    kuid_t loginuid;
    1437    unsigned int sessionid;
    1438#endif
    1439    struct seccomp seccomp;
    1440
    1441/* Thread group tracking */
    1442       u32 parent_exec_id;
    1443       u32 self_exec_id;
    1444/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
    1445 * mempolicy */
    1446    spinlock_t alloc_lock;
    1447
    1448    /* Protection of the PI data structures: */
    1449    raw_spinlock_t pi_lock;
    1450
    1451#ifdef CONFIG_RT_MUTEXES
    1452    /* PI waiters blocked on a rt_mutex held by this task */
    1453    struct rb_root pi_waiters;
    1454    struct rb_node *pi_waiters_leftmost;
    1455    /* Deadlock detection and priority inheritance handling */
    1456    struct rt_mutex_waiter *pi_blocked_on;
    1457#endif
    1458
    1459#ifdef CONFIG_DEBUG_MUTEXES
    1460    /* mutex deadlock detection */
    1461    struct mutex_waiter *blocked_on;
    1462#endif
    1463#ifdef CONFIG_TRACE_IRQFLAGS
    1464    unsigned int irq_events;
    1465    unsigned long hardirq_enable_ip;
    1466    unsigned long hardirq_disable_ip;
    1467    unsigned int hardirq_enable_event;
    1468    unsigned int hardirq_disable_event;
    1469    int hardirqs_enabled;
    1470    int hardirq_context;
    1471    unsigned long softirq_disable_ip;
    1472    unsigned long softirq_enable_ip;
    1473    unsigned int softirq_disable_event;
    1474    unsigned int softirq_enable_event;
    1475    int softirqs_enabled;
    1476    int softirq_context;
    1477#endif
    1478#ifdef CONFIG_LOCKDEP
    1479# define MAX_LOCK_DEPTH 48UL
    1480    u64 curr_chain_key;
    1481    int lockdep_depth;
    1482    unsigned int lockdep_recursion;
    1483    struct held_lock held_locks[MAX_LOCK_DEPTH];
    1484    gfp_t lockdep_reclaim_gfp;
    1485#endif
    1486
    1487/* journalling filesystem info */
    1488    void *journal_info;
    1489
    1490/* stacked block device info */
    1491    struct bio_list *bio_list;
    1492
    1493#ifdef CONFIG_BLOCK
    1494/* stack plugging */
    1495    struct blk_plug *plug;
    1496#endif
    1497
    1498/* VM state */
    1499    struct reclaim_state *reclaim_state;
    1500
    1501    struct backing_dev_info *backing_dev_info;
    1502
    1503    struct io_context *io_context;
    1504
    1505    unsigned long ptrace_message;
    1506    siginfo_t *last_siginfo; /* For ptrace use.  */
    1507    struct task_io_accounting ioac;
    1508#if defined(CONFIG_TASK_XACCT)
    1509    u64 acct_rss_mem1;    /* accumulated rss usage */
    1510    u64 acct_vm_mem1;    /* accumulated virtual memory usage */
    1511    cputime_t acct_timexpd;    /* stime + utime since last update */
    1512#endif
    1513#ifdef CONFIG_CPUSETS
    1514    nodemask_t mems_allowed;    /* Protected by alloc_lock */
    1515    seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
    1516    int cpuset_mem_spread_rotor;
    1517    int cpuset_slab_spread_rotor;
    1518#endif
    1519#ifdef CONFIG_CGROUPS
    1520    /* Control Group info protected by css_set_lock */
    1521    struct css_set __rcu *cgroups;
    1522    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    1523    struct list_head cg_list;
    1524#endif
    1525#ifdef CONFIG_FUTEX
    1526    struct robust_list_head __user *robust_list;
    1527#ifdef CONFIG_COMPAT
    1528    struct compat_robust_list_head __user *compat_robust_list;
    1529#endif
    1530    struct list_head pi_state_list;
    1531    struct futex_pi_state *pi_state_cache;
    1532#endif
    1533#ifdef CONFIG_PERF_EVENTS
    1534    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
    1535    struct mutex perf_event_mutex;
    1536    struct list_head perf_event_list;
    1537#endif
    1538#ifdef CONFIG_DEBUG_PREEMPT
    1539    unsigned long preempt_disable_ip;
    1540#endif
    1541#ifdef CONFIG_NUMA
    1542    struct mempolicy *mempolicy;    /* Protected by alloc_lock */
    1543    short il_next;
    1544    short pref_node_fork;
    1545#endif
    1546#ifdef CONFIG_NUMA_BALANCING
    1547    int numa_scan_seq;
    1548    unsigned int numa_scan_period;
    1549    unsigned int numa_scan_period_max;
    1550    int numa_preferred_nid;
    1551    unsigned long numa_migrate_retry;
    1552    u64 node_stamp;            /* migration stamp  */
    1553    u64 last_task_numa_placement;
    1554    u64 last_sum_exec_runtime;
    1555    struct callback_head numa_work;
    1556
    1557    struct list_head numa_entry;
    1558    struct numa_group *numa_group;
    1559
    1560    /*
    1561     * Exponential decaying average of faults on a per-node basis.
    1562     * Scheduling placement decisions are made based on the these counts.
    1563     * The values remain static for the duration of a PTE scan
    1564     */
    1565    unsigned long *numa_faults_memory;
    1566    unsigned long total_numa_faults;
    1567
    1568    /*
    1569     * numa_faults_buffer records faults per node during the current
    1570     * scan window. When the scan completes, the counts in
    1571     * numa_faults_memory decay and these values are copied.
    1572     */
    1573    unsigned long *numa_faults_buffer_memory;
    1574
    1575    /*
    1576     * Track the nodes the process was running on when a NUMA hinting
    1577     * fault was incurred.
    1578     */
    1579    unsigned long *numa_faults_cpu;
    1580    unsigned long *numa_faults_buffer_cpu;
    1581
    1582    /*
    1583     * numa_faults_locality tracks if faults recorded during the last
    1584     * scan window were remote/local. The task scan period is adapted
    1585     * based on the locality of the faults with different weights
    1586     * depending on whether they were shared or private faults
    1587     */
    1588    unsigned long numa_faults_locality[2];
    1589
    1590    unsigned long numa_pages_migrated;
    1591#endif /* CONFIG_NUMA_BALANCING */
    1592
    1593    struct rcu_head rcu;
    1594
    1595    /*
    1596     * cache last used pipe for splice
    1597     */
    1598    struct pipe_inode_info *splice_pipe;
    1599
    1600    struct page_frag task_frag;
    1601
    1602#ifdef    CONFIG_TASK_DELAY_ACCT
    1603    struct task_delay_info *delays;
    1604#endif
    1605#ifdef CONFIG_FAULT_INJECTION
    1606    int make_it_fail;
    1607#endif
    1608    /*
    1609     * when (nr_dirtied >= nr_dirtied_pause), it's time to call
    1610     * balance_dirty_pages() for some dirty throttling pause
    1611     */
    1612    int nr_dirtied;
    1613    int nr_dirtied_pause;
    1614    unsigned long dirty_paused_when; /* start of a write-and-pause period */
    1615
    1616#ifdef CONFIG_LATENCYTOP
    1617    int latency_record_count;
    1618    struct latency_record latency_record[LT_SAVECOUNT];
    1619#endif
    1620    /*
    1621     * time slack values; these are used to round up poll() and
    1622     * select() etc timeout values. These are in nanoseconds.
    1623     */
    1624    unsigned long timer_slack_ns;
    1625    unsigned long default_timer_slack_ns;
    1626
    1627#ifdef CONFIG_FUNCTION_GRAPH_TRACER
    1628    /* Index of current stored address in ret_stack */
    1629    int curr_ret_stack;
    1630    /* Stack of return addresses for return function tracing */
    1631    struct ftrace_ret_stack    *ret_stack;
    1632    /* time stamp for last schedule */
    1633    unsigned long long ftrace_timestamp;
    1634    /*
    1635     * Number of functions that haven't been traced
    1636     * because of depth overrun.
    1637     */
    1638    atomic_t trace_overrun;
    1639    /* Pause for the tracing */
    1640    atomic_t tracing_graph_pause;
    1641#endif
    1642#ifdef CONFIG_TRACING
    1643    /* state flags for use by tracers */
    1644    unsigned long trace;
    1645    /* bitmask and counter of trace recursion */
    1646    unsigned long trace_recursion;
    1647#endif /* CONFIG_TRACING */
    1648#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
    1649    unsigned int memcg_kmem_skip_account;
    1650    struct memcg_oom_info {
    1651        struct mem_cgroup *memcg;
    1652        gfp_t gfp_mask;
    1653        int order;
    1654        unsigned int may_oom:1;
    1655    } memcg_oom;
    1656#endif
    1657#ifdef CONFIG_UPROBES
    1658    struct uprobe_task *utask;
    1659#endif
    1660#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
    1661    unsigned int    sequential_io;
    1662    unsigned int    sequential_io_avg;
    1663#endif
    1664};
    task_struct

     2.进程创建

    Linux 系统中 fork、vfork、clone 等函数都可以用来创建一个新的进程,它们对应的系统调用定义如下所示:

    SYSCALL_DEFINE0(fork) 
    { 
      return do_fork(SIGCHLD, 0, 0, NULL, NULL);
    }
    
    SYSCALL_DEFINE0(vfork)
    { 
      return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL);
    }
    
    SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, 
                    int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val)
    {
      return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
    }

    可以看到,fork、vfork、clone 对应的系统调用,最终都是通过调用 do_fork 函数实现的。在 do_fork 函数中,真正实现复制的是 copy_process 函数;下面列出 do_fork 对 copy_process 的调用,以及 copy_process 内部的几个关键步骤:

    
    
    p = copy_process(clone_flags, stack_start, stack_size,child_tidptr, NULL, trace);
    p = dup_task_struct(current); // 复制父进程的task_struct,并为子进程创建内核栈
    retval = security_task_create(clone_flags);
    retval = sched_fork(clone_flags, p); // 和调度相关的设置,cpu将调度这个task
    retval = copy_thread(clone_flags, stack_start, stack_size, p); // 复制父进程内核堆栈中保存的寄存器现场(pt_regs)到子进程的内核堆栈中去。

    这其中,copy_thread函数中的语句p->thread.ip = (unsigned long) ret_from_fork决定了新进程的第一条指令地址。

    创建栈函数dup_task_struct:

    tsk = alloc_task_struct_node(node); // 为新的task_struct开辟内存空间
    ti = alloc_thread_info_node(tsk, node); // ti指向thread_info的首地址,同时也是系统为新进程分配的两个连续页面的首地址。
    err = arch_dup_task_struct(tsk, orig); // 复制父进程的task_struct信息到新的task_struct里
    tsk->stack = ti; // 让tsk->stack指向刚分配的thread_info,即新进程的内核栈
    setup_thread_stack(tsk, orig); // 初始化thread_info结构
    set_task_stack_end_magic(tsk); // 在栈的结束地址处写入魔数,作为栈结束标志

    3.新进程开始执行

    在之前的分析中,谈到copy_process中的copy_thread()函数,正是这个函数决定了子进程从系统调用返回后从哪里开始执行。其中的语句p->thread.ip = (unsigned long)ret_from_fork;将子进程的ip设置为ret_from_fork的首地址,因此子进程第一次被调度运行时,就从ret_from_fork开始执行。

    4.执行起点与内核堆栈如何保持一致

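    • 在ret_from_fork之前,也就是在copy_thread()函数中,*childregs = *current_pt_regs();该句将父进程内核堆栈中保存的regs参数复制到子进程的内核堆栈;
    • *childregs的类型为pt_regs,里面存放了SAVE_ALL中压入栈的参数;
    •  同时copy_thread()中的p->thread.sp = (unsigned long) childregs;让子进程的内核栈顶指向这份pt_regs,故子进程从ret_from_fork开始执行时,内核堆栈的内容与执行起点是一致的,之后的RESTORE_ALL能顺利执行下去。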
    5.总结
    Linux通过复制父进程来创建子进程,并为每个新创建的进程动态分配一个task_struct结构。fork被调用一次,却返回两次:在父进程中返回子进程的pid,在子进程中返回0。
  • 相关阅读:
    算法分析(3)——冒泡排序真的慢吗?
    算法分析(2)——大O和大Θ
    算法分析(1)——数据的影响和函数的增长
    递归的逻辑(5)——米诺斯的迷宫
    递归的逻辑(4)——递归与分形
    李洪强和你一起学习前端之(9)规避脱标,CSS可见性,滑动门案例
    iOS应用管理(优化)
    iOS开发-应用管理
    iOS开发-Tom猫
    iOS 10.3.3 更新背后的故事
  • 原文地址:https://www.cnblogs.com/zmingh/p/5350748.html
Copyright © 2020-2023  润新知