• dpdk rte_memzone_reserve


    [root@localhost dpdk-19.11]# cat   /sys/devices/system/node/node*/hugepages/hugepages-2048kB/nr_hugepages 
    0
    0
    0
    0
    [root@localhost dpdk-19.11]# cat   /sys/devices/system/node/node*/hugepages/hugepages-524288kB/nr_hugepages 
    64
    64
    64
    64
    [root@localhost dpdk-19.11]# 
    Breakpoint 1, main (argc=4, argv=0xfffffffff518) at /data1/dpdk-19.11/demo/memzone/main.c:45
    45                   mz = rte_memzone_reserve("memzone", sizeof(int)*2,
    (gdb) s
    rte_memzone_reserve (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0)
        at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:240
    240             return rte_memzone_reserve_thread_safe(name, len, socket_id,
    (gdb) s
    rte_memzone_reserve_thread_safe (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0, align=128, bound=0)
        at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:192
    192             const struct rte_memzone *mz = NULL;
    (gdb) list
    187     static const struct rte_memzone *
    188     rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
    189                     unsigned int flags, unsigned int align, unsigned int bound)
    190     {
    191             struct rte_mem_config *mcfg;
    192             const struct rte_memzone *mz = NULL;
    193
    194             /* get pointer to global configuration */
    195             mcfg = rte_eal_get_configuration()->mem_config;
    196
    (gdb) n
    195             mcfg = rte_eal_get_configuration()->mem_config;
    (gdb) p *mcfg
    Cannot access memory at address 0xfffffffff380
    (gdb) n
    197             rte_rwlock_write_lock(&mcfg->mlock);
    (gdb) p *mcfg
    $1 = {magic = 19820526, version = 319488099, nchannel = 0, nrank = 0, mlock = {cnt = 0}, qlock = {cnt = 0}, 
      mplock = {cnt = 0}, tlock = {locked = 0}, memory_hotplug_lock = {cnt = 0}, memzones = {
        name = "memzone", '00' <repeats 56 times>, count = 143, len = 2560, elt_sz = 72, data = 0x100010000, 
        rwlock = {cnt = 0}}, memsegs = {{{base_va = 0x120000000, addr_64 = 4831838208}, page_sz = 536870912, 
          socket_id = 0, version = 1, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-0-0", '00' <repeats 45 times>, count = 1, len = 64, elt_sz = 48, 
            data = 0x100040000, rwlock = {cnt = 0}}}, {{base_va = 0x940000000, addr_64 = 39728447488}, 
          page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-0-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x920000000, rwlock = {cnt = 0}}}, {{base_va = 0x1160000000, addr_64 = 74625056768}, 
          page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-0-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x1140000000, rwlock = {cnt = 0}}}, {{base_va = 0x1980000000, addr_64 = 109521666048}, 
          page_sz = 536870912, socket_id = 0, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-0-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x1960000000, rwlock = {cnt = 0}}}, {{base_va = 0x21a0000000, addr_64 = 144418275328}, 
          page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-1-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x2180000000, rwlock = {cnt = 0}}}, {{base_va = 0x29c0000000, addr_64 = 179314884608}, 
          page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-1-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x29a0000000, rwlock = {cnt = 0}}}, {{base_va = 0x31e0000000, addr_64 = 214211493888}, 
          page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-1-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x31c0000000, rwlock = {cnt = 0}}}, {{base_va = 0x3a00000000, addr_64 = 249108103168}, 
          page_sz = 536870912, socket_id = 1, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-1-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x39e0000000, rwlock = {cnt = 0}}}, {{base_va = 0x4220000000, addr_64 = 284004712448}, 
          page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-2-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x4200000000, rwlock = {cnt = 0}}}, {{base_va = 0x4a40000000, addr_64 = 318901321728}, 
    ---Type <return> to continue, or q <return> to quit---
          page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-2-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x4a20000000, rwlock = {cnt = 0}}}, {{base_va = 0x5260000000, addr_64 = 353797931008}, 
          page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-2-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x5240000000, rwlock = {cnt = 0}}}, {{base_va = 0x5a80000000, addr_64 = 388694540288}, 
          page_sz = 536870912, socket_id = 2, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-2-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x5a60000000, rwlock = {cnt = 0}}}, {{base_va = 0x62a0000000, addr_64 = 423591149568}, 
          page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-3-0", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x6280000000, rwlock = {cnt = 0}}}, {{base_va = 0x6ac0000000, addr_64 = 458487758848}, 
          page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-3-1", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x6aa0000000, rwlock = {cnt = 0}}}, {{base_va = 0x72e0000000, addr_64 = 493384368128}, 
          page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-3-2", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x72c0000000, rwlock = {cnt = 0}}}, {{base_va = 0x7b00000000, addr_64 = 528280977408}, 
          page_sz = 536870912, socket_id = 3, version = 0, len = 34359738368, external = 0, heap = 1, memseg_arr = {
            name = "memseg-524288k-3-3", '00' <repeats 45 times>, count = 0, len = 64, elt_sz = 48, 
            data = 0x7ae0000000, rwlock = {cnt = 0}}}, {{base_va = 0x0, addr_64 = 0}, page_sz = 0, socket_id = 0, 
          version = 0, len = 0, external = 0, heap = 0, memseg_arr = {name = '00' <repeats 63 times>, count = 0, 
            len = 0, elt_sz = 0, data = 0x0, rwlock = {cnt = 0}}} <repeats 48 times>}, tailq_head = {{tailq_head = {
            tqh_first = 0x0, tqh_last = 0x100002288}, name = "RTE_LPM", '00' <repeats 24 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x1000022b8}, name = "RTE_LPM6", '00' <repeats 23 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x1000022e8}, name = "RTE_ACL", '00' <repeats 24 times>}, {tailq_head = {
            tqh_first = 0x13ff79c00, tqh_last = 0x13ff79c00}, name = "RTE_HASH", '00' <repeats 23 times>}, {
          tailq_head = {tqh_first = 0x0, tqh_last = 0x100002348}, name = "RTE_FBK_HASH", '00' <repeats 19 times>}, 
        {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002378}, name = "RTE_MEMBER", '00' <repeats 21 times>}, {
          tailq_head = {tqh_first = 0x0, tqh_last = 0x1000023a8}, 
          name = "RTE_MBUF_DYNFIELD", '00' <repeats 14 times>}, {tailq_head = {tqh_first = 0x0, 
    ---Type <return> to continue, or q <return> to quit---
            tqh_last = 0x1000023d8}, name = "RTE_MBUF_DYNFLAG", '00' <repeats 15 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x100002408}, name = "RTE_EVENT_RING", '00' <repeats 17 times>}, {
          tailq_head = {tqh_first = 0x13febd800, tqh_last = 0x13febd800}, 
          name = "RTE_MEMPOOL", '00' <repeats 20 times>}, {tailq_head = {tqh_first = 0x0, tqh_last = 0x100002468}, 
          name = "RTE_STACK", '00' <repeats 22 times>}, {tailq_head = {tqh_first = 0x13ff9a080, 
            tqh_last = 0x13febd500}, name = "RTE_RING", '00' <repeats 23 times>}, {tailq_head = {tqh_first = 0x0, 
            tqh_last = 0x1000024c8}, name = "RTE_REORDER", '00' <repeats 20 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x1000024f8}, name = "RTE_KNI", '00' <repeats 24 times>}, {tailq_head = {
            tqh_first = 0x13ffd4d80, tqh_last = 0x13ffd4d80}, 
          name = "VFIO_RESOURCE_LIST", '00' <repeats 13 times>}, {tailq_head = {tqh_first = 0x0, 
            tqh_last = 0x100002558}, name = "UIO_RESOURCE_LIST", '00' <repeats 14 times>}, {tailq_head = {
            tqh_first = 0x0, tqh_last = 0x100002588}, name = "VMBUS_RESOURCE_LIST", '00' <repeats 12 times>}, {
          tailq_head = {tqh_first = 0x0, tqh_last = 0x0}, name = '00' <repeats 31 times>} <repeats 15 times>}, 
      malloc_heaps = {{lock = {locked = 0}, free_head = {{lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}, {
              lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x13fe81000}, {lh_first = 0x0}, {lh_first = 0x0}, {
              lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x120000000}, {lh_first = 0x0}}, 
          first = 0x120000000, last = 0x13fffdf80, alloc_count = 259, socket_id = 0, total_size = 536870912, 
          name = "socket_0", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
              lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 1, 
          total_size = 0, name = "socket_1", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
              lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 2, 
          total_size = 0, name = "socket_2", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
              lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 3, 
          total_size = 0, name = "socket_3", '00' <repeats 23 times>}, {lock = {locked = 0}, free_head = {{
              lh_first = 0x0} <repeats 13 times>}, first = 0x0, last = 0x0, alloc_count = 0, socket_id = 0, 
          total_size = 0, name = '00' <repeats 31 times>} <repeats 28 times>}, next_socket_id = 256, 
      mem_cfg_addr = 4294967296, legacy_mem = 0, single_file_segments = 0, tsc_hz = 100000000, 
      dma_maskbits = 0 '00'}
    (gdb) 
    (gdb) n
    199             mz = memzone_reserve_aligned_thread_unsafe(
    (gdb) s
    memzone_reserve_aligned_thread_unsafe (name=0xba6358 "memzone", len=8, socket_id=-1, flags=0, align=128, bound=0)
        at /data1/dpdk-19.11/lib/librte_eal/common/eal_common_memzone.c:69
    69              mcfg = rte_eal_get_configuration()->mem_config;
    (gdb) n
    70              arr = &mcfg->memzones;
    (gdb) n
    73              if (arr->count >= arr->len) {
    (gdb) p *arr
    $2 = {name = "memzone", '00' <repeats 56 times>, count = 143, len = 2560, elt_sz = 72, data = 0x100010000, 
      rwlock = {cnt = 0}}
    (gdb) n
    79              if (strlen(name) > sizeof(mz->name) - 1) {
    (gdb) n
    87              if ((memzone_lookup_thread_unsafe(name)) != NULL) {
    (gdb) n
    95              if (align && !rte_is_power_of_2(align)) {
    (gdb) n
    103             if (align < RTE_CACHE_LINE_SIZE)
    (gdb) n
    107             if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
    (gdb) n
    112             len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
    (gdb) n
    115             requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE,  len);
    (gdb) n
    118             if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
    (gdb) n
    123             if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) {
    (gdb) n
    131             if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES)
    (gdb) n
    134             contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
    (gdb) n
    136             flags &= ~RTE_MEMZONE_IOVA_CONTIG;
    (gdb) n
    138             if (len == 0 && bound == 0) {
    (gdb) n
    144                     if (len == 0)
    (gdb) n
    147                     mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,            ---------------------mz_addr从heap分配
    (gdb) n
    150             if (mz_addr == NULL) {
    (gdb) n
    155             struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
    (gdb) n
    158             mz_idx = rte_fbarray_find_next_free(arr, 0);
    (gdb) n
    160             if (mz_idx < 0) {
    (gdb) n
    163                     rte_fbarray_set_used(arr, mz_idx);
    (gdb) n
    164                     mz = rte_fbarray_get(arr, mz_idx);
    (gdb) p *mz
    $3 = {name = "356o.01c00v230000000000000000377377377377", '00' <repeats 11 times>, 
      {phys_addr = 0, iova = 0}, {addr = 0x656e6f7a6d656d, addr_64 = 28550397722191213}, len = 0, hugepage_sz = 0, 
      socket_id = 0, flags = 0}
    (gdb) n
    167             if (mz == NULL) {
    (gdb) n
    174             strlcpy(mz->name, name, sizeof(mz->name));
    (gdb) n
    175             mz->iova = rte_malloc_virt2iova(mz_addr);
    (gdb) n
    176             mz->addr = mz_addr;
    (gdb) n
    178                             elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
    (gdb) n
    177             mz->len = requested_len == 0 ?
    (gdb) n
    180             mz->hugepage_sz = elem->msl->page_sz;
    (gdb) n
    181             mz->socket_id = elem->msl->socket_id;
    (gdb) p *elem
    $4 = {heap = 0x100002900, prev = 0x13fe81000, next = 0x13febc800, free_list = {le_next = 0x0, le_prev = 0x0}, 
      msl = 0x100000088, state = ELEM_BUSY, pad = 0, size = 256, orig_elem = 0x120000000, orig_size = 536870912}
    (gdb) n
    182             mz->flags = 0;
    (gdb) n
    184             return mz;
    (gdb) p *mz
    $5 = {name = "memzone", '00' <repeats 24 times>, {phys_addr = 261454808960, iova = 261454808960}, {
        addr = 0x13febc780, addr_64 = 5367383936}, len = 128, hugepage_sz = 536870912, socket_id = 0, flags = 0}
    (gdb) c
    static const struct rte_memzone *
    memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
            int socket_id, unsigned flags, unsigned align, unsigned bound)
    {
        struct rte_memzone *mz;
        struct rte_mem_config *mcfg;
        size_t requested_len;
        int socket, i;
    
        /* 获取全局变量rte_mem_config结构的指针 */
        mcfg = rte_eal_get_configuration()->mem_config;
    
        /* no more room in config */
        /*如果分配的memzone数量已经超过了最大值,则返错(数组大小是有限的)*/
        if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
            RTE_LOG(ERR, EAL, "%s(): No more room in config
    ", __func__);
            rte_errno = ENOSPC;
            return NULL;
        }
        /*检查memzone的名字长度是否超过了限制*/
        if (strlen(name) > sizeof(mz->name) - 1) {
            RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long
    ",
                __func__, name);
            rte_errno = ENAMETOOLONG;
            return NULL;
        }
    
        /* 在mcfg->memzone[]中查找是否已有同名的memzone,如果有表示已存在,返回创建出错*/
        if ((memzone_lookup_thread_unsafe(name)) != NULL) {
            RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists
    ",
                __func__, name);
            rte_errno = EEXIST;
            return NULL;
        }
    
        /* 检查对齐内存大小是否是2的幂大小 */
        if (align && !rte_is_power_of_2(align)) {
            RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u
    ", __func__,
                    align);
            rte_errno = EINVAL;
            return NULL;
        }
    
        /* alignment less than cache size is not allowed */
        if (align < RTE_CACHE_LINE_SIZE)/*对齐大小不能小于cache_line大小*/
            align = RTE_CACHE_LINE_SIZE;
    
        /* align length on cache boundary. Check for overflow before doing so */
        if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
            rte_errno = EINVAL; /* requested size too big */
            return NULL;
        }
    
        len += RTE_CACHE_LINE_MASK;
        len &= ~((size_t) RTE_CACHE_LINE_MASK); /*申请内存大小进行内存对齐计算*/
    
        /* save minimal requested length */
        /*当申请的内存大小小于RTE_CACHE_LINE_SIZE时,则至少要分配RTE_CACHE_LINE_SIZE大小的内存*/
        requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
    
        /* check that boundary condition is valid */
        if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
            rte_errno = EINVAL;
            return NULL;
        }
        /*检查socket_id的合法性*/
        if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) {
            rte_errno = EINVAL;
            return NULL;
        }
        /*如果不使用hugepage,memzone的内存分配就不会考虑socke_id,而直接设置为SOCKET_ID_ANY*/
        if (!rte_eal_has_hugepages())
            socket_id = SOCKET_ID_ANY;
    
        if (len == 0) { /*申请内存大小等于0的情况,则申请申请最大的连续内存空间*/
            if (bound != 0)
                requested_len = bound;
            else {
                requested_len = find_heap_max_free_elem(&socket_id, align);
                if (requested_len == 0) {
                    rte_errno = ENOMEM;
                    return NULL;
                }
            }
        }
        /*如果socket_id为SOCKET_ID_ANY,则先在当前cpu所在的socket上分配内存*/
        if (socket_id == SOCKET_ID_ANY)
            socket = malloc_get_numa_socket();
        else
            socket = socket_id;
    
        /* 尝试在当前socket对应的malloc_heap上分配内存 */
        void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
                requested_len, flags, align, bound);
        /*如果socket_id为SOCKET_ID_ANY,且在当前socket上分配失败,就尝试在其他cpu分配*/
        if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
            /* try other heaps */
            for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
                if (socket == i)
                    continue;
    
                mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
                        NULL, requested_len, flags, align, bound);
                if (mz_addr != NULL)
                    break;
            }
        }
    
        if (mz_addr == NULL) {
            rte_errno = ENOMEM;
            return NULL;
        }
        /*获取对应内存的malloc_elem结构*/
        const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
    
        /* 从mcfg->memzone[]中找到一个还为使用的memzone结构 */
        mz = get_next_free_memzone();
    
        if (mz == NULL) {
            RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
                    "in config!
    ", __func__);
            rte_errno = ENOSPC;
            return NULL;
        }
        /*增加mcfg的memzone计数*/
        mcfg->memzone_cnt++;
        snprintf(mz->name, sizeof(mz->name), "%s", name);
        mz->phys_addr = rte_malloc_virt2phy(mz_addr);
        mz->addr = mz_addr;
        mz->len = (requested_len == 0 ? elem->size : requested_len);
        mz->hugepage_sz = elem->ms->hugepage_sz;/*memzone对应的socketid和hupagesize即为对应malloc_elem的值*/
        mz->socket_id = elem->ms->socket_id;
        mz->flags = 0;
        mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
    
        return mz;
    }
        mz = get_next_free_memzone();

    19.11版本

    mz = rte_fbarray_get(arr, mz_idx);
    if (len == 0 && bound == 0) {
                    /* no size constraints were placed, so use malloc elem len */
                    requested_len = 0;
                    mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
                                    align, contig);
            } else {
                    if (len == 0)
                            requested_len = bound;
                    /* allocate memory on heap */
                    mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
                                    flags, align, bound, contig);
            }
            if (mz_addr == NULL) {
                    rte_errno = ENOMEM;
                    return NULL;
            }
    struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
    
            /* fill the zone in config */
            mz_idx = rte_fbarray_find_next_free(arr, 0);
    
            if (mz_idx < 0) {
                    mz = NULL;
            } else {
                    rte_fbarray_set_used(arr, mz_idx);
                    mz = rte_fbarray_get(arr, mz_idx);
            }
    
            /* no free slot in the memzone array: release the element and fail */
            if (mz == NULL) {
                    RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__);
                    malloc_heap_free(elem);
                    rte_errno = ENOSPC;
                    return NULL;
            }
    
            strlcpy(mz->name, name, sizeof(mz->name));
            mz->iova = rte_malloc_virt2iova(mz_addr);
            mz->addr = mz_addr;
            mz->len = requested_len == 0 ?
                            elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
                            requested_len;
            mz->hugepage_sz = elem->msl->page_sz;
            mz->socket_id = elem->msl->socket_id;
            mz->flags = 0;
    
            return mz;

     

    看一下memzone的结构体, 包含了zone的name、起始IO addr、virt addr、长度、对应的大页大小等。

    /**
     * A structure describing a memzone, which is a contiguous portion of
     * physical memory identified by a name.
     *
     * The structure is declared packed, so no padding is inserted between
     * members. The two anonymous unions let the same storage be accessed
     * under two names each (phys_addr/iova and addr/addr_64).
     */
    struct rte_memzone {
    
    #define RTE_MEMZONE_NAMESIZE 32       /**< Maximum length of memory zone name.*/
    	char name[RTE_MEMZONE_NAMESIZE];  /**< Name of the memory zone. */
    
    	RTE_STD_C11
    	union {
    		phys_addr_t phys_addr;        /**< deprecated - Start physical address. */
    		rte_iova_t iova;              /**< Start IO address. */
    	};
    	RTE_STD_C11
    	union {
    		void *addr;                   /**< Start virtual address. */
    		uint64_t addr_64;             /**< Makes sure addr is always 64-bits */
    	};
    	size_t len;                       /**< Length of the memzone. */
    
    	uint64_t hugepage_sz;             /**< The page size of underlying memory */
    
    	int32_t socket_id;                /**< NUMA socket ID. */
    
    	uint32_t flags;                   /**< Characteristics of this memzone. */
    	uint32_t memseg_id;               /**< Memseg it belongs. */
    } __attribute__((__packed__));

    接下来,我们从rte_memzone_reserve()开始看起,用户程序会调用该函数申请memzone,此时不会指定align和bound,DPDK为提高内存读写效率,到处运用了内存对齐技术,但是暴露给客户的时候不会像他底层的实现那样需要到处留意,从这段就可以大概看到DPDK的封装确实很好,只暴露有必要暴露的。

    /*
     * Reserve a named memzone without explicit alignment or boundary
     * constraints: forwards to rte_memzone_reserve_thread_safe() with
     * RTE_CACHE_LINE_SIZE as the alignment and 0 as the bound.
     */
    const struct rte_memzone *
    rte_memzone_reserve(const char *name, size_t len, int socket_id,
    		    unsigned flags)
    {
    	const struct rte_memzone *zone;

    	zone = rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
    					       RTE_CACHE_LINE_SIZE, 0);
    	return zone;
    }

    这里继续封装一层,上了一把锁,因此 memzone_reserve_aligned_thread_unsafe这个函数的实现将不会再考虑线程安全的问题了。

    static const struct rte_memzone *
    rte_memzone_reserve_thread_safe(const char *name, size_t len,
    				int socket_id, unsigned flags, unsigned align,
    				unsigned bound)
    {
    	rte_rwlock_write_lock(&mcfg->mlock);
    	mz = memzone_reserve_aligned_thread_unsafe(
    		name, len, socket_id, flags, align, bound);
    	rte_rwlock_write_unlock(&mcfg->mlock);
    	return mz;
    }

    继续分析 memzone_reserve_aligned_thread_unsafe()。首先检查memzone数量,这个最大值是用户编译DPDK前通过配置文件指定的,因此这里也可以看到,并不是DPDK绑定的所有大页内存都拿来做memzone了,还有其他的内存模块会使用到。

    	/* no more room in config */
    	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
    		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
    		rte_errno = ENOSPC;
    		return NULL;
    	}

    检查用户申请的name是否已经存在。这个函数里面的实现很简单,在memzone数组中一个一个memzone地找过去,一个一个比较这个name是否已经存在。这里就可以看到memzone的申请确实效率很低,不适合大数量多次数地申请,只适合对申请效率要求不高的程序,或者预先规划好在程序初始化过程中一次性把需要的memzone全部申请完。

    	/* zone already exist */
    	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
    		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
    			__func__, name);
    		rte_errno = EEXIST;
    		return NULL;
    	}

    如果用户不指定要求alloc的memzone的内存长度,DPDK会在所有heap中找个最大的空闲malloc_elem给用户。find_heap_max_free_elem()这个函数效率更低,要每一个heap的每一个queue的每一个elem地遍历过去,全部遍历完了之后才能知道空闲的哪个elem才是长度最大的。

    			requested_len = find_heap_max_free_elem(&socket_id, align);
    			if (requested_len == 0) {
    				rte_errno = ENOMEM;
    				return NULL;
    			}

    如果用户指定了len,就以用户指定为准,如果没指定(即len=0),就以找到的最大长度来申请elem。

    	/* allocate memory on heap */
    	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
    			requested_len, flags, align, bound);

    如果用户没有指定socket id的话,就到其他的heap中去申请一下内存,但这样存在一个问题,会出现跨socket访问内存的问题,这个对效率影响非常大,程序性能甚至会降到30%左右,直接打了3折。

    	if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
    		/* try other heaps */
    		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
    			if (socket == i)
    				continue;
    			mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
    					NULL, requested_len, flags, align, bound);
    			if (mz_addr != NULL)
    				break;
    		}
    	}

    最后根据alloc到的elem和相关信息填写一下新的memzone,返回给用户

    	struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
    
    	/* fill the zone in config */
    	mz = get_next_free_memzone();
    	mcfg->memzone_cnt++;
    	snprintf(mz->name, sizeof(mz->name), "%s", name);
    	mz->iova = rte_malloc_virt2iova(mz_addr);
    	mz->addr = mz_addr;
    	mz->len = (requested_len == 0 ? elem->size : requested_len);
    	mz->hugepage_sz = elem->ms->hugepage_sz;
    	mz->socket_id = elem->ms->socket_id;
    	mz->flags = 0;
    	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;

    接下来看看memzone的释放流程。先用memset清空memzone数组中对应的表项,最后调用rte_free释放其占用的内存。我们在下一篇文章再来分析这个rte_free的实现。

    int
    rte_memzone_free(const struct rte_memzone *mz)
    {
    	rte_rwlock_write_lock(&mcfg->mlock);
    
    	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
    	idx = idx / sizeof(struct rte_memzone);
    
    	addr = mcfg->memzone[idx].addr;
    	if (addr == NULL)
    		ret = -EINVAL;
    	else if (mcfg->memzone_cnt == 0) {
    		rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!
    ",
    				__func__);
    	} else {
    		memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
    		mcfg->memzone_cnt--;
    	}
    
    	rte_rwlock_write_unlock(&mcfg->mlock);
    
    	rte_free(addr);
    
    	return ret;
    }
    
  • 相关阅读:
    一位资深程序员大牛给予Java初学者的学习路线建议
    Java基础部分全套教程.
    Java进阶面试问题列表
    成为伟大程序员的 10 个要点
    一位资深程序员大牛给予Java初学者的学习路线建议
    2年Java开发工作经验面试总结
    有效处理Java异常三原则
    Java打飞机小游戏(附完整源码)
    原生ajax封装,包含post、method方式
    手机端布局,rem布局动态获取根字体大小
  • 原文地址:https://www.cnblogs.com/dream397/p/13601025.html
Copyright © 2020-2023  润新知