malloc lib
该 lab 要求实现 mm_init, mm_malloc, mm_free, mm_realloc 四个接口.
准备工作
这次的lab没有step-by-step的提示, 要求你自己找到解决方案, 选择一种并实现.
首先是测试用例文件, 从官网上下载下来没有包含完整的, 只有两个简单的用例, 我找到 utah 大学的课程网站上的用例文件, 可以参考, 但这里的也缺少了 realloc 的用例.
其次是如何实现, 首先基于第9章已有的实现, 然后参考 utah 大学课程的 slide
思路
初步实现思路为
- 已分配块 只使用头部
- 使用显式空闲链表
得到结果:
Results for mm malloc:
trace valid util ops secs Kops
0 yes 89% 5694 0.001513 3764
1 yes 91% 5848 0.001202 4864
2 yes 94% 6648 0.002109 3152
3 yes 97% 5380 0.001928 2790
4 yes 80% 14402 0.001270 11345
5 yes 88% 4800 0.003394 1414
6 yes 88% 4800 0.003437 1397
7 yes 55% 12000 0.006543 1834
8 yes 51% 24000 0.007400 3243
Total 81% 83572 0.028796 2902
Perf index = 49 (util) + 40 (thru) = 89/100
不足:
最后两个用例效果不好. 可以观察到这两个用例都是先将一整块(512)分成两部分 malloc (64+448), 然后将其中一部分 free 掉(448), 最后再请求分配一整块(512). 对于我的实现, free 掉的内存得不到重新利用, 从而内存利用率下降. 一个比较好的方案是 Segregated Free List, 将不同大小的块分别放在不同的空闲链表中, 比如当请求分配一块大小为 64 的块时, 从 (32,64] 的空闲链表中取出一个块: 如果没有, 则分配一个新的块; 如果有, 则直接使用. 这里我实现时参考了博客.
然后还要对小内存块分配进行优化. 考虑到小内存块和大内存块交替分配的场景, 就算使用了 Segregated Free List, 还是没有解决内存碎片问题. 可以单独开辟一段连续的空间, 挂到单独的链表上, 专门给小内存分配使用, 这样大段内存和小段内存分开分配, 可以减轻内存碎片现象.
加入优化
- Segregated Free List
- 单独的内存 chunk 只分配小内存
trace valid util ops secs Kops
0 yes 99% 5694 0.001857 3066
1 yes 96% 5848 0.001976 2959
2 yes 100% 6648 0.002472 2689
3 yes 100% 5380 0.003225 1668
4 yes 80% 14402 0.002151 6697
5 yes 96% 4800 0.009382 512
6 yes 95% 4800 0.004393 1093
7 yes 97% 12000 0.002544 4717
8 yes 52% 24000 0.003696 6494
Total 90% 83572 0.031695 2637
Perf index = 54 (util) + 40 (thru) = 94/100
性能的分数已经满了, 可惜空间利用率还差一点.
对最后一个情况还是不行, 因为最后一个两次分配都是小内存, 我认为还要优化的话可以采用随机化思路, first random fit, 给定一个较小的概率, 在遇到合适的内存块时根据这个概率拒绝, 由于 debug 过于痛苦, 这个思路就暂时不去实现了.
代码
代码我将 list_node 的一些操作移到了单独的文件里, 实现比较简单, 这里只给出头文件
// Size threshold (in bytes) below which a block is treated as "small".
// Tuning note from the author: a value of 64 performs better on the binary2
// test case but worse on the binary test case.
#define SMALL_BLOCK_SIZE 128
// Doubly linked list node. The allocator overlays this structure on the
// payload area of a free block (see the casts to list_ptr in mm.c).
typedef struct list_node {
struct list_node *next;
struct list_node *prev;
} list_node, *list_ptr;
// Add bp to a free list; the list is chosen by get_free_list_idx(size).
void add_to_free_list(list_ptr bp, size_t size);
// Return the free-list head to use for a block of the given size.
list_ptr get_free_list_idx(size_t size);
// Add bp to the explicitly given list.
// NOTE(review): insertion position (head or tail) is not visible here.
void add_to_list(list_ptr bp, list_ptr list);
// Unlink node from whichever list currently holds it.
void remove_from_free_list(list_ptr node);
然后是mm.c
/* single word (4) or double word (8) alignment */
#define ALIGNMENT 8
/* rounds up to the nearest multiple of ALIGNMENT (a power of two) */
#define ALIGN(size) (((size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))
#define WSIZE 4 /* size of a header/footer word, in bytes */
#define DSIZE 8 /* double word size, in bytes */
/* Smallest block: header + two list pointers + footer on the 32-bit target. */
#define MIN_BLOCK 16
#define CHUNKSIZE (1 << 12) /* default heap extension, in bytes */
/* NOTE: evaluates its arguments twice; do not pass expressions with side effects. */
#define MAX(x, y) ((x) > (y) ? (x) : (y))
/*
 * Build a header/footer word: block size in the upper bits, bit 1 = previous
 * block allocated, bit 0 = this block allocated. Every argument is
 * parenthesized so that expressions such as `a & b` expand correctly.
 */
#define PACK(size, prev_alloc, alloc) ((size) | ((prev_alloc) << 1) | (alloc))
/* Read / write one word at address P. */
#define GET(P) (*(size_t*)(P))
#define PUT(P, val) (*(size_t*)(P) = (val))
/* Extract the size and the two flag bits from the word at address P. */
#define GET_SIZE(P) (GET(P) & ~0x7)
#define GET_ALLOC(P) (GET(P) & 0x1)
#define GET_PREV_ALLOC(P) ((GET(P) & 0x2) >> 1)
/* Given block ptr bp, compute address of its header and footer */
#define HDRP(bp) ((char*)(bp) - WSIZE)
#define FTRP(bp) ((char*)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)
/* Given block ptr bp, compute the block ptr of the next / previous block. */
/* PREV_BLKP reads the previous block's footer, so that block must be free. */
#define NEXT_BLKP(bp) ((char*)(bp) + GET_SIZE(((char*)(bp)-WSIZE)))
#define PREV_BLKP(bp) ((char*)(bp) - GET_SIZE(((char*)(bp)-DSIZE)))
/* Heap cursor: mm_init() sets it from mem_sbrk() and then advances it past
 * the six free-list head slots. */
static void* heap_listp;
/* Forward declarations for helpers defined later in this file. */
void* find_fit(size_t size);
void place(void* bp, size_t size);
static void* extend_heap(size_t words, int prev_alloced);
static void* coalesce(void* bp);
static void update_next(void* bp, int alloc);
static void* find_for_small(size_t size);
/* Debug level: non-zero prints malloc/free/init messages; the value 2 also
 * makes print_free_list() dump every free list. */
static int m_verbose = 0;
/* Address of free-list head slot 0; defined in another file, assigned in
 * mm_init(). */
extern list_ptr free_list_base;
/* free_list_end: address of head slot 5. It is the exclusive upper bound of
 * the find_*_fit scans and the head of the small-block list used by
 * find_for_small(). free_list_small_end: address of head slot 2; in the code
 * shown here it is only referenced from commented-out code. */
static void* free_list_end, *free_list_small_end;
/* When defined, mm_malloc() routes adjusted sizes below SMALL_BLOCK_SIZE to
 * find_for_small() and get_free_list_idx() maps such sizes to slot 5. */
#define ENABLE_SMALL_OPMIZATION
void print_free_list(const char* msg) {
void* list = free_list_base;
list_ptr cur;
size_t i = 0;
if (m_verbose != 2) {
return;
}
puts("==============");
if (msg) {
puts(msg);
}
while (list != (free_list_end + WSIZE)) {
printf("FreeList[%d]:\n", i);
for (cur = ((list_ptr)list)->next; cur != NULL; cur = cur->next) {
printf("%p 0x%x\t", cur, GET(HDRP(cur)));
}
list += WSIZE;
i++;
puts("");
}
puts("==============");
}
/*
 * get_free_list_idx - map a block size to the head slot of its free list.
 *
 * With ENABLE_SMALL_OPMIZATION defined, sizes below SMALL_BLOCK_SIZE use
 * slot 5 and the remaining sizes use slots 2 (< 512), 3 (< 1024) and 4.
 * Without it, the boundaries 32 / 128 / 512 / 1024 select slots 0..4.
 * Returns the address of the chosen head slot, viewed as a list node.
 */
list_ptr get_free_list_idx(size_t size) {
    unsigned int *heads = (unsigned int*)free_list_base;
    size_t slot;

#ifdef ENABLE_SMALL_OPMIZATION
    if (size < SMALL_BLOCK_SIZE)
        slot = 5;
    else
        slot = (size < 512) ? 2 : (size < 1024) ? 3 : 4;
#else // ENABLE_SMALL_OPMIZATION
    /* Exclusive upper bound of each of the first four size classes. */
    static const size_t upper[] = { 32, 128, 512, 1024 };

    for (slot = 0; slot < 4 && size >= upper[slot]; slot++)
        ;
#endif // ENABLE_SMALL_OPMIZATION
    return (list_ptr)(heads + slot);
}
/*
* mm_init - initialize the malloc package.
*/
int mm_init(void)
{
mem_init();
if ((heap_listp = mem_sbrk(8 * WSIZE)) == (void*)-1)
return -1;
PUT(heap_listp, 0); //block size <= 32
PUT(heap_listp+(1*WSIZE), 0); //block size <= 128
PUT(heap_listp+(2*WSIZE), 0); //block size <= 512
PUT(heap_listp+(3*WSIZE), 0); //block size <= 1024
PUT(heap_listp+(4*WSIZE), 0); //block size > 1024
PUT(heap_listp+(5*WSIZE), 0); //small block alloc
PUT(heap_listp+(6*WSIZE), PACK(0, 1, 1));
PUT(heap_listp+(7*WSIZE), PACK(0, 1, 1));
free_list_base = (list_ptr)heap_listp;
free_list_end = heap_listp + (5*WSIZE);
free_list_small_end = heap_listp + (2*WSIZE);
heap_listp += (6*WSIZE);
if (extend_heap(CHUNKSIZE / WSIZE, 1) == NULL)
return -1;
if (m_verbose) {
printf("INFO: mm_init finished\n");
}
return 0;
}
/*
 * mm_malloc - Allocate a block with at least `size` bytes of payload.
 *
 * The request is rounded up to an adjusted size that includes the WSIZE
 * header and is a multiple of DSIZE, never smaller than MIN_BLOCK. With
 * ENABLE_SMALL_OPMIZATION defined, adjusted sizes below SMALL_BLOCK_SIZE are
 * served by find_for_small(); all other requests go through find_fit() and,
 * when no free block fits, extend_heap().
 *
 * Returns the payload pointer, or NULL when size is 0, when the request is
 * too large to be rounded up without overflow, or when the heap cannot grow.
 */
void *mm_malloc(size_t size)
{
    size_t asize;
    char* bp;

    print_free_list("before malloc");
    if (size == 0)
        return NULL;
    /* Without this guard the rounding below wraps around for huge requests
     * and hands back a block far smaller than requested. */
    if (size > (size_t)-1 - (WSIZE + DSIZE - 1))
        return NULL;
    if (size <= 2 * WSIZE)
        asize = MIN_BLOCK;
    else
        asize = DSIZE * ((size + WSIZE + (DSIZE - 1)) / DSIZE);
#ifdef ENABLE_SMALL_OPMIZATION
    if (asize < SMALL_BLOCK_SIZE) {
        bp = find_for_small(asize);
        if (m_verbose) {
            printf("malloc %p %zu\n", (void *)bp, asize);
        }
        return bp;
    }
#endif // ENABLE_SMALL_OPMIZATION
    if ((bp = find_fit(asize)) != NULL) {
        place(bp, asize);
        if (m_verbose) {
            printf("malloc %p %zu\n", (void *)bp, asize);
        }
        return bp;
    }
    /* No fit: grow the heap by at least CHUNKSIZE bytes and carve the block
     * out of the new free space. */
    if ((bp = extend_heap(MAX(asize, CHUNKSIZE) / WSIZE, 0)) == NULL)
        return NULL;
    if (m_verbose) {
        printf("malloc %p %zu\n", (void *)bp, asize);
    }
    place(bp, asize);
    return bp;
}
/*
 * mm_free - Return a block to the allocator.
 *
 * Clears the block's allocated bit in its header, writes a matching footer,
 * clears the prev_alloc flag of the following block, and hands the block to
 * coalesce(), which merges it with free neighbours and puts the result on a
 * free list. Passing NULL is a no-op, as with the standard free().
 */
void mm_free(void *bp)
{
    size_t size;
    size_t prev_alloc;

    if (bp == NULL)
        return;
    print_free_list("before free");
    size = GET_SIZE(HDRP(bp));
    prev_alloc = GET_PREV_ALLOC(HDRP(bp));
    /* Allocated blocks carry no footer; create one now that it is free. */
    PUT(HDRP(bp), PACK(size, prev_alloc, 0));
    PUT(FTRP(bp), PACK(size, prev_alloc, 0));
    update_next(bp, 0);
    coalesce(bp);
    if (m_verbose) {
        printf("freed %p %zu\n", bp, size);
    }
}
/*
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free
*/
void *mm_realloc(void *ptr, size_t size)
{
void *newptr = ptr;
size_t copySize = GET_SIZE(HDRP(ptr));
if (copySize >= size) {
// Reuse current memory.
place(ptr, copySize);
} else {
newptr = mm_malloc(size);
if (newptr == NULL)
return NULL;
memcpy(newptr, ptr, copySize);
mm_free(ptr);
}
return newptr;
}
static void* coalesce(void* bp) {
void* prev_block = PREV_BLKP(bp);
void* next_block = NEXT_BLKP(bp);
size_t prev_alloc = GET_PREV_ALLOC(HDRP(bp));
size_t next_alloc = GET_ALLOC(HDRP(next_block));
size_t size = GET_SIZE(HDRP(bp));
void* new_bp = bp;
if (prev_alloc && next_alloc) {
add_to_free_list(bp, size);
update_next(bp, 0);
return bp;
}
if (!next_alloc) {
// Merge with next block.
// putchar('1');
remove_from_free_list((list_ptr)next_block);
size += GET_SIZE(HDRP(next_block));
}
if (!prev_alloc) {
// Merge with previout block.
// putchar('2');
remove_from_free_list((list_ptr)prev_block);
size += GET_SIZE(HDRP(prev_block));
// bp = prev_block;
new_bp = prev_block;
}
PUT(HDRP(new_bp), PACK(size, 1, 0));
PUT(FTRP(new_bp), PACK(size, 1, 0));
add_to_free_list(new_bp, size);
update_next(new_bp, 0);
return new_bp;
}
/*
 * extend_heap - grow the heap by `words` words (rounded up to an even
 * count) and turn the new space into a free block.
 *
 * prev_alloced: non-zero forces the new block's prev_alloc flag to that
 * value; zero means "take the flag from the word that currently sits at the
 * new block's header position". A fresh end marker (size 0, allocated) is
 * written after the new block, which is then passed to coalesce().
 *
 * Returns the payload pointer of the resulting free block, or NULL if
 * mem_sbrk() fails.
 */
static void* extend_heap(size_t words, int prev_alloced) {
    size_t bytes = (words + (words % 2)) * WSIZE;
    char *bp = mem_sbrk(bytes);

    if (bp == (void*)-1)
        return NULL;
    if (!prev_alloced)
        prev_alloced = GET_PREV_ALLOC(HDRP(bp));

    /* The same word goes into the header and the footer of the new block. */
    PUT(HDRP(bp), PACK(bytes, prev_alloced, 0));
    PUT(FTRP(bp), PACK(bytes, prev_alloced, 0));
    /* End marker: no size, prev not allocated, itself allocated so that
     * nothing ever merges past the end of the heap. */
    PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 0, 1));
    return coalesce(bp);
}
void* find_first_fit(size_t size) {
list_ptr bp;
void* free_list = get_free_list_idx(size);
void* list_end = free_list_end;
// #ifdef ENABLE_SMALL_OPMIZATION
// if (size < SMALL_BLOCK_SIZE) {
// list_end = free_list_small_end;
// }
// #endif // ENABLE_SMALL_OPMIZATION
for (; free_list < list_end; free_list += WSIZE) {
for (bp = ((list_ptr)free_list)->next; bp != NULL ; bp = bp->next) {
if (size <= GET_SIZE(HDRP(bp))) {
return bp;
}
}
}
return NULL;
}
void* find_best_fit(size_t size) {
list_ptr bp = NULL;
list_ptr best_bp = NULL;
void* free_list = get_free_list_idx(size);
void* list_end = free_list_end;
// #ifdef ENABLE_SMALL_OPMIZATION
// if (size < SMALL_BLOCK_SIZE) {
// list_end = free_list_small_end;
// }
// #endif // ENABLE_SMALL_OPMIZATION
for (; free_list < list_end && best_bp == NULL; free_list += WSIZE) {
for (bp = ((list_ptr)free_list)->next; bp != NULL ; bp = bp->next) {
if (GET_SIZE(HDRP(bp)) >= size &&
(best_bp == NULL || GET_SIZE(HDRP(bp)) < GET_SIZE(HDRP(best_bp)))) {
// return bp;
best_bp = bp;
}
}
}
return best_bp;
}
static void* find_for_small(size_t size) {
list_ptr bp;
list_ptr small_block_list;
size_t asize;
void* next_bp;
small_block_list = (list_ptr)free_list_end;
print_free_list("before find small");
for (bp = small_block_list->next; bp != NULL ; bp = bp->next) {
if (GET_SIZE(HDRP(bp)) >= size) {
break;
}
}
if (bp == NULL) {
if ((bp = extend_heap(MAX(size, CHUNKSIZE) / WSIZE, 0)) == NULL)
return NULL;
remove_from_free_list((list_ptr)bp);
// add to free list
add_to_list((list_ptr)bp, small_block_list);
}
asize = GET_SIZE(HDRP(bp));
remove_from_free_list((list_ptr)bp);
if (asize - size < MIN_BLOCK) {
// not split
PUT(HDRP(bp), PACK(asize, 1, 1));
update_next(bp, 1);
} else {
PUT(HDRP(bp), PACK(size, 1, 1));
// Split out a new block.
if (asize - size > CHUNKSIZE) {
// Split again if new block is larger than chunksize
void* nn_bp;
next_bp = NEXT_BLKP(bp);
PUT(HDRP(next_bp), PACK(CHUNKSIZE, 1, 0));
PUT(FTRP(next_bp), PACK(CHUNKSIZE, 1, 0));
nn_bp = NEXT_BLKP(next_bp);
PUT(HDRP(nn_bp), PACK(asize - size - CHUNKSIZE, 0, 0));
PUT(FTRP(nn_bp), PACK(asize - size - CHUNKSIZE, 0, 0));
add_to_free_list(nn_bp, GET_SIZE(HDRP(nn_bp)));
} else {
next_bp = NEXT_BLKP(bp);
PUT(HDRP(next_bp), PACK(asize - size, 1, 0));
PUT(FTRP(next_bp), PACK(asize - size, 1, 0));
}
add_to_list(next_bp, small_block_list);
}
print_free_list("after find small");
return bp;
}
/*
 * find_fit - placement policy used by mm_malloc(): currently best fit.
 * find_first_fit() has the same signature and can be substituted here.
 */
void* find_fit(size_t size) {
    void *hit = find_best_fit(size);

    return hit;
}
/*
 * place - mark the free block bp as allocated with `size` bytes.
 *
 * bp is unlinked from its free list. If the part of the block beyond `size`
 * is at least MIN_BLOCK bytes, it is split off as a new free block and put
 * on a free list. Otherwise the whole block is handed out and the following
 * block's prev_alloc flag is set.
 */
void place(void* bp, size_t size) {
    size_t total = GET_SIZE(HDRP(bp));
    size_t spare = total - size;

    remove_from_free_list((list_ptr)bp);
    if (spare >= MIN_BLOCK) {
        void *rest;

        /* Shrink bp to the requested size, keeping its prev_alloc flag. */
        PUT(HDRP(bp), PACK(size, GET_PREV_ALLOC(HDRP(bp)), 1));
        rest = NEXT_BLKP(bp);
        /* The remainder is free and its predecessor (bp) is allocated. */
        PUT(HDRP(rest), PACK(spare, 1, 0));
        PUT(FTRP(rest), PACK(spare, 1, 0));
        add_to_free_list(rest, spare);
        return;
    }
    /* No split: the whole block becomes allocated. */
    PUT(HDRP(bp), PACK(total, GET_PREV_ALLOC(HDRP(bp)), 1));
    update_next(bp, 1);
}
/**
 * update_next - propagate bp's new allocation state to the block after it.
 *
 * Rewrites the following block's header with its prev_alloc flag set to
 * `alloc`, keeping that block's size and own alloc bit. If the following
 * block is free, its footer receives the same word.
 */
static void update_next(void* bp, int alloc) {
    void *succ = NEXT_BLKP(bp);
    char *succ_hdr = HDRP(succ);
    size_t succ_alloc = GET_ALLOC(succ_hdr);
    size_t succ_size = GET_SIZE(succ_hdr);
    size_t word = PACK(succ_size, alloc, succ_alloc);

    PUT(succ_hdr, word);
    if (succ_alloc == 0) {
        /* Only free blocks carry a footer. */
        PUT(FTRP(succ), word);
    }
}
其实要干的就两件事, 维护 free list, 维护 header, 首先区分好函数影响的范围, 然后去做维护, 这样代码就更加有条理.
比如 free, 首先要更新我 header 的 alloc , footer 和 next block 的 prev_alloc, 然后交给 coalesce 去合并和更新 free list.
malloc
    find_for_small
    find_fit
    extend_heap
        coalesce
            add_to_free_list
            remove_from_free_list
    place
        add_to_free_list
free
    coalesce
        add_to_free_list
        remove_from_free_list
一些总结和技巧
- pointer size : 32位编译, 4字节
- pointer add : 指针运算是按类型走的, list_ptr + 4 就会直接越界, 使用 void* 就是按字节加.
- 熟悉块的内存结构
- |header|payload| allocated
- |header|next, prev|footer| unallocated
- header = size | prev_alloc << 1 | alloc
- 因为size是8字节对齐的, 余出3个bit 可以塞flag
- 有了 prev_alloc, 已分配的内存块就不需要 footer, 因为合并时我们已经知道它已分配, 就不需要再看 header 信息.
- header 中的 size 是带上 overhead 的已分配 size, 用户 malloc 所期望返回的是一个大于等于 target-size 的内存块, 我们在它上面做了什么它并不关心, 因此在代码内部的 size 永远是块的真正 size(从header开始算, 而不是payload开始, 这里有时候会迷, 因为代码是以payload为基准).
- 为内存块设置结构体(header, free list), 这样的好处是你可以方便的从gdb里看到它的内容, 而不是手动去看内存. 你可以不按结构体存, 继续使用宏定义操作它, 需要用到结构体时直接将它转成对应结构体指针就好(类似于直接当成栈来用).
- 关于 debug, 可以尝试 vscode remote + gdb, 比 gdb 的 gui 模式方便许多.
- debug 2, mdriver 自带了一个 alloc 检查, 可以检查 overlapping / align / out of boundary. 后面我发现写一个 print_free_list, 在调试小用例时会很有帮助.