1,mbuf就是存储要发送数据的memory buffer,类似于Linux的sk_buff,不过结构比较简单。
/* header at beginning of each mbuf: */
这个结构用来描述mbuf跟具体的内容无关
/*
 * Bookkeeping header placed at the start of every mbuf. It describes the
 * buffer only (chain links, data pointer and length, type, flags) and
 * carries no payload of its own.
 */
struct m_hdr {
struct mbuf *mh_next; /* next buffer in chain */
struct mbuf *mh_nextpkt; /* next chain in queue/record */
int mh_len; /* amount of data in this mbuf */
caddr_t mh_data; /* location of data */
short mh_type; /* type of data in this mbuf */
short mh_flags; /* flags; see below */
};
/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
对mbuf中数据的描述,len和接收接口
/*
 * Per-packet information kept in the first mbuf of a chain: the total
 * packet length and the interface the packet was received on.
 */
struct pkthdr {
int len; /* total packet length */
struct ifnet *rcvif; /* rcv interface */
};
/* description of external storage mapped into mbuf, valid if M_EXT set */
/*
 * Descriptor for storage that lives outside the mbuf itself: where the
 * external buffer starts, how large it is, and an optional custom routine
 * used to release it.
 */
struct m_ext {
caddr_t ext_buf; /* start of buffer */
void (*ext_free)(); /* free routine if not the usual */
u_int ext_size; /* size of buffer, for ext_free */
};
这个就是mbuf的描述,设计的比较巧妙
/*
 * The mbuf itself: a struct m_hdr followed by a data area whose layout is a
 * union selected by the header flags.
 *  - Packet-header form (MH): a struct pkthdr followed by either an external
 *    storage descriptor (M_EXT) or MHLEN bytes of inline data.
 *  - Plain form: MLEN bytes of inline data directly after the header.
 */
struct mbuf {
struct m_hdr m_hdr;
union {
struct {
struct pkthdr MH_pkthdr; /* M_PKTHDR set */
union {
struct m_ext MH_ext; /* M_EXT set */
char MH_databuf[MHLEN];
} MH_dat;
} MH;
char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
} M_dat;
};
2,mbuf中m_hdr.mh_flags:
/* mbuf flags */

/*
 * M_EXT: the data is held in external storage (a "cluster") instead of
 * inside the mbuf. Per the original notes an mbuf is 128 bytes (guessed to
 * match a cache line), so larger payloads are either spread over several
 * chained mbufs or placed in a cluster.
 */
#define M_EXT 0x0001 /* has associated external storage */

/* M_PKTHDR: first mbuf of a packet; its data area contains a struct pkthdr. */
#define M_PKTHDR 0x0002 /* start of record */

/* M_EOR: marks the tail of a record. TCP is a byte stream and does not set it. */
#define M_EOR 0x0004 /* end of record */

/* mbuf pkthdr flags, also in m_flags */
#define M_BCAST 0x0100 /* send/received as link-level broadcast */
#define M_MCAST 0x0200 /* send/received as link-level multicast */

/*
 * flags copied when copying m_pkthdr
 * NOTE(review): the place where this mask is applied is not visible here;
 * presumably it filters m_flags when a packet header is copied to another
 * mbuf - confirm against M_COPY_PKTHDR.
 */
#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_BCAST|M_MCAST)
3,mbuf的种类
/* mbuf types */
#define MT_FREE 0 /* should be on free list */
#define MT_DATA 1 /* dynamic (data) allocation; ordinary packet data uses this type */
#define MT_HEADER 2 /* packet header */
#define MT_SOCKET 3 /* socket structure */
#define MT_PCB 4 /* protocol control block */
#define MT_RTABLE 5 /* routing tables */
#define MT_HTABLE 6 /* IMP host tables */
#define MT_ATABLE 7 /* address resolution tables */
#define MT_SONAME 8 /* socket name */
/* value 9 is not assigned in this list */
#define MT_SOOPTS 10 /* socket options */
#define MT_FTABLE 11 /* fragment reassembly header */
#define MT_RIGHTS 12 /* access rights */
#define MT_IFADDR 13 /* interface address */
#define MT_CONTROL 14 /* extra-data protocol message */
#define MT_OOBDATA 15 /* expedited data */
4,mbuf相关函数
4.1mbuf的分配
/*
* mbuf allocation/deallocation macros:
*
* MGET(struct mbuf *m, int how, int type)
* allocates an mbuf and initializes it to contain internal data.
*
* MGETHDR(struct mbuf *m, int how, int type)
* allocates an mbuf and initializes it to contain a packet header
* and internal data.
*/
/*
 * MGET(m, how, type): allocate one mbuf into m and initialize it to hold
 * internal data (no packet header, no external storage).
 *
 *   m    - lvalue that receives the new mbuf pointer
 *   how  - wait behaviour, passed straight through to MALLOC
 *   type - mbuf type (one of the MT_* values)
 *
 * If MALLOC yields a null pointer, m is assigned the result of
 * m_retry(how, type) instead.
 *
 * m_dat, used below, is #defined as M_dat.M_databuf, i.e. the plain inline
 * data area of the mbuf.
 */
#define MGET(m, how, type) { \
	/* mbtypes[type] converts the mbuf type into the type tag MALLOC expects (e.g. M_MBUF, M_SOCKET). */ \
	MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); \
	if (m) { \
		(m)->m_type = (type); \
		/* Per the original notes, MBUFLOCK raises the processor priority so */ \
		/* the shared statistics update cannot be interrupted by network processing. */ \
		MBUFLOCK(mbstat.m_mtypes[type]++;) \
		(m)->m_next = (struct mbuf *)NULL; \
		(m)->m_nextpkt = (struct mbuf *)NULL; \
		(m)->m_data = (m)->m_dat; \
		(m)->m_flags = 0; \
	} else \
		/* Allocation failed: try once more through m_retry(). */ \
		(m) = m_retry((how), (type)); \
}
/*
* When MGET failes, ask protocols to free space when short of memory,
* then re-attempt to allocate an mbuf.
*/
/*
 * i is the wait behaviour ("how") and t the mbuf type; both are handed
 * unchanged to MGET. Returns the newly allocated mbuf, or a null pointer if
 * the second attempt fails as well.
 */
struct mbuf *
m_retry(i, t)
	int i, t;
{
	register struct mbuf *m;

	/* Call the protocols' registered drain routines to release memory. */
	m_reclaim();
	/*
	 * MGET is a macro, not a function, so its body is expanded right here
	 * by the preprocessor. Temporarily redefining m_retry as a null pointer
	 * makes the failure branch of this one expansion yield NULL instead of
	 * calling m_retry() again; other uses of MGET are unaffected because
	 * the redefinition is removed immediately afterwards.
	 */
#define m_retry(i, t) (struct mbuf *)0
	MGET(m, i, t);
#undef m_retry
	return (m);
}
这个函数循环调用各协议的drain函数来释放(回收)内存
/*
 * Walk every protocol of every domain and invoke its pr_drain routine, if it
 * has one, so that protocols can give memory back. Counts each invocation of
 * this function in mbstat.m_drain.
 */
m_reclaim()
{
	register struct domain *dp;
	register struct protosw *pr;
	/* Per the original notes: raise the processor priority so network
	 * processing cannot interrupt the walk. */
	int s = splimp();

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	/* Restore the priority saved above. */
	splx(s);
	mbstat.m_drain++;
}
分配一个分组头部的mbuf,对m_data和m_flags进行初始化
/*
 * MGETHDR(m, how, type): allocate one mbuf into m and initialize it as the
 * first mbuf of a packet: m_data points at m_pktdat and m_flags is set to
 * M_PKTHDR.
 *
 *   m    - lvalue that receives the new mbuf pointer
 *   how  - wait behaviour, passed straight through to MALLOC
 *   type - mbuf type (one of the MT_* values)
 *
 * If MALLOC yields a null pointer, m is assigned the result of
 * m_retryhdr(how, type) instead.
 */
#define MGETHDR(m, how, type) { \
	MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); \
	if (m) { \
		(m)->m_type = (type); \
		MBUFLOCK(mbstat.m_mtypes[type]++;) \
		(m)->m_next = (struct mbuf *)NULL; \
		(m)->m_nextpkt = (struct mbuf *)NULL; \
		(m)->m_data = (m)->m_pktdat; \
		(m)->m_flags = M_PKTHDR; \
	} else \
		(m) = m_retryhdr((how), (type)); \
}
587 /*
588 * Routine to copy from device local memory into mbufs.
589 */
590 struct mbuf *
591 m_devget(buf, totlen, off0, ifp, copy)
592 char *buf;
593 int totlen, off0;
594 struct ifnet *ifp;
595 void (*copy)();
这个函数是对MGET和MGETHDR的封装,一般由设备驱动程序调用,分配mbuf空间。
1,如果数据长度≤84字节,则在数据(IP数据包)的前面保留16个字节,为输出时添加14字节的MAC包头做准备。(一个包含pkthdr的mbuf最多放100字节的数据)
2,如果数据长度在85~100字节之间,则不额外保留这16字节
3,如果数据长度>100字节,则分配一个cluster进行数据的存放。
可见m_devget根据数据的长度,分配合适的mbuf
4.2mbuf到mbuf中data的转换
定义了两个宏
56 * mtod(m,t) - convert mbuf pointer to data pointer of correct type
57 * dtom(x) - convert data pointer within mbuf to mbuf pointer (XXX)
61 #define mtod(m,t) ((t)((m)->m_data))
MSIZE == 128 这个基于mbuf是128字节对齐
62 #define dtom(x) ((struct mbuf *)((int)(x) & ~(MSIZE-1)))
dtom对存放在cluster中的数据有问题,不能正确转换回mbuf,所以需要下面的函数
/*
* Rearange an mbuf chain so that len bytes are contiguous
* and in the data area of an mbuf (so that mtod and dtom
* will work for a structure of size len). Returns the resulting
* mbuf chain on success, frees it and returns null on failure.
* If there is room, it will add up to max_protohdr-len extra bytes to the
* contiguous region in an attempt to avoid being called next time.
*/
这个函数从mbuf链表中取出len字节的数据放在第一个mbuf中,使dtom能正确运行
struct mbuf *http://www.zyxsw.net
m_pullup(n, len)
register struct mbuf *n;
int len;
{
register struct mbuf *m;
register int count;
int space;
/*
* If first mbuf has no cluster, and has room for len bytes
* without shifting current data, pullup into it,
* otherwise allocate a new mbuf to prepend to the chain.
*/
if ((n->m_flags & M_EXT) == 0 &&
n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
if (n->m_len >= len)
return (n);
m = n;
n = n->m_next;
len -= m->m_len;
} else {
if (len > MHLEN)
goto bad;
MGET(m, M_DONTWAIT, n->m_type);
if (m == 0)
goto bad;
m->m_len = 0;
if (n->m_flags & M_PKTHDR) {
M_COPY_PKTHDR(m, n);
n->m_flags &= ~M_PKTHDR;
}
}
space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
do {
count = min(min(max(len, max_protohdr), space), n->m_len);
bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
(unsigned)count);
len -= count;
m->m_len += count;
n->m_len -= count;
space -= count;
if (n->m_len)
n->m_data += count;
else
n = m_free(n);
} while (len > 0 && n);
if (len > 0) {
(void) m_free(m);
goto bad;
}
m->m_next = n;
return (m);
bad:
m_freem(n);
MPFail++;
return (0);
}
有些细节的东西还不明确,还需要进一步整理。