zlib库解压http报文中的gzip数据_bluemonster_新浪博客
zlib库解压http报文中的gzip数据
[此博文包含图片] (2010-09-02 11:27:20)
转载▼
标签:
zlib
http
gzip
解压
it
分类: 实验室
抓HTTP包的时候发现很多网站的响应报文是gzip压缩后的数据,存放在一个或多个chunk里面(参见HTTP响应报文中的chunked)。这些gzip数据是不可阅读的,需要进行解压。一开始在网上找到了一份可以正常运行的代码,贴出来:
http://hi.baidu.com/xzq2000/blog/item/c5429f2fd6a646301f308991.html/cmtid/332e72f08f0b53a2a40f5237
char* ungzip(char* source,int len)
{
int err;
z_stream d_stream;
Byte compr[segment_size]={0}, uncompr[segment_size*4]={0};
memcpy(compr,(Byte*)source,len);
uLong comprLen, uncomprLen;
comprLen = sizeof(compr) / sizeof(compr[0]);
uncomprLen = 4*comprLen;
strcpy((char*)uncompr, "garbage");
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
d_stream.next_in = compr;
d_stream.avail_in = 0;
d_stream.next_out = uncompr;
err = inflateInit2(&d_stream,47);
if(err!=Z_OK)
{
printf("inflateInit2 error:%d",err);
return NULL;
}
while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) {
d_stream.avail_in = d_stream.avail_out = 1;
err = inflate(&d_stream,Z_NO_FLUSH);
if(err == Z_STREAM_END) break;
if(err!=Z_OK)
{
printf("inflate error:%d",err);
return NULL;
}
}
err = inflateEnd(&d_stream);
if(err!=Z_OK)
{
printf("inflateEnd error:%d",err);
return NULL;
}
char* b = new char[d_stream.total_out+1];
memset(b,0,d_stream.total_out+1);
memcpy(b,(char*)uncompr,d_stream.total_out);
return b;
}
后来看了zlib usage example(参见zlib使用范例),模仿写了一段代码,可以正常运行,而且感觉比上面的代码要快,因为上面的代码把z_stream的avail_in和avail_out都设为1了,只能一字节一字节地解压,非常慢。
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <zlib.h>
#define segment_size 1460//largest tcp data segment
int ungzip(char* source,int len,char*des)
{
int ret,have;
int offset=0;
z_stream d_stream;
Byte compr[segment_size]={0}, uncompr[segment_size*4]={0};
memcpy(compr,(Byte*)source,len);
uLong comprLen, uncomprLen;
comprLen =len;//一开始写成了comprlen=sizeof(compr)以及comprlen=strlen(compr),后来发现都不对。
//sizeof(compr)永远都是segment_size,显然不对,strlen(compr)也是不对的,因为strlen只算到\0之前,
//但是gzip或者zlib数据里\0很多。
uncomprLen = segment_size*4;
strcpy((char*)uncompr, "garbage");
d_stream.zalloc = Z_NULL;
d_stream.zfree = Z_NULL;
d_stream.opaque = Z_NULL;
d_stream.next_in = Z_NULL;//inflateInit和inflateInit2都必须初始化next_in和avail_in
d_stream.avail_in = 0;//deflateInit和deflateInit2则不用
ret = inflateInit2(&d_stream,47);
if(ret!=Z_OK)
{
printf("inflateInit2 error:%d",ret);
return ret;
}
d_stream.next_in=compr;
d_stream.avail_in=comprLen;
do
{
d_stream.next_out=uncompr;
d_stream.avail_out=uncomprLen;
ret = inflate(&d_stream,Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
switch (ret)
{
case Z_NEED_DICT:
ret = Z_DATA_ERROR;
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&d_stream);
return ret;
}
have=uncomprLen-d_stream.avail_out;
memcpy(des+offset,uncompr,have);//这里一开始我写成了memcpy(des+offset,d_stream.next_out,have);
//后来发现这是不对的,因为next_out指向的下次的输出,现在指向的是无有意义数据的内存。见下图
offset+=have;
}while(d_stream.avail_out==0);
inflateEnd(&d_stream);
memcpy(des+offset,"\0",1);
return ret;
}
zlib库解压http报文中的gzip数据