• zlib库解压http报文中的gzip数据_bluemonster_新浪博客


    zlib库解压http报文中的gzip数据_bluemonster_新浪博客

    zlib库解压http报文中的gzip数据
    [此博文包含图片] (2010-09-02 11:27:20)
    转载▼
    标签:
    zlib
    http
    gzip
    解压
    it
        分类: 实验室

    抓HTTP包的时候发现很多网站的响应报文是gzip压缩后的数据,存放在一个或多个chunk里面(参见HTTP响应报文中的chunked)。这些gzip数据是不可阅读的,需要进行解压。一开始在网上找到了一份可以正常运行的代码,贴出来:

    http://hi.baidu.com/xzq2000/blog/item/c5429f2fd6a646301f308991.html/cmtid/332e72f08f0b53a2a40f5237

    char* ungzip(char* source,int len)
    {
    int err;
    z_stream d_stream;
    Byte compr[segment_size]={0}, uncompr[segment_size*4]={0};
    memcpy(compr,(Byte*)source,len);
    uLong comprLen, uncomprLen;
    comprLen = sizeof(compr) / sizeof(compr[0]);
    uncomprLen = 4*comprLen;
    strcpy((char*)uncompr, "garbage");

    d_stream.zalloc = (alloc_func)0;
    d_stream.zfree = (free_func)0;
    d_stream.opaque = (voidpf)0;

    d_stream.next_in = compr;
    d_stream.avail_in = 0;
    d_stream.next_out = uncompr;

    err = inflateInit2(&d_stream,47);
    if(err!=Z_OK)
    {
       printf("inflateInit2 error:%d",err);
       return NULL;
    }
    while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) {
    d_stream.avail_in = d_stream.avail_out = 1;
    err = inflate(&d_stream,Z_NO_FLUSH);
        if(err == Z_STREAM_END) break;
        if(err!=Z_OK)
        {
        printf("inflate error:%d",err);
        return NULL;
       }
    }
    err = inflateEnd(&d_stream);
    if(err!=Z_OK)
    {
       printf("inflateEnd error:%d",err);
       return NULL;
    }
    char* b = new char[d_stream.total_out+1];
    memset(b,0,d_stream.total_out+1);
    memcpy(b,(char*)uncompr,d_stream.total_out);
    return b;
    }
    后来看了zlib usage example(参见zlib使用范例),模仿写了一段代码,可以正常运行,而且感觉比上面的代码要快,因为上面的代码把z_stream的avail_in和avail_out都设为1了,只能一字节一字节地解压,非常慢。

    #include <stdio.h>
    #include <string.h>
    #include <assert.h>
    #include <zlib.h>
    #define segment_size 1460//largest tcp data segment
    int ungzip(char* source,int len,char*des)
    {
    int ret,have;
    int offset=0;
    z_stream d_stream;
    Byte compr[segment_size]={0}, uncompr[segment_size*4]={0};
    memcpy(compr,(Byte*)source,len);
    uLong comprLen, uncomprLen;
    comprLen =len;//一开始写成了comprlen=sizeof(compr)以及comprlen=strlen(compr),后来发现都不对。

    //sizeof(compr)永远都是segment_size,显然不对,strlen(compr)也是不对的,因为strlen只算到\0之前,

    //但是gzip或者zlib数据里\0很多。
    uncomprLen = segment_size*4;
    strcpy((char*)uncompr, "garbage");

    d_stream.zalloc = Z_NULL;
    d_stream.zfree = Z_NULL;
    d_stream.opaque = Z_NULL;

    d_stream.next_in = Z_NULL;//inflateInit和inflateInit2都必须初始化next_in和avail_in
    d_stream.avail_in = 0;//deflateInit和deflateInit2则不用

    ret = inflateInit2(&d_stream,47);
    if(ret!=Z_OK)
    {
       printf("inflateInit2 error:%d",ret);
       return ret;
    }
    d_stream.next_in=compr;
    d_stream.avail_in=comprLen;
    do
    {
     d_stream.next_out=uncompr;
     d_stream.avail_out=uncomprLen;
     ret = inflate(&d_stream,Z_NO_FLUSH);
     assert(ret != Z_STREAM_ERROR);
     switch (ret)
     {
      case Z_NEED_DICT:
                  ret = Z_DATA_ERROR;  
            case Z_DATA_ERROR:
            case Z_MEM_ERROR:
                  (void)inflateEnd(&d_stream);
                   return ret;
        }
     have=uncomprLen-d_stream.avail_out;
     memcpy(des+offset,uncompr,have);//这里一开始我写成了memcpy(des+offset,d_stream.next_out,have);

     //后来发现这是不对的,因为next_out指向的下次的输出,现在指向的是无有意义数据的内存。见下图

    offset+=have;

    }while(d_stream.avail_out==0);
    inflateEnd(&d_stream);
    memcpy(des+offset,"\0",1);
    return ret;
    }

     

    zlib库解压http报文中的gzip数据
  • 相关阅读:
    MySql为查询结果添加前、后缀
    SQLHelper类
    OracleHelper类
    C#在sql中使用變量訪問Oracle數據庫
    转-调用HTMLTestRunner生成的报告中不能显示用例中print函数的输出
    HTMLTestRunner优化:带截图、饼图、失败重跑
    【转】如何成功安装旧版本火狐,成功安装firebug和firepath插件
    【转】Selenium 加载Chrome/Firefox浏览器配置文件
    mac查看python安装路径
    requests模拟上传照片
  • 原文地址:https://www.cnblogs.com/lexus/p/2991627.html
Copyright © 2020-2023  润新知