• (转)复合文档格式分析


    标 题: 【原创】复合文档格式分析
    作 者: kanghtta
    时 间: 2009-12-27,01:25:11
    链 接: http://bbs.pediy.com/showthread.php?t=103868

    参加了看雪10周年活动,见了很多牛人,但苦于近来工作忙,经常潜水,不能白拿kanxue的u盘,所以也来回报一下,重新活跃一下。
    如今,复合文档格式被广泛使用,虽然格式早就被人研究明白,也有相应的文档。
    但网上大多都是些文字介绍,因此,想写个格式分析的东西来加深对复合文档的理解,
    此小程序有如下功能:
    1:打印输出文档头 512字节
    2:分析msat,sat,ssat(short sat)
    3:分析各个stream 使用的sat 和ssat的扇区号,
    可在windows 和linux 下使用,更多功能我会逐步扩充好加上来。。。
    写此程序的目的只是为了巩固对复合文档的学习,具体的格式这里就不写了,
    附件里是复合文档格式的说明文档,英文好的朋友可以直接看。

    有用得着的朋友注意下,由于只是解析了一下,没有建立红黑树,也没有考虑msat大于109扇区的情况,需要的可以在此基础上扩充,应该不难。
    代码:
    #include <math.h>
    #include <tchar.h>

    #include <cassert>
    #include <cstdlib>
    #include <cstring>
    #include <fstream>
    #include <iomanip>
    #include <iostream>
    #include <list>
    #include <string>
    #include <vector>

    #include "ComDoc.h"

    using namespace std;

    /*
     * File-wide state. Despite the original "declare const" label these are
     * mutable globals: DumpDocHeader() assigns SectorSize and miniSectorSize
     * from the header's shift fields, and main() assigns MaxMiniStreamSize.
     */

    int SectorSize = 0;     /* sector size in bytes; used to size sector buffers and by GetOffestFremSid() */
    int miniSectorSize = 0; /* short-sector size in bytes; only printed by DumpDocHeader() */
    ULONG MaxMiniStreamSize
    =0; /** Size cutoff copied from the header's _ulMiniSectorCutoff: ProcessDirEntry() follows the short-sector table (ssat) for non-root entries smaller than this, and the regular table otherwise. */
    /* Forward declarations of the functions defined after main(). */
    bool DumpDocHeader(PDocHeader pHeader);
    bool IfReadFile(ifstream &inStream,unsigned char * buf,unsigned int iReadOffest,size_t size);
    int GetOffestFremSid(SECT sid);
    bool ProcessDirEntry(PDirectoryEntry pDirEntry,vector<int> & slist,vector<int> & sslist,vector<vector<int> >& FatOfDirEntry,int );

    int main(int argc,char *argv[])
    {
    cout
    <<"Enter the name of the input file :\n";
    string inputFileName;
    getline(cin,inputFileName);

    BYTE
    * lpHeaderBuf = new BYTE[512];

    memset(lpHeaderBuf,
    0,512);

    ifstream inStream;
    inStream.open(inputFileName.data(),ios::binary
    |ios::in);
    assert(inStream.is_open());
    inStream.read((
    char*)lpHeaderBuf,512);
    PDocHeader pHeaderSec
    = (PDocHeader)lpHeaderBuf;

    cout
    <<"open the "<<inputFileName<<" file is successful\n "<<endl;
    DumpDocHeader(pHeaderSec);
    MaxMiniStreamSize
    = pHeaderSec->_ulMiniSectorCutoff;
    /**********************************************************************/
    /* 处理msat */
    /************************************************************************/
    unsigned
    long iMastSize = 0;
    vector
    <int> vMastList; /** 用于存储sat链表*/
    if ((pHeaderSec->_sectDifStart == ENDOFCHAIN)&&(pHeaderSec->_csectDif == 0))
    {
    for(iMastSize = 1;iMastSize <= 109 ;iMastSize ++)
    {
    if ((( pHeaderSec->_sectFat[iMastSize-1] ) == 0xFFFFFFFF))
    {
    break;
    }
    else
    {

    BYTE
    *SecBuf = new BYTE[SectorSize];
    IfReadFile(inStream,SecBuf,GetOffestFremSid((pHeaderSec
    ->_sectFat[iMastSize - 1] )),SectorSize);
    int * pListOfMast = (int *)SecBuf;
    int i = 0;
    while( pListOfMast[i]!= FREESECT )
    {
    vMastList.push_back(pListOfMast[i]);
    //cout<<vMastList.front()<<" \t"<<vMastList.back()<<'\t';

    cout
    <<"MAST["<<i<<"] == ";
    cout
    <<vMastList[i]<<'\t';
    if (vMastList[vMastList.size()-1] == ENDOFCHAIN)
    {
    cout
    <<endl;
    }
    i
    ++;
    }

    delete []SecBuf;
    }

    }
    }
    else
    {
    /** Mast 大于 109个扇区的情况 */
    }

    /************************************************************************/
    /* 处理ssat */
    /************************************************************************/

    /*
    * 读取存取ssat短链分配表占用的扇区链,
    */
    vector
    <int> vSsatFat;
    vSsatFat.push_back(pHeaderSec
    ->_sectMiniFatStart);
    int index = vSsatFat[0];
    while(vMastList[index] != ENDOFCHAIN )
    {
    index
    = vMastList[index];
    vSsatFat.push_back(index);

    }

    vSsatFat.push_back(vMastList[index]);
    int i = 0; /** 循环计数器*/
    for (i; i < vSsatFat.size();i++)
    {
    cout
    <<"S-FAT["<<i<<"] == ";
    cout
    <<hex<<vSsatFat[i]<<'\t';
    }
    cout
    <<endl;

    /*
    * 读取ssat链,即用于记录short stream 的链
    * pHeaderSec->_csectMiniFat 指出short stream 占几个扇区
    */
    vector
    <int> vSsatList;

    for (i = 1; i <= pHeaderSec->_csectMiniFat; i++)
    {

    BYTE
    *SecBuf = new BYTE[SectorSize];
    IfReadFile(inStream,SecBuf,GetOffestFremSid(vSsatFat[i
    -1]),SectorSize);
    int * pListOfMast = (int *)SecBuf;
    int i = 0;
    while( pListOfMast[i]!= FREESECT )
    {
    vSsatList.push_back(pListOfMast[i]);
    //cout<<vMastList.front()<<" \t"<<vMastList.back()<<'\t';

    cout
    <<"SSAT["<<i<<"] == ";
    cout
    <<vSsatList[i]<<'\t';
    if (vSsatList[vSsatList.size()-1] == ENDOFCHAIN)
    {
    cout
    <<endl;
    }
    i
    ++;
    }

    delete []SecBuf;

    }

    /************************************************************************/
    /* Process Directory */
    /************************************************************************/

    /*
    * 读取存取directory 的扇区sid链
    */
    vector
    <int> vDirFat;
    vDirFat.push_back(pHeaderSec
    ->_sectDirStart);
    index
    = vDirFat[0];
    while(vMastList[index] != ENDOFCHAIN )
    {
    index
    = vMastList[index];
    vDirFat.push_back(index);

    }

    vDirFat.push_back(vMastList[index]);

    for (i =0; i < vDirFat.size();i++)
    {
    cout
    <<"Director-FAT["<<i<<"] == ";
    cout
    <<hex<<vDirFat[i]<<'\t';
    }
    cout
    <<endl;
    cout
    <<vDirFat.size()<<endl;

    /*
    *分析并处理directory目录
    */
    vector
    <DirectoryEntry> lDirList;
    for (i=0; i<(vDirFat.size()-1) ;i++)
    {
    BYTE
    *SecBuf = new BYTE[SectorSize];
    IfReadFile(inStream,SecBuf,GetOffestFremSid(vDirFat[i]),SectorSize);
    PDirectoryEntry pDirEntry
    = PDirectoryEntry(SecBuf);
    for (int j = 0;j<4;j++)
    {
    DirectoryEntry tempDirEntry
    = pDirEntry[j];
    lDirList.push_back(tempDirEntry);

    }



    delete []SecBuf;
    }

    vector
    < vector<int> > vFatOfDirEntry;
    /*
    *processing DirectoryEntry list
    */
    for(i = 0;i<lDirList.size();i++)
    {
    ProcessDirEntry(
    &lDirList[i],vMastList,vSsatList,vFatOfDirEntry,i) ;
    }



    delete []lpHeaderBuf;

    inStream.close();
    return 0;
    }

    bool ProcessDirEntry(PDirectoryEntry pDirEntry,vector<int> & slist,vector<int> & sslist,vector<vector<int> >& vFatOfDirEntry,int i)
    {
    string DirName;
    unsigned
    int index;
    vector
    <int> FatOfDirEntry;
    if (pDirEntry->_cb == 0)
    {
    return false;
    }
    else
    {
    char buf[256] ={0,0};
    wcstombs(buf,(wchar_t
    *)pDirEntry->_ab,(size_t)pDirEntry->_cb);
    DirName
    = buf;

    /************************************************************************/
    /* 是否是短流 */
    /************************************************************************/
    if((pDirEntry->_ulSize < MaxMiniStreamSize)&&(pDirEntry->_mse != STGTY_ROOT))
    {
    FatOfDirEntry.push_back(pDirEntry
    ->_sectStart);
    index
    = pDirEntry->_sectStart;
    while(sslist[index] != ENDOFCHAIN)
    {
    index
    =sslist[index];
    FatOfDirEntry.push_back(index);
    }
    FatOfDirEntry.push_back(sslist[index]);

    }
    else
    {
    FatOfDirEntry.push_back(pDirEntry
    ->_sectStart);
    index
    = pDirEntry->_sectStart;
    while(slist[index] != ENDOFCHAIN)
    {
    index
    =slist[index];
    FatOfDirEntry.push_back(index);
    }
    FatOfDirEntry.push_back(slist[index]);
    }
    cout
    <<DirName<<'\t';
    for (int j =0; j < FatOfDirEntry.size();j++)
    {
    cout
    <<" \" \" <<DirName<<-FAT["<<j<<"] == ";
    cout
    <<hex<<FatOfDirEntry[j]<<'\t';
    }
    cout
    <<endl;
    vFatOfDirEntry.push_back(FatOfDirEntry);
    FatOfDirEntry.clear();

    }

    return true;

    }



    /**
     * Reads `size` bytes starting at absolute file offset `iReadOffest`.
     *
     * @param inStream     open input stream
     * @param buf          destination buffer with room for at least `size` bytes
     * @param iReadOffest  byte offset from the beginning of the file
     * @param size         number of bytes to read
     * @return true only when the seek succeeded and exactly `size` bytes were
     *         read; false otherwise (the stream's error flags are left set so
     *         the caller can inspect them)
     *
     * Error flags left over from an earlier failed read are cleared first;
     * otherwise one short read would turn every later call into a no-op.
     */
    bool IfReadFile(std::ifstream &inStream,unsigned char * buf,unsigned int iReadOffest,size_t size)
    {
        if (buf == 0 && size != 0)
        {
            return false;
        }

        inStream.clear();
        inStream.seekg(static_cast<std::streamoff>(iReadOffest), std::ios::beg);
        if (inStream.fail())
        {
            return false;
        }

        inStream.read(reinterpret_cast<char *>(buf), static_cast<std::streamsize>(size));
        return static_cast<size_t>(inStream.gcount()) == size;
    }

    /**
     * Converts a SecID to the byte offset of that sector in the file.
     *
     * The header occupies the first sector-sized slot of the file, so sector 0
     * starts at offset SectorSize. For 512-byte sectors this equals the former
     * `sid * SectorSize + 512`; for larger sectors (4096 bytes in version 4
     * files) the fixed 512 was too small.
     *
     * Relies on the global SectorSize, which DumpDocHeader() sets.
     */
    int GetOffestFremSid(SECT sid)
    {
        return static_cast<int>((sid + 1) * SectorSize);
    }


    /**
     * Prints the fields of the compound-document header and derives the
     * global sector sizes from it.
     *
     * Side effects: sets the globals SectorSize and miniSectorSize to
     * 2^_uSectorShift and 2^_uMiniSectorShift. A shift of 31 or more cannot be
     * represented in an int; the corresponding size is then set to 0.
     *
     * @param pHeader  header to dump
     * @return false when pHeader is null or one of the shift values is out of
     *         range, true otherwise
     */
    bool DumpDocHeader(PDocHeader pHeader)
    {
        if (!pHeader)
        {
            return false;
        }

        cout<<"\t The comdoc flag is\t{ ";
        for (int i = 0; i < 8; i++)
        {
            cout<<hex<<(int)pHeader->_abSig[i]<<" ";
            if (i != 7)
            {
                cout<<',';
            }
        }
        cout<<'}'<<endl;

        if (pHeader->_uByteOrder == 0xFFFE)
        {
            cout<<"\t The file Byte order is Little-Endian"<<endl;
        }
        cout.setf(ios::dec,ios::basefield); /* undo the hex used for the signature */

        /* Powers of two computed with an integer shift instead of pow(). */
        const unsigned int sectorShift = pHeader->_uSectorShift;
        const unsigned int miniSectorShift = pHeader->_uMiniSectorShift;
        const bool shiftsValid = (sectorShift < 31) && (miniSectorShift < 31);
        SectorSize = (sectorShift < 31) ? (1 << sectorShift) : 0;
        miniSectorSize = (miniSectorShift < 31) ? (1 << miniSectorShift) : 0;

        cout<<"\t Size of a Sector in the compound document file is "<<SectorSize<<endl;
        cout<<"\t Size of a short-sector in the short-stream container stream is "<<miniSectorSize<<endl;
        cout<<"\t Total number of sectors used for the sector allocation table is "<<(DWORD)pHeader->_csectFat<<endl;
        cout<<"\t SecID of first sector of the directory stream is "<<(ULONG)pHeader->_sectDirStart<<endl;
        cout<<"\t Minimum size of a standard stream is "<<(ULONG)pHeader->_ulMiniSectorCutoff<<endl;
        cout<<"\t SecID of first sector of the short-sector allocation table is "<<(ULONG)pHeader->_sectMiniFatStart<<endl;
        cout<<"\t Total number of sectors used for the short-sector allocation table is "<<(ULONG)pHeader->_csectMiniFat<<endl;
        cout<<"\t SecID of first sector of the master sector allocation table is "<<pHeader->_sectDifStart<<endl;
        cout<<"\t Total number of sectors used for the master sector allocation table is "<<(ULONG)pHeader->_csectDif<<endl;
        cout<<"\t First part of the master sector allocation table containing 109 SecIDs is "<<endl;
        cout<<"\t {\t";

        /* The list ends at the first slot holding -1; the line is wrapped after every 20th SecID. */
        for (int slot = 0; slot < 109; slot++)
        {
            if (pHeader->_sectFat[slot] == -1)
            {
                break;
            }
            cout<<pHeader->_sectFat[slot]<<'\t';
            if ((slot + 1) % 20 == 0)
            {
                cout<<endl;
            }
        }
        cout<<'}'<<endl;
        return shiftsValid;
    }
    
    
  • 相关阅读:
    【笔记】Cross Join&lag与lead函数
    【笔记】Oracle 窗口函数
    【笔记】greatest/least函数&Round函数
    【笔记】Oracle列转行unpivot&行转列 PIVOT
    【Oracle】ORDER BY 2 DESC,1 ASC,同时对多个数据列进行不同的顺序排序&Oracle中的 (+)
    PostgreSQL 14.4的安装以及使用以及一些安装的异常
    如何设计一个keyvalue存储
    Implementing a KeyValue Store – Part 5: Hash table implementations
    Using existing keyvalue stores as models
    Implementing a KeyValue Store – Part 6: OpenAddressing Hash Tables
  • 原文地址:https://www.cnblogs.com/zhyryxz/p/1987011.html
Copyright © 2020-2023  润新知