torrent文件分析

torrent文件信息存储格式：

bencoding是一种以简洁格式指定和组织数据的方法。支持下列类型：字节串、整数、列表和字典。

1 字符串存储格式: <字符串的长度>:<字符串的内容>
例如：    4:spam 表示spam, 2:ab 表示ab

2 数字的存储格式: i<整数>e
例如：    i3e 表示整数3, i1024e 表示整数1024

3 列表的存储格式： l<子元素>e 其中：子元素可以是字符串，整数，列表和字典，或者是它们的组合体
例如：    l4:spam4:eggse    表示 [ "spam", "eggs" ]
        l3:logi32ee        表示 [ "log", 32 ]

4 字典的存储格式： d<<key><value><key><value><key><value>...<key><value>>e
其中：key只能是字符串类型，value则可以是字符串，整数，列表和字典，或者是它们的组合体，key和value必须是成对出现的
例如：    d3:cow3:moo4:spam4:eggse    表示 { "cow" => "moo", "spam" => "eggs" }
        d4:spaml1:a1:bee            表示 { "spam" => [ "a", "b" ] }
        d9:publisher3:bob4:spaml1:a1:be5:counti80ee 表示 { "publisher" => "bob", "spam" => [ "a", "b" ], "count" => 80 }

torrent文件的信息：

announce:                tracker服务器的URL(字符串)
announce-list(可选):    备用tracker服务器列表(列表)
creation date(可选):    种子创建的时间，Unix标准时间格式，从1970年1月1日 00:00:00到创建时间的秒数(整数)
comment(可选):            备注(字符串)
created by(可选):        创建人或创建程序的信息(字符串)

info:                一个字典结构，包含文件的主要信息，分为两种情况：单文件结构或多文件结构
    piece length:    每个块的大小，单位字节(整数)
    pieces:            每个块的20个字节的SHA1 Hash的值(二进制格式)

    单文件结构如下：
    name:            文件名(字符串)
    length:            文件长度，单位字节(整数)

    多文件结构如下：
    name:            目录名(字符串)
    files:            一个字典结构的列表，字典结构至少包含下面两个信息
        length:        文件长度，单位字节(整数)
        path:        文件的路径和名字，是一个列表结构，如 "test\test.txt" 对应的列表为 l4:test8:test.txte

torrent文件解析的代码：

下面给出解析torrent文件的C++示例代码：

//////////////////////////////////////////////////////////////////////////

// interfaceBencode.h
#pragma once
#include "interfaceString.h"

//////////////////////////////////////////////////////////////////////////
//    torrent信息存储文法分析
//    <content>    ::= <dict>
//    <dict>        ::= d(<string>(<string> | <int> | <dict> | <list>))*e
//    <list>        ::= l(<string> | <int> | <dict> | <list>)*e
//    <string>    ::= <string length>:<string data>
//    <int>        ::= i<number>e
//////////////////////////////////////////////////////////////////////////

// Common base class of every node in a parsed bencode tree
// (string, integer, dictionary and list nodes derive from it).
class INode
{
public:
    // Virtual so that concrete nodes can be destroyed through an INode*.
    virtual ~INode() {}

    // Implemented by each concrete node type. Despite the name, the
    // implementations in this file parse one bencoded value from the front of
    // `content`, erase the characters they consumed from it, and return false
    // when they reject the input.
    virtual bool encode(string& content) = 0;
};

// Bencode byte-string node, stored on the wire as "<length>:<data>"
// (for example "4:spam").
class StringNode : public INode
{
public:
    // Payload of the string, without the "<length>:" prefix. Filled in by encode().
    string m_value;

    virtual ~StringNode() {}

    // Parses one "<length>:<data>" value from the front of `content`.
    virtual bool encode(string& content);
};

// Bencode integer node, stored on the wire as "i<number>e" (for example "i3e").
class IntNode : public INode
{
public:
    // Parsed value. Filled in by encode().
    // NOTE(review): `int` looks too narrow for file lengths above INT_MAX;
    // widening it depends on what InterfaceString::to_number accepts - confirm.
    int m_value;

    virtual ~IntNode() {}

    // Parses one "i<number>e" value from the front of `content`.
    virtual bool encode(string& content);
};

// Bencode dictionary node, stored on the wire as "d<key><value>...e".
//
// The node owns every key and value pointer stored in m_map_nodes and deletes
// them in its destructor. Because of that ownership the class is made
// non-copyable: a compiler-generated copy would share the raw pointers and
// delete them twice.
//
// Note that the map is keyed by StringNode* (pointer value), so iteration
// order is unrelated to the key text and a lookup by text needs a linear scan
// over the entries (see find_node).
class DictNode : public INode
{
public:
    DictNode() {}

    virtual ~DictNode()
    {
        for (map<StringNode*, INode*>::iterator iter = m_map_nodes.begin(); iter != m_map_nodes.end(); ++iter)
        {
            delete iter->first;
            delete iter->second;
        }

        m_map_nodes.clear();
    }

    // Parses one dictionary from the front of `content`.
    virtual bool encode(string& content);

    // key node -> value node; both owned by this dictionary.
    map<StringNode*, INode*> m_map_nodes;

private:
    // Declared but intentionally not defined: copying would double-delete the children.
    DictNode(const DictNode&);
    DictNode& operator=(const DictNode&);
};

// Bencode list node, stored on the wire as "l<element>...e".
//
// The node owns every element pointer stored in m_nodes and deletes them in
// its destructor. Because of that ownership the class is made non-copyable:
// a compiler-generated copy would share the raw pointers and delete them twice.
class ListNode : public INode
{
public:
    ListNode() {}

    virtual ~ListNode()
    {
        for (vector<INode*>::iterator iter = m_nodes.begin(); iter != m_nodes.end(); ++iter)
            delete *iter;

        m_nodes.clear();
    }

    // Parses one list from the front of `content`.
    virtual bool encode(string& content);

    // Elements in the order they were parsed; owned by this list.
    vector<INode*> m_nodes;

private:
    // Declared but intentionally not defined: copying would double-delete the children.
    ListNode(const ListNode&);
    ListNode& operator=(const ListNode&);
};

// Parses one bencoded string "<length>:<data>" from the front of `content`.
//
// On success the payload is stored in m_value, the consumed characters
// (prefix, colon and payload) are erased from `content`, and true is returned.
// On failure `content` and m_value are left untouched and false is returned.
//
// Rejected input: no ':' separator, an empty or non-numeric length prefix,
// a negative length, or fewer payload bytes available than the prefix claims.
// The empty string "0:" is valid and accepted.
bool StringNode::encode(string& content)
{
    const size_t pos = content.find(':', 0);
    if (pos == string::npos || pos == 0)
        return false;

    // The length prefix must consist of decimal digits only; this also keeps
    // a ':' that appears later in non-string data from being mistaken for
    // the separator.
    for (size_t i = 0; i < pos; ++i)
    {
        if (content[i] < '0' || content[i] > '9')
            return false;
    }

    // to_number is a project helper; presumably it converts decimal text to
    // an int. Any result it returns is not used here.
    int count = 0;
    InterfaceString::to_number(content.substr(0, pos), count);
    if (count < 0)
        return false;

    // Refuse truncated data instead of silently returning a shorter payload.
    const size_t length = static_cast<size_t>(count);
    if (length > content.size() - (pos + 1))
        return false;

    m_value = content.substr(pos + 1, length);
    content.erase(0, pos + 1 + length);

    return true;
}

// Parses one bencoded integer "i<number>e" from the front of `content`.
//
// On success the value is stored in m_value, the consumed characters are
// erased from `content`, and true is returned. On failure `content` is left
// untouched and false is returned.
//
// Rejected input: empty input, a missing terminating 'e', or a body that is
// not an optional '-' followed by at least one decimal digit. Calling this on
// input that does not start with 'i' is treated as a caller error (assert).
bool IntNode::encode(string& content)
{
    if (content.empty())
        return false;

    if (content[0] != 'i')
    {
        // bad int node
        assert(false);
        return false;
    }

    const size_t pos = content.find('e', 1);
    if (pos == string::npos)
        return false;

    string s_value = content.substr(1, pos - 1);

    // Validate the digits ourselves so that input such as "ie" or "i4:spame"
    // is not accepted as a number.
    const size_t first_digit = (!s_value.empty() && s_value[0] == '-') ? 1 : 0;
    if (first_digit == s_value.size())
        return false;

    for (size_t i = first_digit; i < s_value.size(); ++i)
    {
        if (s_value[i] < '0' || s_value[i] > '9')
            return false;
    }

    // to_number is a project helper; presumably it converts decimal text to
    // an int. NOTE(review): values outside the range of int cannot be
    // represented in m_value - confirm how to_number handles them.
    InterfaceString::to_number(s_value, m_value);

    // Drop 'i', the digits and the terminating 'e'.
    content.erase(0, pos + 1);
    return true;
}

bool DictNode::encode(string& content)
{
    if (content.empty())
        return false;

    if (content[0] != 'd')
    {
        // bad dict node
        assert(false);
        return false;
    }

    content = content.erase(0, 1);

    while (!content.empty())
    {
        StringNode* keyNode = new StringNode();
        keyNode->decode(content);

        if (content.empty())
            break;

        INode* valueNode = NULL;
        if (content[0] == 'l')
            valueNode = new ListNode();
        else if (content[0] == 'd')
            valueNode = new DictNode();
        else if (content[0] == 'i')
            valueNode = new IntNode();
        else
            valueNode = new StringNode();

        if (valueNode == NULL)
            return false;

        valueNode->encode(content);
        m_map_nodes[keyNode] = valueNode;

        if (content[0] == 'e')
        {
            content = content.erase(0, 1);
            break;
        }
    }
    return true;
}

bool ListNode::encode(string& content)
{
    if (content[0] != 'l')
    {
        // bad list node
        assert(false);
        return false;
    }

    content = content.erase(0, 1);

    while (!content.empty())
    {
        INode* valueNode = NULL;
        if (content[0] == 'l')
            valueNode = new ListNode();
        else if (content[0] == 'd')
            valueNode = new DictNode();
        else if (content[0] == 'i')
            valueNode = new IntNode();
        else
            valueNode = new StringNode();

        if (valueNode == NULL)
            return false;

        valueNode->encode(content);
        m_nodes.push_back(valueNode);

        if (content[0] == 'e')
        {
            content = content.erase(0, 1);
            break;
        }
    }
    return true;
}

注：上述代码用到了数字跟字符串之间的转换，可自行实现。上述只是分析torrent文件里的信息，如果想得到全部信息可以这样调用：

string content = "d8:ann..........e"; // content holds the contents of the torrent file (elided here)

// The caller owns this root node and must delete it when finished with it;
// the DictNode destructor deletes every key and value node stored in it.
DictNode* pDictNode = new DictNode();

// Parses `content` in place: consumed characters are erased from the string.
// The boolean result is ignored in this example.
pDictNode->encode(content);

那么所有的信息都可以在pDictNode结点里找到了。

如果你想要得到torrent文件相关字段的信息，则还需要对上述代码进行封装，下面给出我封装过的类。

////////////////////////////////////////////////

// interfaceTorrentFile.h

#pragma once

// begin namespace core_common
namespace core_common
{

// Flat description of a .torrent file plus the static parser that fills it.
class TorrentFile
{
public:
    // One payload file described by the torrent.
    struct files_t
    {
        string      file_path;      // file name, or directory name plus path components
        uint64_t    file_size;      // taken from the "length" entry
    };

    // Contents of the "info" dictionary.
    struct infos_t
    {
        uint64_t        piece_length;   // "piece length" entry
        string          pieces;         // "pieces" entry, stored as-is

        bool            is_dir;         // true when the info dictionary has a "files" entry
        vector<files_t> files;          // a single element when is_dir is false
    };

    // Top-level fields of the torrent.
    struct torrent_t
    {
        string          announce;       // "announce" entry
        vector<string>  announce_list;  // "announce-list" entry
        string          comment;        // "comment" entry
        string          create_by;      // "created by" entry
        uint64_t        create_data;    // "creation date" entry
        string          encoding;       // "encoding" entry
        infos_t         infos;          // "info" entry
    };

public:

    /// Converts the text of a torrent file into a torrent_t structure.
    static bool encode(const string& content, torrent_t& torrent);

    /// Converts a torrent_t structure back into the text of a torrent file.
    //static bool decode(const torrent_t& torrent, string& content);
};

}    // end namespace core_common

/////////////////////////////////////////////////

// interfaceTorrentFile.cpp

#include "interfaceBencode.h"

#include "InterfaceTorrent.h"

using namespace core_common;

// Returns the value node whose key text equals `key`, or NULL when no entry
// matches. The map is ordered by key pointer rather than by key text, so the
// entries are scanned one by one. A NULL key pointer ends the search with NULL.
INode* find_node(const map<StringNode*, INode*>& node_map, const string& key)
{
    typedef map<StringNode*, INode*>::const_iterator entry_iterator;

    entry_iterator entry = node_map.begin();
    while (entry != node_map.end())
    {
        StringNode* candidate = entry->first;
        assert(candidate != NULL);
        if (candidate == NULL)
            return NULL;

        if (candidate->m_value == key)
            return entry->second;

        ++entry;
    }

    return NULL;
}

// Returns the text held by `strNode`, or an empty string when the pointer is NULL.
string get_node_value(StringNode* strNode)
{
    if (strNode == NULL)
        return "";

    return strNode->m_value;
}

// Returns the number held by `intNode` converted to uint64_t, or 0 when the
// pointer is NULL.
uint64_t get_node_value(IntNode* intNode)
{
    if (intNode == NULL)
        return 0;

    return intNode->m_value;
}

bool TorrentFile::encode(const string& torrent_content, torrent_t& torrent)
{
    string content = torrent_content;

    DictNode* pDictNode = new DictNode();
    pDictNode->encode(content);

    torrent.create_by    = get_node_value( dynamic_cast<StringNode*>(find_node(pDictNode->m_map_nodes, "created by")) );    // 查找 created by
    torrent.create_data = get_node_value( dynamic_cast<IntNode*>(find_node(pDictNode->m_map_nodes, "creation date")) );    // 查找 creation date
    torrent.encoding    = get_node_value( dynamic_cast<StringNode*>(find_node(pDictNode->m_map_nodes, "encoding")) );    // 查找 encoding
    torrent.comment        = get_node_value( dynamic_cast<StringNode*>(find_node(pDictNode->m_map_nodes, "comment")) );    // 查找 comment
    torrent.announce    = get_node_value( dynamic_cast<StringNode*>(find_node(pDictNode->m_map_nodes, "announce")) );    // 查找 announce

    // 查找 announce-list
    {
        INode* pNode = find_node(pDictNode->m_map_nodes, "announce-list");
        if (pNode != NULL)
        {
            ListNode* pValueNode = dynamic_cast<ListNode*>(pNode);
            assert(pValueNode != NULL);
            if (pValueNode == NULL)
                return false;

            for (vector<INode*>::iterator iter_announce = pValueNode->m_nodes.begin(); iter_announce != pValueNode->m_nodes.end(); ++iter_announce)
                torrent.announce_list.push_back( get_node_value( dynamic_cast<StringNode*>(*iter_announce)) );
        }
    }

    // 查找 info
    INode* pNode = find_node(pDictNode->m_map_nodes, "info");
    if (pNode != NULL)
    {
        DictNode* pValueNode = dynamic_cast<DictNode*>(pNode);
        assert(pValueNode != NULL);

        torrent.infos.piece_length    = get_node_value( dynamic_cast<IntNode*>(find_node(pValueNode->m_map_nodes, "piece length")) );    // 查找 piece length
        torrent.infos.pieces            = get_node_value( dynamic_cast<StringNode*>(find_node(pValueNode->m_map_nodes, "pieces")) );        // 查找 piece

        // 查找是否是目录
        INode* pSubNode = find_node(pValueNode->m_map_nodes, "files");
        if (pSubNode != NULL)
        {
            torrent.infos.is_dir = true;

            // 查找目录名
            string dir_name = get_node_value( dynamic_cast<StringNode*>(find_node(pValueNode->m_map_nodes, "name")) );

            // 查找子文件
            ListNode* pSubValueNode = dynamic_cast<ListNode*>(pSubNode);
            assert(pSubValueNode != NULL);
            if (pSubValueNode != NULL)
            {
                for (vector<INode*>::iterator iter_file = pSubValueNode->m_nodes.begin(); iter_file != pSubValueNode->m_nodes.end(); ++iter_file)
                {
                    DictNode* pDictNode = dynamic_cast<DictNode*>(*iter_file);
                    assert(pDictNode != NULL);
                    if (pDictNode != NULL)
                    {
                        files_t file;
                        file.file_size = get_node_value( dynamic_cast<IntNode*>(find_node(pDictNode->m_map_nodes, "length")) );

                        ListNode* pListNode = dynamic_cast<ListNode*>(find_node(pDictNode->m_map_nodes, "path"));
                        if (pListNode != NULL)
                        {
                            file.file_path = dir_name;
                            for (vector<INode*>::iterator iter_path = pListNode->m_nodes.begin(); iter_path != pListNode->m_nodes.end(); ++iter_path)
                            {
                                file.file_path += "//";
                                file.file_path += get_node_value( dynamic_cast<StringNode*>(*iter_path));
                            }
                        }

                        torrent.infos.files.push_back(file);
                    }
                }
            }
        }
        else
        {
            torrent.infos.is_dir = false;

            files_t file;
            file.file_size = get_node_value( dynamic_cast<IntNode*>(find_node(pValueNode->m_map_nodes, "length")) );
            file.file_path = get_node_value( dynamic_cast<StringNode*>(find_node(pValueNode->m_map_nodes, "name")) );

            torrent.infos.files.push_back(file);
        }
    }

    delete pDictNode;
    return true;
}

注1：该torrent_t结构只包含官方发布的torrent文件可能包含的信息，如果某些torrent文件带有特殊结点，也可以自行扩展定义，反正所有结点都能在DictNode中找到。

相关阅读:
>>> fout = open('output.txt', 'w') Traceback (most recent call last): File "<stdin>", line 1, in <module> PermissionError: [Errno 13] Permission denied: 'output.txt'
Python元组术语
Python元组与列表_元组与字典
Python元组_参数长度可变
Python元组_赋值与返回值
Python元组_不可修改
第二篇-bmob云端服务器的发现
第一篇-关于语言与计划
《JavaScript》JS中的常用方法attr(),splice()
Java接口interface,匿名内部类

原文地址：https://www.cnblogs.com/hnrainll/p/2112809.html