背景
涉及对象传输或存储时,均需要序列化的参与,将对象转为连续的二进制数据后才能进行传输或存储,需要还原对象时,通过反序列化逆向处理二进制数据便能得到原对象
- 这里的对象是一种广泛的概念,往大了说就是一段有意义的内存
实现
- 实现过程中主要使用模板应对各种类型,特化+宏应对基础数据类型,逐层处理应对STL容器嵌套情况
- C++语言本身不带反射,自定义结构需继承接口类Serializable,并实现相应接口
- 实现时使用了std::move()进行构造与赋值,自定义结构最好实现自己的移动构造函数与移动赋值函数
- 获取序列化buffer时,先计算序列化目标在序列化后的总字节数,一次性申请足够内存
- 获取序列化buffer时,返回的buffer之前藏了序列化目标的总字节数,藏东西这招跟redis学的
- 藏起来的目标总字节数,用于在反序列化前简单判断待反序列化buffer是否有效
- 未支持指针类型,传递错误类型的指针(强转目标类型与实际类型不符合)会导致内存错误,序列化过程中没法判断类型是否正确
- 未支持C风格字符串,C风格字符串以0结尾,非二进制安全
- 需要序列化一段连续内存时,构造std::string后传入
- 反序列化时,调用者需自行保证,待反序列化buffer的原始类型与目标类型一致,代码不能识别出类型差异与结构改变
- 使用方式 申请序列化buffer->序列化->三方库压缩->三方库加密->传输/落盘->接收/读取->三方库解密->三方库解压->简单检查->反序列化->释放buffer
- 序列化后的数据格式如下
- 先写入对象序列化后总长度,后面才接着写入对象序列化数据
- 对象是基础类型则直接写入数据
- 对象是STL容器先写入元素个数,再写入每个元素序列化数据
- 如果元素是std::pair<Key, Value>类型,则先写入序列化Key,再写入序列化Value
- 对象是自定义结构,则需用户定义数据格式
- 先写入对象序列化后总长度,后面才接着写入对象序列化数据
总体结构:
| 对象序列化后总长度 | 对象序列化数据 |
| --- | --- |
对象是基础数据:
| 对象序列化后总长度 | 对象序列化数据(char、int、short、long ……) |
| --- | --- |
对象是STL容器:
| 对象序列化后总长度 | 元素个数 | 元素1序列化数据 | 元素2序列化数据 | 元素3序列化数据 | ..... | 元素n序列化数据 |
| --- | --- | --- | --- | --- | --- | --- |
对象是STL容器之Map:
| 对象序列化后总长度 | 元素个数 | Pair 1之Key序列化数据 | Pair 1之Value序列化数据 | ..... | Pair n之Key序列化数据 | Pair n之Value序列化数据 |
| --- | --- | --- | --- | --- | --- | --- |
代码
#ifndef _Serialization_H_
#define _Serialization_H_
#include <cstring>
#include <list>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
// Length of a fundamental type: expands to an explicit specialization of
// Length() that reports sizeof(BasicType).
// Every line but the last needs a trailing backslash so the whole body belongs
// to the macro. `inline` is used instead of `static` because an explicit
// specialization may not carry a storage-class specifier.
#define BASIC_DATA_LENGTH(BasicType) \
    template<> \
    inline unsigned Length(const BasicType& elem){ \
        return sizeof(elem); \
    }
// Allocates a serialization buffer laid out as |total length|payload|.
// Expands to a block that returns from the ENCLOSING function: it yields a
// pointer to the payload area, with the payload size (uLen) stored in the
// sizeof(LEN) bytes just in front of it. Pair with ReleaseBuffer().
// The argument is parenthesised so an expression can be passed safely, and
// every line but the last carries a line-continuation backslash.
// The nullptr test only matters with a non-throwing allocator; a standard
// `new` reports failure by throwing std::bad_alloc.
#define MAKE_BUFFER(uLen){ \
    char *pBuffer = new char[(uLen) + sizeof(LEN)]; \
    if(nullptr == pBuffer){ \
        return nullptr; \
    } \
    *reinterpret_cast<LEN *>(pBuffer) = (uLen); \
    return pBuffer + sizeof(LEN); \
}
// Buffer acquisition for a fundamental type: expands to an explicit
// specialization of GetBuffer() that allocates sizeof(BasicType) payload bytes
// through MAKE_BUFFER.
// Line-continuation backslashes keep the body inside the macro; `inline`
// replaces `static`, which is not allowed on an explicit specialization.
#define BASIC_DATA_GET_BUFFER(BasicType) \
    template<> \
    inline char *const GetBuffer(const BasicType& elem){ \
        LEN uLen = sizeof(elem); \
        MAKE_BUFFER(uLen); \
    }
// Serialization of a fundamental type: expands to an explicit specialization
// of Serialize() that copies the raw object bytes into pBuffer and returns the
// number of bytes written (sizeof(BasicType)).
// Line-continuation backslashes keep the body inside the macro; `inline`
// replaces `static`, which is not allowed on an explicit specialization.
#define BASIC_DATA_SERIALIZE(BasicType) \
    template<> \
    inline unsigned Serialize(const BasicType& elem, char *const pBuffer){ \
        ::memcpy(pBuffer, &elem, sizeof(BasicType)); \
        return sizeof(BasicType); \
    }
// Deserialization of a fundamental type: expands to an explicit specialization
// of UnSerialize() that copies sizeof(BasicType) raw bytes from pBuffer into
// elem and returns the number of bytes consumed.
// Line-continuation backslashes keep the body inside the macro; `inline`
// replaces `static`, which is not allowed on an explicit specialization.
#define BASIC_DATA_UNSERIALIZE(BasicType) \
    template<> \
    inline unsigned UnSerialize(const char *const pBuffer, BasicType& elem){ \
        ::memcpy(&elem, pBuffer, sizeof(BasicType)); \
        return sizeof(BasicType); \
    }
namespace Serialization{
// Integer type used for every length / element-count field written to the buffer
using LEN = unsigned;
// Interface a user-defined type implements to take part in serialization.
// The generic Length / GetBuffer / Serialize / UnSerialize templates forward to
// these members for any type without a dedicated overload.
class Serializable{
public:
    // Size in bytes of the object's serialized form (used to size the buffer).
    virtual unsigned Length() const = 0;

    // Writes the object starting at pBuffer; the result is added to the
    // caller's write offset, i.e. it is the number of bytes written.
    virtual unsigned Serialize(char *const pBuffer) const = 0;

    // Restores the object from pBuffer; the result is added to the caller's
    // read offset, i.e. it is the number of bytes consumed.
    virtual unsigned UnSerialize(const char *const pBuffer) = 0;

    virtual ~Serializable() = default;
};
/*
Forward declarations.
Nested STL containers (e.g. a list of maps of vectors) need every container
overload to be visible before any of the definitions below is parsed.
Each declaration is `static` so that its linkage agrees with the matching
`static` definition further down; declaring a function with external linkage
first and internal linkage later is ill-formed.
*/
template<typename Elem>
static unsigned Length(const std::list<Elem>& list);
template<typename Key, typename Value>
static unsigned Length(const std::map<Key, Value>& map);
template<typename Elem>
static unsigned Length(const std::vector<Elem>& vec);
template<typename Elem>
static unsigned Length(const std::set<Elem>& set);

template<typename Elem>
static char *const GetBuffer(const std::list<Elem>& list);
template<typename Key, typename Value>
static char *const GetBuffer(const std::map<Key, Value>& map);
template<typename Elem>
static char *const GetBuffer(const std::vector<Elem>& vec);
template<typename Elem>
static char *const GetBuffer(const std::set<Elem>& set);

template<typename Elem>
static unsigned Serialize(const std::list<Elem>& list, char *const pBuffer);
template<typename Key, typename Value>
static unsigned Serialize(const std::map<Key, Value>& map, char *const pBuffer);
template<typename Elem>
static unsigned Serialize(const std::vector<Elem>& vec, char *const pBuffer);
template<typename Elem>
static unsigned Serialize(const std::set<Elem>& set, char *const pBuffer);

template<typename Elem>
static unsigned UnSerialize(const char* pBuffer, std::list<Elem>& list);
template<typename Key, typename Value>
static unsigned UnSerialize(const char* pBuffer, std::map<Key, Value>& map);
template<typename Elem>
static unsigned UnSerialize(const char* pBuffer, std::vector<Elem>& vec);
template<typename Elem>
static unsigned UnSerialize(const char* pBuffer, std::set<Elem>& set);
/*
长度计算部分
得到数据长度
*/
// Fallback for any type without a dedicated overload or specialization:
// the type itself reports its serialized size through a Length() member.
template<typename Elem>
static unsigned Length(const Elem& elem){
    const unsigned uBytes = elem.Length();
    return uBytes;
}
// Length() specializations for the fundamental arithmetic types, one per
// macro invocation. bool, signed char, wchar_t and long double are not listed.
BASIC_DATA_LENGTH(char)
BASIC_DATA_LENGTH(short)
BASIC_DATA_LENGTH(int)
BASIC_DATA_LENGTH(float)
BASIC_DATA_LENGTH(long)
BASIC_DATA_LENGTH(double)
BASIC_DATA_LENGTH(long long)
BASIC_DATA_LENGTH(unsigned char)
BASIC_DATA_LENGTH(unsigned short)
BASIC_DATA_LENGTH(unsigned int)
BASIC_DATA_LENGTH(unsigned long)
BASIC_DATA_LENGTH(unsigned long long)
// Serialized size of a std::string: a LEN byte-count prefix followed by the
// raw characters (container layout |element count|data|).
// `inline` replaces `static`: an explicit specialization may not carry a
// storage-class specifier.
template<>
inline unsigned Length(const std::string& str){
    return sizeof(LEN) + static_cast<unsigned>(str.size());
}
// Serialized size of a std::list: the LEN element-count prefix plus the
// serialized size of every element.
template<typename Elem>
static unsigned Length(const std::list<Elem>& list){
    unsigned uTotal = static_cast<unsigned>(sizeof(LEN));
    for(auto it = list.begin(); it != list.end(); ++it){
        uTotal += Length(*it);
    }
    return uTotal;
}
// Serialized size of a std::map: the LEN element-count prefix plus, for every
// entry, the serialized size of its key and of its value.
template<typename Key, typename Value>
static unsigned Length(const std::map<Key, Value>& map){
    unsigned uTotal = static_cast<unsigned>(sizeof(LEN));
    for(auto it = map.begin(); it != map.end(); ++it){
        uTotal += Length(it->first);
        uTotal += Length(it->second);
    }
    return uTotal;
}
// Serialized size of a std::vector: the LEN element-count prefix plus the
// serialized size of every element.
template<typename Elem>
static unsigned Length(const std::vector<Elem>& vec){
    unsigned uTotal = static_cast<unsigned>(sizeof(LEN));
    for(auto it = vec.begin(); it != vec.end(); ++it){
        uTotal += Length(*it);
    }
    return uTotal;
}
// Serialized size of a std::set: the LEN element-count prefix plus the
// serialized size of every element.
template<typename Elem>
static unsigned Length(const std::set<Elem>& set){
    unsigned uTotal = static_cast<unsigned>(sizeof(LEN));
    for(auto it = set.begin(); it != set.end(); ++it){
        uTotal += Length(*it);
    }
    return uTotal;
}
/*
buffer申请、释放部分
一次性申请足够内存
*/
template<typename Elem>
static char *const GetBuffer(const Elem& elem){
LEN uLen = elem.Length();
MAKE_BUFFER(uLen);
}
// GetBuffer() specializations for the fundamental arithmetic types, one per
// macro invocation (same type list as the Length() specializations).
BASIC_DATA_GET_BUFFER(char)
BASIC_DATA_GET_BUFFER(short)
BASIC_DATA_GET_BUFFER(int)
BASIC_DATA_GET_BUFFER(float)
BASIC_DATA_GET_BUFFER(long)
BASIC_DATA_GET_BUFFER(double)
BASIC_DATA_GET_BUFFER(long long)
BASIC_DATA_GET_BUFFER(unsigned char)
BASIC_DATA_GET_BUFFER(unsigned short)
BASIC_DATA_GET_BUFFER(unsigned int)
BASIC_DATA_GET_BUFFER(unsigned long)
BASIC_DATA_GET_BUFFER(unsigned long long)
// Buffer acquisition for a std::string: sized by Length(str), allocated by
// MAKE_BUFFER, which returns the pointer to the payload area.
// `inline` replaces `static`: an explicit specialization may not carry a
// storage-class specifier.
template<>
inline char *const GetBuffer(const std::string& str){
    LEN uLen = Length(str);
    MAKE_BUFFER(uLen);
}
template<typename Elem>
static char *const GetBuffer(const std::list<Elem>& list){
LEN uLen = Length(list);
MAKE_BUFFER(uLen);
}
template<typename Key, typename Value>
static char *const GetBuffer(const std::map<Key, Value>& map){
LEN uLen = Length(map);
MAKE_BUFFER(uLen);
}
template<typename Elem>
static char *const GetBuffer(const std::vector<Elem>& vec){
LEN uLen = Length(vec);
MAKE_BUFFER(uLen);
}
template<typename Elem>
static char *const GetBuffer(const std::set<Elem>& set){
LEN uLen = Length(set);
MAKE_BUFFER(uLen);
}
// Frees a buffer obtained from GetBuffer().
// pBuffer is the payload pointer; the allocation actually starts sizeof(LEN)
// bytes earlier, where the total length is stored.
// A null pointer is ignored: stepping back from nullptr and deleting the
// result would be undefined behaviour.
static void ReleaseBuffer(char *const pBuffer){
    if(nullptr == pBuffer){
        return;
    }
    delete[] (pBuffer - sizeof(LEN));
}
/*
序列化部分
*/
// Fallback for any type without a dedicated overload or specialization:
// the object writes itself into pBuffer through its Serialize() member and
// the member's result is passed back unchanged.
template<typename Elem>
static unsigned Serialize(const Elem& elem, char *const pBuffer){
    const unsigned uWritten = elem.Serialize(pBuffer);
    return uWritten;
}
// Serialize() specializations for the fundamental arithmetic types, one per
// macro invocation (same type list as the Length() specializations).
BASIC_DATA_SERIALIZE(char)
BASIC_DATA_SERIALIZE(short)
BASIC_DATA_SERIALIZE(int)
BASIC_DATA_SERIALIZE(float)
BASIC_DATA_SERIALIZE(long)
BASIC_DATA_SERIALIZE(double)
BASIC_DATA_SERIALIZE(long long)
BASIC_DATA_SERIALIZE(unsigned char)
BASIC_DATA_SERIALIZE(unsigned short)
BASIC_DATA_SERIALIZE(unsigned int)
BASIC_DATA_SERIALIZE(unsigned long)
BASIC_DATA_SERIALIZE(unsigned long long)
// Serializes a std::string as |byte count|raw characters| and returns the
// number of bytes written.
// The count is stored with memcpy rather than through a reinterpret_cast:
// when the string is nested inside another value, pBuffer sits at an
// arbitrary offset and need not be aligned for LEN.
// `inline` replaces `static`: an explicit specialization may not carry a
// storage-class specifier.
template<>
inline unsigned Serialize(const std::string& str, char* const pBuffer){
    const LEN uCount = static_cast<LEN>(str.size());
    ::memcpy(pBuffer, &uCount, sizeof(LEN));                  // element count
    ::memcpy(pBuffer + sizeof(LEN), str.data(), str.size());  // data
    return static_cast<unsigned>(str.size()) + sizeof(LEN);
}
template<typename Elem>
static unsigned Serialize(const std::list<Elem>& list, char *const pBuffer){
unsigned uPos = sizeof(LEN);
*reinterpret_cast<LEN *const>(pBuffer) = static_cast<LEN>(list.size()); //元素个数
for(const auto& elem : list){
uPos += Serialize(elem, pBuffer + uPos);
}
return uPos;
}
template<typename Key, typename Value>
static unsigned Serialize(const std::map<Key, Value>& map, char *const pBuffer){
unsigned uPos = sizeof(LEN);
*reinterpret_cast<LEN *const>(pBuffer) = static_cast<LEN>(map.size()); //元素个数
for(const auto& elem : map){
uPos += Serialize(elem.first, pBuffer + uPos);
uPos += Serialize(elem.second, pBuffer + uPos);
}
return uPos;
}
template<typename Elem>
static unsigned Serialize(const std::vector<Elem>& vec, char *const pBuffer){
unsigned uPos = sizeof(LEN);
*reinterpret_cast<LEN *const>(pBuffer) = static_cast<LEN>(vec.size()); //元素个数
for(const auto& elem : vec){
uPos += Serialize(elem, pBuffer + uPos);
}
return uPos;
}
template<typename Elem>
static unsigned Serialize(const std::set<Elem>& set, char *const pBuffer){
unsigned uPos = sizeof(LEN);
*reinterpret_cast<LEN *const>(pBuffer) = static_cast<LEN>(set.size()); //元素个数
for(const auto& elem : set){
uPos += Serialize(elem, pBuffer + uPos);
}
return uPos;
}
/*
检查反序列化之前的buffer
BufferLen指包含数据总长度在内的长度,即从硬盘上读取并解压缩后的整个buffer长度
*/
static int CheckLength(const char *const pBuffer, int BufferLen){
if(*reinterpret_cast<const LEN *const>(pBuffer - sizeof(LEN)) + sizeof(LEN) != BufferLen){
return 0;
}
return 1;
}
/*
反序列化部分
*/
// Fallback for any type without a dedicated overload or specialization:
// the object restores itself from pBuffer through its UnSerialize() member
// and the member's result is passed back unchanged.
template<typename Elem>
static unsigned UnSerialize(const char* pBuffer, Elem& elem){
    const unsigned uConsumed = elem.UnSerialize(pBuffer);
    return uConsumed;
}
// UnSerialize() specializations for the fundamental arithmetic types, one per
// macro invocation (same type list as the Length() specializations).
BASIC_DATA_UNSERIALIZE(char)
BASIC_DATA_UNSERIALIZE(short)
BASIC_DATA_UNSERIALIZE(int)
BASIC_DATA_UNSERIALIZE(float)
BASIC_DATA_UNSERIALIZE(long)
BASIC_DATA_UNSERIALIZE(double)
BASIC_DATA_UNSERIALIZE(long long)
BASIC_DATA_UNSERIALIZE(unsigned char)
BASIC_DATA_UNSERIALIZE(unsigned short)
BASIC_DATA_UNSERIALIZE(unsigned int)
BASIC_DATA_UNSERIALIZE(unsigned long)
BASIC_DATA_UNSERIALIZE(unsigned long long)
// Restores a std::string from |byte count|raw characters| and returns the
// number of bytes consumed.
// The count is read with memcpy rather than through a reinterpret_cast:
// when the string is nested inside another value, pBuffer sits at an
// arbitrary offset and need not be aligned for LEN.
// `inline` replaces `static`: an explicit specialization may not carry a
// storage-class specifier.
template<>
inline unsigned UnSerialize(const char* pBuffer, std::string& str){
    LEN uLen = 0;
    ::memcpy(&uLen, pBuffer, sizeof(LEN));
    std::string strTemp(pBuffer + sizeof(LEN), uLen);
    str = std::move(strTemp);
    return uLen + sizeof(LEN);
}
template<typename Elem>
static unsigned UnSerialize(const char* pBuffer, std::list<Elem>& list){
LEN uLen = *reinterpret_cast<const LEN *>(pBuffer);
unsigned uPos = sizeof(LEN);
for(LEN i = 0; i < uLen; i++){
Elem elem;
uPos += UnSerialize(pBuffer + uPos, elem);
list.push_back(std::move(elem));
}
return uPos;
}
template<typename Key, typename Value>
static unsigned UnSerialize(const char* pBuffer, std::map<Key, Value>& map){
LEN uLen = *reinterpret_cast<const LEN *>(pBuffer);
unsigned uPos = sizeof(LEN);
for(LEN i = 0; i < uLen; i++){
Key key;
Value val;
uPos += UnSerialize(pBuffer + uPos, key);
uPos += UnSerialize(pBuffer + uPos, val);
map.insert(std::make_pair(std::move(key), std::move(val)));
}
return uPos;
}
template<typename Elem>
static unsigned UnSerialize(const char* pBuffer, std::vector<Elem>& vec){
LEN uLen = *reinterpret_cast<const LEN *>(pBuffer);
vec.resize(uLen);
unsigned uPos = sizeof(LEN);
for(LEN i = 0; i < uLen; i++){
Elem elem;
uPos += UnSerialize(pBuffer + uPos, elem);
vec[i] = std::move(elem);
}
return uPos;
}
template<typename Elem>
static unsigned UnSerialize(const char* pBuffer, std::set<Elem>& set){
LEN uLen = *reinterpret_cast<const LEN *>(pBuffer);
unsigned uPos = sizeof(LEN);
for(LEN i = 0; i < uLen; i++){
Elem elem;
uPos += UnSerialize(pBuffer + uPos, elem);
set.emplace(std::move(elem));
}
return uPos;
}
};
#endif // !_Serialization_H_
测试代码
#include <cstdlib>
#include <cassert>
#include <iostream>
#include "Serialization.h"
// Round-trip check for one value: prints the computed length, the number of
// bytes written and the number of bytes read back, validates the buffer
// header, and asserts that the deserialized copy equals the input.
template<typename T>
void Test(const T& a){
    const auto expectedBytes = Serialization::Length(a);
    std::cout << expectedBytes << std::endl;

    auto pBuffer = Serialization::GetBuffer(a);
    const auto written = Serialization::Serialize(a, pBuffer);
    std::cout << written << std::endl;

    // Whole-buffer length = payload + the hidden length field in front of it.
    assert(1 == Serialization::CheckLength(pBuffer, Serialization::Length(a) + sizeof(Serialization::LEN)));

    T b = T();
    const auto consumed = Serialization::UnSerialize(pBuffer, b);
    std::cout << consumed << std::endl;

    Serialization::ReleaseBuffer(pBuffer);
    assert(a == b);
}
class MyTest : public Serialization::Serializable{
public:
MyTest() : m_i(0), m_c(0), m_d(0){}
MyTest(int i, char c, double d, const std::string& str, const std::list<int>& list,
const std::map<int, double>& map, const std::vector<int>& vec, const std::set<int>& set)
:m_i(i), m_c(c), m_d(d), m_str(str), m_list(list), m_map(map), m_vec(vec), m_set(set){}
MyTest(int i, char c, double d, std::string&& str, std::list<int>&& list,
std::map<int, double>&& map, std::vector<int>&& vec, std::set<int>&& set) //针对列表初始化
:m_i(i), m_c(c), m_d(d), m_str(std::move(str)), m_list(std::move(list)), m_map(std::move(map)), m_vec(std::move(vec)), m_set(std::move(set)){}
bool operator==(const MyTest& rhs) const{
return m_i == rhs.m_i && m_c == rhs.m_c && m_d == rhs.m_d && m_str == rhs.m_str && m_list == rhs.m_list && m_map == rhs.m_map && m_vec == rhs.m_vec && m_set == rhs.m_set;
}
virtual unsigned Length() const override{
return Serialization::Length(m_i) + Serialization::Length(m_c) + Serialization::Length(m_d) + Serialization::Length(m_str) + Serialization::Length(m_list)
+ Serialization::Length(m_map) + Serialization::Length(m_vec) + Serialization::Length(m_set);
}
virtual unsigned Serialize(char *const pBuffer) const override{
unsigned uPos = 0;
uPos += Serialization::Serialize(m_i, pBuffer + uPos);
uPos += Serialization::Serialize(m_c, pBuffer + uPos);
uPos += Serialization::Serialize(m_d, pBuffer + uPos);
uPos += Serialization::Serialize(m_str, pBuffer + uPos);
uPos += Serialization::Serialize(m_list, pBuffer + uPos);
uPos += Serialization::Serialize(m_map, pBuffer + uPos);
uPos += Serialization::Serialize(m_vec, pBuffer + uPos);
uPos += Serialization::Serialize(m_set, pBuffer + uPos);
return uPos;
}
virtual unsigned UnSerialize(const char *const pBuffer)override{
unsigned uPos = 0;
uPos += Serialization::UnSerialize(pBuffer + uPos, m_i);
uPos += Serialization::UnSerialize(pBuffer + uPos, m_c);
uPos += Serialization::UnSerialize(pBuffer + uPos, m_d);
uPos += Serialization::UnSerialize(pBuffer + uPos, m_str);
uPos += Serialization::UnSerialize(pBuffer + uPos, m_list);
uPos += Serialization::UnSerialize(pBuffer + uPos, m_map);
uPos += Serialization::UnSerialize(pBuffer + uPos, m_vec);
uPos += Serialization::UnSerialize(pBuffer + uPos, m_set);
return uPos;
}
private:
int m_i;
char m_c;
double m_d;
std::string m_str;
std::list<int> m_list;
std::map<int, double> m_map;
std::vector<int> m_vec;
std::set<int> m_set;
};
int main(int argc, char** argv){
//基础类型
Test<char>('a');
Test<short>(-1);
Test<int>(-1);
Test<float>(-1.0);
Test<long>(-1);
Test<double>(-1.0);
Test<long long>(-1);
Test<unsigned char>(1);
Test<unsigned short>(1);
Test<unsigned int>(1);
Test<unsigned long>(1);
Test<unsigned long long>(1);
//STL容器
Test<std::string>("Test!!!");
Test<std::list<int>>({1, 2, 3, 4, 5, 6});
Test<std::map<int, double>>({{1, 1.0}, {2, 2.0}, {3, 3.0}});
Test<std::vector<int>>({1, 2, 3, 4, 5, 6});
Test<std::set<int>>({1, 2, 3, 4, 5, 6});
//STL容器嵌套
Test<std::set<std::string>>({"test1", "test2", "test3"});
Test<std::list<std::set<std::string>>>({{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}});
Test<std::map<int, std::list<std::set<std::string>>>>({{1, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}},
{2, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}},
{3, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}}});
Test<std::vector<std::map<int, std::list<std::set<std::string>>>>>({{{1, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}},
{2, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}},
{3, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}}},
{{1, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}},
{2, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}},
{3, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}}},
{{1, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}},
{2, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}},
{3, {{"test1", "test2", "test3"}, {"test1", "test2", "test3"}, {"test1", "test2", "test3"}}}}});
//自定义结构
MyTest test(1, 'a', 1.0, "Test!!!", {1, 2, 3, 4, 5, 6}, {{1, 1.0}, {2, 2.0}, {3, 3.0}}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6});
Test<MyTest>(test);
system("pause");
return 0;
}
改进
- 在序列化后总长度前方增加两个字段
- 字段一:特定标识字符串,用于直接判断buffer是否是可序列化内存,如rdb文件开头就有"REDIS"
- 字段二:增加校验和字段,存储序列化后数据的校验和,用于检查序列化数据是否被篡改,如网络协议上一般都会有此字段
无法增加协议版本号,因为自定义结构的序列化、反序列化由用户控制,无法统一决定版本