Regardless of which framework a model was trained with, it is recommended to convert it to ONNX format, which makes deployment easier.
Frameworks/runtimes that can consume ONNX models include:
- TensorRT: from NVIDIA, for GPU inference acceleration. Note that it requires an NVIDIA GPU.
- OpenVINO: from Intel, for CPU inference acceleration. Note that it requires an Intel CPU.
- ONNX Runtime: developed jointly by Microsoft, Amazon, Facebook, IBM, and others; runs on both GPU and CPU.
- OpenCV dnn: OpenCV's module for loading and running models.
Models in .pt format can also be deployed with the PyTorch framework itself.
In terms of inference efficiency, roughly: TensorRT > OpenVINO > ONNX Runtime > OpenCV dnn > PyTorch (each on the hardware it targets).
Since this machine only has a CPU, the focus here is on how to use OpenVINO, ONNX Runtime, and OpenCV dnn from C++; a minimal OpenCV dnn sketch follows right below, and the rest of this section walks through ONNX Runtime.
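As a quick point of comparison, loading an ONNX model with OpenCV dnn on the CPU looks roughly like the sketch below. This is a minimal, untested sketch: the model path, image path, and the 640x640 input size are assumptions for illustration, not values taken from a specific project.

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main()
{
    // Load an ONNX model with OpenCV's dnn module (plain OpenCV backend, CPU target)
    cv::dnn::Net net = cv::dnn::readNetFromONNX("best-sim.onnx");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

    // Same preprocessing idea as in the ONNX Runtime example below:
    // resize, BGR -> RGB, scale to [0, 1], HWC -> CHW
    cv::Mat image = cv::imread("D:/barcode.jpg", cv::IMREAD_COLOR);
    cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(640, 640),
                                          cv::Scalar(0, 0, 0), true, false);

    net.setInput(blob);
    cv::Mat output = net.forward();  // raw network output; shape depends on the model
    std::cout << "Output dims: " << output.dims
              << ", total elements: " << output.total() << std::endl;
    return 0;
}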
【ONNXRuntime C++】
Input format: mini-batches of 3-channel RGB images of shape (N x 3 x H x W), where N is the batch size, and H and W are expected to be at least 224.
#include <onnxruntime_cxx_api.h>

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <chrono>
#include <cmath>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>

template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), static_cast<T>(1), std::multiplies<T>());
}

/**
 * @brief Operator overloading for printing vectors
 * @tparam T
 * @param os
 * @param v
 * @return std::ostream&
 */
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    for (size_t i = 0; i < v.size(); ++i)
    {
        os << v[i];
        if (i != v.size() - 1)
        {
            os << ", ";
        }
    }
    os << "]";
    return os;
}

/**
 * @brief Print ONNX tensor data type
 * https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L93
 * @param os
 * @param type
 * @return std::ostream&
 */
std::ostream& operator<<(std::ostream& os, const ONNXTensorElementDataType& type)
{
    switch (type)
    {
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED: os << "undefined"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: os << "float"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: os << "uint8_t"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: os << "int8_t"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: os << "uint16_t"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16: os << "int16_t"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: os << "int32_t"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: os << "int64_t"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING: os << "std::string"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: os << "bool"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: os << "float16"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: os << "double"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32: os << "uint32_t"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64: os << "uint64_t"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64: os << "float real + float imaginary"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128: os << "double real + float imaginary"; break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16: os << "bfloat16"; break;
    default: break;
    }
    return os;
}

std::vector<std::string> readLabels(const std::string& labelFilepath)
{
    std::vector<std::string> labels;
    std::string line;
    std::ifstream fp(labelFilepath);
    while (std::getline(fp, line))
    {
        labels.push_back(line);
    }
    return labels;
}

int main()
{
    bool useCUDA{ false };
    std::string instanceName{ "image-classification-inference" };
    std::string modelFilepath{ "best-sim.onnx" };
    std::string imageFilepath{ "D:/barcode.jpg" };
    std::string labelFilepath{ "label.txt" };

    // Read the label file (one class name per line)
    std::vector<std::string> labels{ readLabels(labelFilepath) };

    // The logging severity level must be specified for onnxruntime
    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, instanceName.c_str());
    Ort::SessionOptions sessionOptions;
    //sessionOptions.SetIntraOpNumThreads(1);
    if (useCUDA)
    {
        // Using CUDA backend
        // https://github.com/microsoft/onnxruntime/blob/v1.8.2/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L329
        //OrtCUDAProviderOptions cuda_options{ 0 };
        //sessionOptions.AppendExecutionProvider_CUDA(cuda_options);
    }

    // Set the graph optimization level. Available levels are:
    // ORT_DISABLE_ALL     -> disable all optimizations
    // ORT_ENABLE_BASIC    -> enable basic optimizations (such as redundant node removals)
    // ORT_ENABLE_EXTENDED -> enable extended optimizations (level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL      -> enable all possible optimizations
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

    // Load the model (the Windows build of onnxruntime takes a wide-character path, so convert string -> wstring)
    std::wstring widestr = std::wstring(modelFilepath.begin(), modelFilepath.end());
    Ort::Session session(env, widestr.c_str(), sessionOptions);

    Ort::AllocatorWithDefaultOptions allocator;

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();
    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;

    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;

    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: " << inputDims << std::endl;

    const char* outputName = session.GetOutputName(0, allocator);
    std::cout << "Output Name: " << outputName << std::endl;

    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::cout << "Output Type: " << outputType << std::endl;

    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: " << outputDims << std::endl;

    // ---------------- Image preprocessing [begin] ----------------
    // Pipeline: BGR image -> resize -> convert to RGB -> normalize to [0,1] -> HWC to CHW -> std::vector<float> (the model input)
    // OpenCV reads images in HWC layout; PyTorch uses CHW (cuda/cudnn are designed around CHW, which suits convolution etc.)
    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
    cv::Mat preprocessedImage;
    // Produces an RRR-GGG-BBB (CHW) blob: resize -> BGR to RGB -> scale to [0,1] -> HWC to CHW
    // cv::Size is (width, height); inputDims is N x C x H x W, so width = inputDims[3] and height = inputDims[2]
    cv::dnn::blobFromImage(imageBGR, preprocessedImage, 1 / 255.0,
                           cv::Size(inputDims.at(3), inputDims.at(2)),
                           cv::Scalar(0, 0, 0), true, false);

    // Flatten the 3-channel blob into a one-dimensional std::vector<float>
    std::vector<float> inputTensorValues;
    inputTensorValues.assign(preprocessedImage.begin<float>(), preprocessedImage.end<float>());
    // ---------------- Image preprocessing [end] ----------------

    size_t outputTensorSize = vectorProduct(outputDims);
    //assert(("Output tensor size should equal to the label set size.", labels.size() == outputTensorSize));
    // Buffer for a pre-allocated output tensor (only needed for the variant shown after this listing)
    std::vector<float> outputTensorValues(outputTensorSize);

    std::vector<const char*> inputNames{ inputName };
    std::vector<const char*> outputNames{ outputName };
    //std::vector<Ort::Value> inputTensors;
    //std::vector<Ort::Value> outputTensors;

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorValues.size(),
                                                             inputDims.data(), inputDims.size());

    // Run inference
    std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{ nullptr },
                                                      inputNames.data(), &inputTensor, inputNames.size(),
                                                      outputNames.data(), outputNames.size());

    // Drawing the detection boxes is still being worked on...

    // Get the predicted class index, label, and confidence.
    // Read from the returned output tensor (this Run overload does not fill outputTensorValues).
    const float* outputData = ort_outputs.at(0).GetTensorMutableData<float>();
    int predId = 0;
    float activation = 0;
    float maxActivation = std::numeric_limits<float>::lowest();
    float expSum = 0;
    for (size_t i = 0; i < labels.size(); i++)
    {
        activation = outputData[i];
        expSum += std::exp(activation);
        if (activation > maxActivation)
        {
            predId = static_cast<int>(i);
            maxActivation = activation;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
    std::cout << "Uncalibrated Confidence: " << std::exp(maxActivation) / expSum << std::endl;

    // Average latency over 100 runs
    int numTests{ 100 };
    std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
    for (int i = 0; i < numTests; i++)
    {
        session.Run(Ort::RunOptions{ nullptr },
                    inputNames.data(), &inputTensor, inputNames.size(),
                    outputNames.data(), outputNames.size());
    }
    std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
    std::cout << "Mean Inference Latency: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() / static_cast<float>(numTests)
              << " ms" << std::endl;
}
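For reference, leimao's original example binds a pre-allocated output buffer to the Run call instead of taking the returned tensors, which is what the otherwise unused outputTensorValues vector above is for. Below is a minimal sketch of that variant, reusing the variable names from the listing above; treat it as an assumption about the intended usage rather than code from the original post.

// Wrap the pre-allocated buffer in an Ort::Value so Run() can write into it in place
Ort::Value outputTensor = Ort::Value::CreateTensor<float>(memoryInfo,
                                                          outputTensorValues.data(), outputTensorValues.size(),
                                                          outputDims.data(), outputDims.size());

// This Run overload takes caller-provided output Ort::Value objects and fills them directly,
// so outputTensorValues afterwards holds the raw output scores
session.Run(Ort::RunOptions{ nullptr },
            inputNames.data(), &inputTensor, 1,
            outputNames.data(), &outputTensor, 1);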
Reference: leimao's example, leimao/ONNX-Runtime-Inference: ONNX Runtime Inference C++ Example (github.com)
Official examples: onnxruntime-inference-examples/c_cxx at main · microsoft/onnxruntime-inference-examples (github.com)