• ONNX model deployment: TensorRT, OpenVINO, ONNXRuntime, OpenCV dnn


    No matter which framework a model was trained in, it is recommended to convert it to ONNX format for easier deployment.

    Frameworks that support ONNX models:

    • TensorRT: from NVIDIA, for GPU inference acceleration. Note that it requires NVIDIA GPU hardware.
    • OpenVINO: from Intel, for CPU inference acceleration. Note that it requires Intel CPU hardware.
    • ONNXRuntime: developed jointly by Microsoft, Amazon, Facebook, IBM, and others; runs on both GPU and CPU.
    • OpenCV dnn: OpenCV's module for running models.

    Models in .pt format can be deployed with the PyTorch framework itself.

    In terms of inference efficiency: TensorRT > OpenVINO > ONNXRuntime > OpenCV dnn > PyTorch.

    Since my machine only has a CPU, the rest of this post looks at how to use OpenVINO, ONNXRuntime, and OpenCV dnn from C++.
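
    Of the three, OpenCV dnn needs the least code. Here is a minimal sketch; the model path, image path, and 224x224 input size are assumptions carried over from the ONNXRuntime example below:

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>
    
    int main()
    {
        // readNetFromONNX and blobFromImage are standard OpenCV 4.x dnn APIs.
        cv::dnn::Net net = cv::dnn::readNetFromONNX("best-sim.onnx");
        cv::Mat image = cv::imread("D:/barcode.jpg");
        // Same preprocessing as the ONNXRuntime example: resize, BGR -> RGB,
        // scale to [0,1], HWC -> CHW.
        cv::Mat blob = cv::dnn::blobFromImage(image, 1.0 / 255.0, cv::Size(224, 224),
            cv::Scalar(), true /*swapRB*/, false /*crop*/);
        net.setInput(blob);
        cv::Mat scores = net.forward(); // raw output; shape depends on the model
        return 0;
    }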

    【ONNXRuntime C++】

    The model expects mini-batches of 3-channel RGB images of shape (N x 3 x H x W), where N is the batch size, and H and W are expected to be at least 224.

    #include <onnxruntime_cxx_api.h>
    
    #include <opencv2/dnn/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/imgproc.hpp>
    
    #include <chrono>
    #include <cmath>
    #include <exception>
    #include <fstream>
    #include <iostream>
    #include <limits>
    #include <numeric>
    #include <string>
    #include <vector>
    
    template <typename T>
    T vectorProduct(const std::vector<T>& v)
    {
        return std::accumulate(v.begin(), v.end(), static_cast<T>(1), std::multiplies<T>());
    }
    
    /**
     * @brief Operator overloading for printing vectors
     * @tparam T
     * @param os
     * @param v
     * @return std::ostream&
     */
    template <typename T>
    std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
    {
        os << "[";
        for (size_t i = 0; i < v.size(); ++i)
        {
            os << v[i];
            if (i != v.size() - 1)
            {
                os << ", ";
            }
        }
        os << "]";
        return os;
    }
    
    /**
     * @brief Print ONNX tensor data type
     * https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L93
     * @param os
     * @param type
     * @return std::ostream&
     */
    std::ostream& operator<<(std::ostream& os,
        const ONNXTensorElementDataType& type)
    {
        switch (type)
        {
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED:
            os << "undefined";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
            os << "float";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
            os << "uint8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
            os << "int8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
            os << "uint16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
            os << "int16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
            os << "int32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
            os << "int64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
            os << "std::string";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
            os << "bool";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
            os << "float16";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
            os << "double";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
            os << "uint32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
            os << "uint64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64:
            os << "float real + float imaginary";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128:
            os << "double real + double imaginary";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
            os << "bfloat16";
            break;
        default:
            break;
        }
    
        return os;
    }
    
    std::vector<std::string> readLabels(const std::string& labelFilepath)
    {
        std::vector<std::string> labels;
        std::string line;
        std::ifstream fp(labelFilepath);
        while (std::getline(fp, line))
        {
            labels.push_back(line);
        }
        return labels;
    }
    
    int main()
    {
        bool useCUDA{ false };
    
        std::string instanceName{ "image-classification-inference" };
        std::string modelFilepath{ "best-sim.onnx" };
        std::string imageFilepath{ "D:/barcode.jpg" };
        std::string labelFilepath{ "label.txt" };
        // Read the label file (one label per line)
        std::vector<std::string> labels{ readLabels(labelFilepath) };
        // Set the logging severity level (ONNX Runtime requires one)
        Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, instanceName.c_str());
        Ort::SessionOptions sessionOptions;
        //sessionOptions.SetIntraOpNumThreads(1);
        if (useCUDA)
        {
            // Using CUDA backend
            // https://github.com/microsoft/onnxruntime/blob/v1.8.2/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L329
            //OrtCUDAProviderOptions cuda_options{ 0 };
            //sessionOptions.AppendExecutionProvider_CUDA(cuda_options);
        }
    
        // Set the graph optimization level
        // Available levels are
        // ORT_DISABLE_ALL -> To disable all optimizations
        // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals) 
        // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
        // ORT_ENABLE_ALL -> To Enable All possible optimizations
        sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
        // Load the model. ONNX Runtime on Windows takes a wide-character path;
        // this naive std::string -> std::wstring conversion is safe only for ASCII paths.
        std::wstring widestr = std::wstring(modelFilepath.begin(), modelFilepath.end());
        Ort::Session session(env, widestr.c_str(), sessionOptions);
    
        Ort::AllocatorWithDefaultOptions allocator;
    
        size_t numInputNodes = session.GetInputCount();
        size_t numOutputNodes = session.GetOutputCount();
        std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
        std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;
    
        const char* inputName = session.GetInputName(0, allocator);
        std::cout << "Input Name: " << inputName << std::endl;
    
        Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
        auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
    
        ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
        std::cout << "Input Type: " << inputType << std::endl;
        std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
        // If the model was exported with a dynamic batch size, the first dim
        // is -1; pin it to 1 so the tensor-size computations below work.
        if (inputDims.at(0) == -1)
        {
            inputDims.at(0) = 1;
        }
        std::cout << "Input Dimensions: " << inputDims << std::endl;
        const char* outputName = session.GetOutputName(0, allocator);
        std::cout << "Output Name: " << outputName << std::endl;
    
        Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
        auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
    
        ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
        std::cout << "Output Type: " << outputType << std::endl;
        std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
        if (outputDims.at(0) == -1)
        {
            outputDims.at(0) = 1;
        }
        std::cout << "Output Dimensions: " << outputDims << std::endl;
    
        //【Image preprocessing】——————————————————————————————————————————【begin】
        // Pipeline: BGR image -> resize -> convert to RGB -> normalize to [0,1] -> HWC to CHW -> std::vector<float> (the model input)
        // OpenCV loads images as HWC; PyTorch uses CHW (its CUDA/cuDNN backends are designed around CHW, which suits convolution and similar ops)
        cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
        cv::Mat preprocessedImage;      // planar R-G-B order after blobFromImage
        // Resize -> BGR to RGB -> scale to [0,1] -> HWC to CHW.
        // Note cv::Size is (width, height) while the dims are NCHW, so width
        // comes from inputDims.at(3) and height from inputDims.at(2).
        cv::dnn::blobFromImage(imageBGR, preprocessedImage, 1.0 / 255.0,
            cv::Size(inputDims.at(3), inputDims.at(2)), cv::Scalar(0, 0, 0), true, false);
        // Flatten the 3-channel blob into a contiguous std::vector<float>
        std::vector<float> inputTensorValues;
        inputTensorValues.assign(preprocessedImage.begin<float>(), preprocessedImage.end<float>());
        //【Image preprocessing】——————————————————————————————————————————【end】
    
        size_t outputTensorSize = vectorProduct(outputDims);
        //assert(("Output tensor size should equal to the label set size.", labels.size() == outputTensorSize));
        std::vector<float> outputTensorValues(outputTensorSize);
    
        std::vector<const char*> inputNames{ inputName };
        std::vector<const char*> outputNames{ outputName };
        //std::vector<Ort::Value> inputTensors;
        //std::vector<Ort::Value> outputTensors;
    
        Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
        Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorValues.size(), inputDims.data(), inputDims.size());
        // Run inference
        std::vector<Ort::Value> ort_outputs= session.Run(Ort::RunOptions{ nullptr }, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());
           
        // Drawing detection boxes: still work in progress…
        // Extract the predicted class index, label, and confidence
        int predId = 0;
        float activation = 0;
        float maxActivation = std::numeric_limits<float>::lowest();
        float expSum = 0;
        for (size_t i = 0; i < labels.size(); i++)
        {
            activation = outputTensorValues.at(i);
            expSum += std::exp(activation);
            if (activation > maxActivation)
            {
                predId = static_cast<int>(i);
                maxActivation = activation;
            }
        }
        std::cout << "Predicted Label ID: " << predId << std::endl;
        std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
        std::cout << "Uncalibrated Confidence: " << std::exp(maxActivation) / expSum << std::endl;
    
        // Average latency over 100 runs
        int numTests{ 100 };
        std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
        for (int i = 0; i < numTests; i++)
        {
            session.Run(Ort::RunOptions{ nullptr }, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());
        }
        std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
        std::cout << "Mean Inference Latency: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() / static_cast<float>(numTests)
            << " ms" << std::endl;
    }
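
    A note on the model path: the Ort::Session constructor takes a wide-character path (const wchar_t*) on Windows but a plain const char* on Linux/macOS, so the std::wstring conversion in the code above is Windows-specific. A minimal sketch of a portable variant, using the same variable names as above:

    // Select the path type per platform before constructing the session.
    #ifdef _WIN32
        std::wstring modelPathW(modelFilepath.begin(), modelFilepath.end()); // ASCII-only
        Ort::Session session(env, modelPathW.c_str(), sessionOptions);
    #else
        Ort::Session session(env, modelFilepath.c_str(), sessionOptions);
    #endif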

    Blogger leimao's example: leimao/ONNX-Runtime-Inference: ONNX Runtime Inference C++ Example (github.com)

    Official examples: onnxruntime-inference-examples/c_cxx at main · microsoft/onnxruntime-inference-examples (github.com)
