• caffe网络在多线程中无法使用GPU的解决方案 | cpp caffe net run in multiple threads


    本文首发于个人博客https://kezunlin.me/post/8d877e63/,欢迎阅读!

    cpp caffe net run in multiple threads

    Guide

    set_mode

    Why does Caffe fail to use the GPU in a new thread?
    see here

    the `Caffe::mode_` variable that controls this is thread-local,
    so ensure you’re calling `caffe.set_mode_gpu()` (in C++: `Caffe::set_mode(Caffe::GPU)`)
    in each thread before running any Caffe functions. That should solve your issue.
    
    Caffe set_mode GPU 在多线程下失效
    在main thread中设置GPU模式,在worker thread中调用网络进行检测时,
    GPU模式不生效,worker thread默认仍然使用CPU模式,所以速度很慢,比GPU模式慢了
    10倍左右。
    
    解决方案:在子线程中set_mode,然后调用网络进行检测。
    (1)创建网络在main thread。static 网络存储在全局静态数据区。
    worker thread可以直接使用。
    (2) 在worker thread中检测,需要在子线程中set_mode,然后调用网络进行检测。
    
    结论:
    (1)caffe的set_mode所在的线程必须和使用nets进行forward的线程相同。否则默认使用CPU模式,速度会很慢。
    (2)caffe的nets初始化可以在main thread也可以在worker thread。
    

    code example

    #include <iostream>
    #include <string>
    #include <thread>
    
    #include <gtest/gtest.h>
    #include <glog/logging.h>
    
    #include <boost/date_time/posix_time/posix_time.hpp>  
    
    // opencv
    #include <opencv2/core.hpp>
    #include <opencv2/highgui.hpp>
    #include <opencv2/imgproc.hpp>
    
    using namespace std;
    
    #include "algorithm/algorithm.h"
    using namespace kezunlin::algorithm;
    
    #pragma region net-demo
    
    /// Demo for the topwire net.
    ///
    /// @param run_in_worker_thread  When true, the Caffe mode is configured on the
    ///        calling thread before detection. Caffe's mode is thread-local, so a
    ///        mode set on another thread does not apply here.
    void topwire_demo(bool run_in_worker_thread)
    {
        // Select GPU mode on device 0 for the current thread.
        // NOTE(review): the third argument (1234) is presumably a random seed — confirm
        // against CaffeApi::set_mode.
        if (run_in_worker_thread)
        {
            CaffeApi::set_mode(true, 0, 1234);
        }

        // do net detect
        // ...
    }
    
    /// Demo for the railway net.
    ///
    /// @param run_in_worker_thread  When true, the Caffe mode is configured on the
    ///        calling thread before detection. Caffe's mode is thread-local, so a
    ///        mode set on another thread does not apply here.
    void railway_demo(bool run_in_worker_thread)
    {
        // Select GPU mode on device 0 for the current thread.
        // NOTE(review): the third argument (1234) is presumably a random seed — confirm
        // against CaffeApi::set_mode.
        if (run_in_worker_thread)
        {
            CaffeApi::set_mode(true, 0, 1234);
        }

        // do net detect
        // ...
    }
    
    /// Demo for the sidewall net.
    ///
    /// @param run_in_worker_thread  When true, the Caffe mode is configured on the
    ///        calling thread before detection. Caffe's mode is thread-local, so a
    ///        mode set on another thread does not apply here.
    void sidewall_demo(bool run_in_worker_thread)
    {
        // Select GPU mode on device 0 for the current thread.
        // NOTE(review): the third argument (1234) is presumably a random seed — confirm
        // against CaffeApi::set_mode.
        if (run_in_worker_thread)
        {
            CaffeApi::set_mode(true, 0, 1234);
        }

        // do net detect
        // ...
    }
    
    /// Demo for the lockcatch net.
    ///
    /// @param run_in_worker_thread  When true, the Caffe mode is configured on the
    ///        calling thread before detection. Caffe's mode is thread-local, so a
    ///        mode set on another thread does not apply here.
    void lockcatch_demo(bool run_in_worker_thread)
    {
        // Select GPU mode on device 0 for the current thread.
        // NOTE(review): the third argument (1234) is presumably a random seed — confirm
        // against CaffeApi::set_mode.
        if (run_in_worker_thread)
        {
            CaffeApi::set_mode(true, 0, 1234);
        }

        // do net detect
        // ...
    }
    
    #pragma endregion
    
    
    #pragma region worker-thread-demo
    
    /// Runs topwire_demo on a newly spawned std::thread and blocks until it finishes.
    ///
    /// @param run_in_worker_thread  Forwarded unchanged to topwire_demo.
    void worker_thread_topwire_demo(bool run_in_worker_thread)
    {
        std::thread worker([run_in_worker_thread] {
            topwire_demo(run_in_worker_thread);
        });
        worker.join();  // wait for the detection to complete before returning
    }
    
    /// Runs railway_demo on a newly spawned std::thread and blocks until it finishes.
    ///
    /// @param run_in_worker_thread  Forwarded unchanged to railway_demo.
    void worker_thread_railway_demo(bool run_in_worker_thread)
    {
        std::thread worker([run_in_worker_thread] {
            railway_demo(run_in_worker_thread);
        });
        worker.join();  // wait for the detection to complete before returning
    }
    
    /// Runs sidewall_demo on a newly spawned std::thread and blocks until it finishes.
    ///
    /// @param run_in_worker_thread  Forwarded unchanged to sidewall_demo.
    void worker_thread_sidewall_demo(bool run_in_worker_thread)
    {
        std::thread worker([run_in_worker_thread] {
            sidewall_demo(run_in_worker_thread);
        });
        worker.join();  // wait for the detection to complete before returning
    }
    
    /// Runs lockcatch_demo on a newly spawned std::thread and blocks until it finishes.
    ///
    /// @param run_in_worker_thread  Forwarded unchanged to lockcatch_demo.
    void worker_thread_lockcatch_demo(bool run_in_worker_thread)
    {
        std::thread worker([run_in_worker_thread] {
            lockcatch_demo(run_in_worker_thread);
        });
        worker.join();  // wait for the detection to complete before returning
    }
    
    #pragma endregion
    
    // Which thread calls set_mode and which thread runs detection.
    // Timings are the author's measurements for this demo.
    enum DETECT_TYPE {
        // set_mode in the main thread, detect in the main thread: ~40 ms, GPU is used
        SET_IN_MAIN_DETECT_IN_MAIN = 0,
        // set_mode in the worker thread, detect in the worker thread: ~40 ms, GPU is used
        SET_IN_WORKER_DETECT_IN_WORKER = 1,
        // set_mode in the main thread, detect in the worker thread: ~400 ms,
        // about 10x slower, GPU is NOT used
        SET_IN_MAIN_DETECT_IN_WORKER = 2
    };
    
    void thread_demo()
    {
    	DETECT_TYPE detect_type = SET_IN_MAIN_DETECT_IN_MAIN;
    	detect_type = SET_IN_WORKER_DETECT_IN_WORKER;
    	detect_type = SET_IN_MAIN_DETECT_IN_WORKER;
    
    	init_algorithm_api();
    
    	switch (detect_type)
    	{
    	case SET_IN_MAIN_DETECT_IN_MAIN:
    		topwire_demo(false);
    		railway_demo(false);
    		sidewall_demo(false);
    		lockcatch_demo(false);
    		break;
    	case SET_IN_WORKER_DETECT_IN_WORKER:
    		worker_thread_topwire_demo(true);
    		worker_thread_railway_demo(true);
    		worker_thread_sidewall_demo(true);
    		worker_thread_lockcatch_demo(true);
    		break;
    	case SET_IN_MAIN_DETECT_IN_WORKER:
    		worker_thread_topwire_demo(false);
    		worker_thread_railway_demo(false);
    		worker_thread_sidewall_demo(false);
    		worker_thread_lockcatch_demo(false);
    		break;
    	default:
    		break;
    	}
    
    	free_algorithm_api();
    }
    
    /// Entry point used by the gtest case; simply delegates to thread_demo().
    void test_algorithm_api() {
        thread_demo();
    }
    
    // GoogleTest case that runs the threading demo via test_algorithm_api().
    // NOTE(review): the suite name "algorithn_test" looks like a typo for
    // "algorithm_test"; it is left unchanged because the name is what test
    // filters match against.
    TEST(algorithn_test, test_algorithm_api) {
    	test_algorithm_api();
    }
    
    • SET_IN_MAIN_DETECT_IN_MAIN:主线程set_mode,主线程检测,40ms左右,使用GPU
    • SET_IN_WORKER_DETECT_IN_WORKER:子线程set_mode,子线程检测,40ms左右,使用GPU
    • SET_IN_MAIN_DETECT_IN_WORKER:主线程set_mode,子线程检测,400ms左右,慢了10倍左右,没有使用GPU

    Reference

    History

    • 20180712: created.

    Copyright

  • 相关阅读:
    Java静态代理学习
    Java反射学习三
    Java反射学习二
    Java反射学习一
    Java反射学习四
    linux下vim命令详解
    linux安装jdk
    转 知道这20个正则表达式,能让你少写1,000行代码
    excel函数2
    excel函数
  • 原文地址:https://www.cnblogs.com/kezunlin/p/11846646.html
Copyright © 2020-2023  润新知