• caffe 报错与解决


    1.error while loading shared libraries: libcaffe.so.1.0.0: cannot open shared object file: No such file or directory

    error while loading shared libraries: libcaffe.so.1.0.0: cannot open shared object file: No such file or directory
    

    原因:caffe是直接从别的地方拷贝过来就运行的,运行时动态库搜索路径里没有libcaffe.so所在的目录。
    解决方法:

    export LD_LIBRARY_PATH=/data_1/model_train/light_on/caffe_multi_focalloss_se/build_310/lib:$LD_LIBRARY_PATH
    

    2.src/caffe/common.cpp:35:5: error: ‘::gflags’ has not been declared

    
    CXX src/caffe/solver.cpp
    CXX src/caffe/internal_thread.cpp
    CXX src/caffe/common.cpp
    src/caffe/common.cpp: In function ‘void caffe::GlobalInit(int*, char***)’:
    src/caffe/common.cpp:35:5: error: ‘::gflags’ has not been declared
       ::gflags::ParseCommandLineFlags(pargc, pargv, true);
         ^
    make: *** [.build_release/src/caffe/common.o] Error 1
    

    解决方案:
    https://github.com/BVLC/caffe/blob/master/include/caffe/common.hpp#L21-L28

    注释掉ifndef
    I also got this problem, and I installed gflags by myself before .
    And I solved this problem :
    in the file include/caffe/common.hpp

    //#ifndef GFLAGS_GFLAGS_H_
    namespace gflags = google;
    //#endif  // GFLAGS_GFLAGS_H_
    

    Comment out the #ifndef / #endif guard as shown above, so that
    namespace gflags = google;
    is always compiled. This line is important.

    qt工程跑refinedet前向工程报错:

    g++ -Wl,-rpath,/home/yhl/Qt5.9.2/5.9.2/gcc_64/lib -o refinedet main.o   -L/home/yhl/software_install/opencv3.2/lib -L/data_1/2021biaozhushuju/obj_vehicle/RefineDet-master/build/lib -L/usr/local/ssl/lib -L/usr/local/cuda-8.0/lib64/ -L/usr/lib/x86_64-linux-gnu/hdf5/serial -L/lib/x86_64-linux-gnu/ -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_video -lopencv_imgcodecs -lopencv_videoio -lboost_serialization -lboost_system -lboost_filesystem -lglog -lcaffe -lhdf5 -lhdf5_hl -lboost_thread -lprotobuf -latlas -lssl3 -ldl -lrt /usr/lib/x86_64-linux-gnu/libgflags.a -lcublas_static -lcudart -lculibos -lcurand_static -lcudnn -lcurand -L/home/yhl/Qt5.9.2/5.9.2/gcc_64/lib -lQt5Core -lpthread 
    /usr/bin/ld: /data_1/2021biaozhushuju/obj_vehicle/RefineDet-master/build/lib/libcaffe.so: undefined reference to symbol 'cudaGetDevice'
    /usr/local/cuda-8.0/lib64//libcudart.so: error adding symbols: DSO missing from command line
    Makefile:227: recipe for target 'refinedet' failed
    

    很奇怪的错误。。
    其中我的pro文件这么写的:

    QT += core
    QT -= gui
    
    CONFIG += c++11
    
    TARGET = refinedet
    CONFIG += console
    CONFIG -= app_bundle
    
    TEMPLATE = app
    DEFINES += USE_OPENCV
    
    
    QMAKE_CXXFLAGS += -std=c++11
    
    
    OPENCV_ROOT_PATH = /home/yhl/software_install/opencv3.2
    CAFFE_ROOT_PATH = /data_1/2021biaozhushuju/obj_vehicle/RefineDet-master
    
    
    INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/
    INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/opencv
    INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/opencv2
    INCLUDEPATH += $${CAFFE_ROOT_PATH}/include/
    #INCLUDEPATH += $${CAFFE_ROOT_PATH}/build/src/
    INCLUDEPATH += $${CAFFE_ROOT_PATH}/build/include/
    INCLUDEPATH += /usr/local/cuda/include
    INCLUDEPATH += /usr/local/include/node
    
    LIBS += -L$${OPENCV_ROOT_PATH}/lib
    LIBS += -L$${CAFFE_ROOT_PATH}/build/lib
    LIBS += -L/usr/local/ssl/lib
    LIBS += -L/usr/local/cuda-8.0/lib64/
    LIBS += -L/usr/lib/x86_64-linux-gnu/hdf5/serial
    LIBS += -L/lib/x86_64-linux-gnu/
    
    LIBS += -lopencv_core
    LIBS += -lopencv_imgproc
    LIBS += -lopencv_highgui
    LIBS += -lopencv_video
    LIBS += -lopencv_imgcodecs
    LIBS += -lopencv_videoio
    LIBS += -lboost_serialization
    LIBS += -lboost_system
    LIBS += -lboost_filesystem
    LIBS += -lglog
    LIBS += -lcaffe
    LIBS += -lhdf5
    LIBS += -lhdf5_hl
    LIBS += -lboost_thread
    LIBS += -lprotobuf
    LIBS += -latlas
    LIBS += -lssl3
    LIBS += -lpthread
    LIBS += -ldl
    LIBS += -lrt
    LIBS += /usr/lib/x86_64-linux-gnu/libgflags.a
    
    LIBS += -lcublas_static
    LIBS += -lcudart
    LIBS += -lculibos
    LIBS += -lcurand_static
    LIBS += -lcudnn
    LIBS += -lcurand
    
    SOURCES += main.cpp
    

    折腾了一会儿,我把和cuda相关的库放到了前面:

    QT += core
    QT -= gui
    
    CONFIG += c++11
    
    TARGET = refinedet
    CONFIG += console
    CONFIG -= app_bundle
    
    TEMPLATE = app
    DEFINES += USE_OPENCV
    
    
    QMAKE_CXXFLAGS += -std=c++11
    
    
    OPENCV_ROOT_PATH = /home/yhl/software_install/opencv3.2
    CAFFE_ROOT_PATH = /data_1/2021biaozhushuju/obj_vehicle/RefineDet-master
    
    
    INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/
    INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/opencv
    INCLUDEPATH += $${OPENCV_ROOT_PATH}/include/opencv2
    INCLUDEPATH += $${CAFFE_ROOT_PATH}/include/
    #INCLUDEPATH += $${CAFFE_ROOT_PATH}/build/src/
    INCLUDEPATH += $${CAFFE_ROOT_PATH}/build/include/
    INCLUDEPATH += /usr/local/cuda/include
    INCLUDEPATH += /usr/local/include/node
    
    LIBS += -L$${OPENCV_ROOT_PATH}/lib
    LIBS += -L$${CAFFE_ROOT_PATH}/build/lib
    LIBS += -L/usr/local/ssl/lib
    LIBS += -L/usr/local/cuda-8.0/lib64/
    LIBS += -L/usr/lib/x86_64-linux-gnu/hdf5/serial
    LIBS += -L/lib/x86_64-linux-gnu/
    
    LIBS += -lcublas_static
    LIBS += -lcudart
    LIBS += -lculibos
    LIBS += -lcurand_static
    LIBS += -lcudnn
    LIBS += -lcurand
    
    
    LIBS += -lopencv_core
    LIBS += -lopencv_imgproc
    LIBS += -lopencv_highgui
    LIBS += -lopencv_video
    LIBS += -lopencv_imgcodecs
    LIBS += -lopencv_videoio
    LIBS += -lboost_serialization
    LIBS += -lboost_system
    LIBS += -lboost_filesystem
    LIBS += -lglog
    LIBS += -lcaffe
    LIBS += -lhdf5
    LIBS += -lhdf5_hl
    LIBS += -lboost_thread
    LIBS += -lprotobuf
    LIBS += -latlas
    LIBS += -lssl3
    LIBS += -lpthread
    LIBS += -ldl
    LIBS += -lrt
    LIBS += /usr/lib/x86_64-linux-gnu/libgflags.a
    
    SOURCES += main.cpp
    

    把cuda相关的库放到前面就可以链接通过了。居然和库的书写顺序有关:链接器(ld)是按照库在命令行中出现的先后顺序来解析符号的,所以-l参数的顺序会影响链接结果;不过这里具体的原因我还不清楚。

    caffe-lstm 报错:

    CXX src/caffe/common.cpp
    CXX src/caffe/3rdparty/ctc_entrypoint.cpp
    In file included from /usr/include/c++/5/tuple:35:0,
                     from ./include/caffe/3rdparty/detail/cpu_ctc.cuh:3,
                     from src/caffe/3rdparty/ctc_entrypoint.cpp:7:
    /usr/include/c++/5/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support must be enabled with the -std=c++11 or -std=gnu++11 compiler options.
     #error This file requires compiler and library support 
      ^
    In file included from src/caffe/3rdparty/ctc_entrypoint.cpp:7:0:
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh:82:10: error: ‘tuple’ in namespace ‘std’ does not name a template type
         std::tuple<ProbT, bool>
              ^
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh:188:6: error: ‘tuple’ in namespace ‘std’ does not name a template type
     std::tuple<ProbT, bool>
          ^
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh: In member function ‘ctcStatus_t CpuCTC<ProbT>::cost_and_grad(const ProbT*, ProbT*, ProbT*, const int*, const int*, const int*)’:
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh:383:24: error: ‘nullptr’ was not declared in this scope
         if (activations == nullptr ||
                            ^
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh:425:9: error: ‘tie’ is not a member of ‘std’
             std::tie(costs[mb], mb_status) =
             ^
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh: In member function ‘ctcStatus_t CpuCTC<ProbT>::score_forward(const ProbT*, ProbT*, const int*, const int*, const int*)’:
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh:442:24: error: ‘nullptr’ was not declared in this scope
         if (activations == nullptr ||
                            ^
    src/caffe/3rdparty/ctc_entrypoint.cpp: In function ‘ctcStatus_t compute_ctc_loss(const float*, float*, const int*, const int*, const int*, int, int, float*, void*, ctcOptions)’:
    src/caffe/3rdparty/ctc_entrypoint.cpp:69:24: error: ‘nullptr’ was not declared in this scope
         if (activations == nullptr ||
                            ^
    src/caffe/3rdparty/ctc_entrypoint.cpp: In function ‘ctcStatus_t get_workspace_size(const int*, const int*, int, int, ctcOptions, size_t*)’:
    src/caffe/3rdparty/ctc_entrypoint.cpp:99:26: error: ‘nullptr’ was not declared in this scope
         if (label_lengths == nullptr ||
                              ^
    In file included from src/caffe/3rdparty/ctc_entrypoint.cpp:7:0:
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh: In instantiation of ‘ctcStatus_t CpuCTC<ProbT>::cost_and_grad(const ProbT*, ProbT*, ProbT*, const int*, const int*, const int*) [with ProbT = float]’:
    src/caffe/3rdparty/ctc_entrypoint.cpp:52:45:   required from here
    ./include/caffe/3rdparty/detail/cpu_ctc.cuh:426:37: error: ‘cost_and_grad_kernel’ was not declared in this scope
                     cost_and_grad_kernel(grads + mb * alphabet_size_,
                                         ^
    Makefile:577: recipe for target '.build_release/src/caffe/3rdparty/ctc_entrypoint.o' failed
    make: *** [.build_release/src/caffe/3rdparty/ctc_entrypoint.o] Error 1
    make: *** 正在等待未完成的任务....
    src/caffe/common.cpp: In function ‘void caffe::GlobalInit(int*, char***)’:
    src/caffe/common.cpp:45:5: error: ‘::gflags’ has not been declared
       ::gflags::ParseCommandLineFlags(pargc, pargv, true);
         ^
    Makefile:577: recipe for target '.build_release/src/caffe/common.o' failed
    make: *** [.build_release/src/caffe/common.o] Error 1
    
    

    解决方案

    error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support is currently experimental, and must be enabled with the -std=c++11 or -std=gnu++11 compiler options.
    
    caffe c++11编译问题
    
    问题:error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support is currently experimental, and must be enabled with the -std=c++11 or -std=gnu++11 compiler options.
    
    解决:修改Makefile文件
    
    CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS) -std=c++11
    NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS) -std=c++11
    LINKFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS) -std=c++11
    
    CXX src/caffe/net.cpp
    NVCC src/caffe/layers/cudnn_lrn_layer.cu
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9220): error: argument of type "const void *" is incompatible with parameter of type "const float *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9231): error: argument of type "const void *" is incompatible with parameter of type "const float *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9244): error: argument of type "const void *" is incompatible with parameter of type "const double *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9255): error: argument of type "const void *" is incompatible with parameter of type "const double *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9268): error: argument of type "const void *" is incompatible with parameter of type "const float *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9279): error: argument of type "const void *" is incompatible with parameter of type "const float *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9292): error: argument of type "const void *" is incompatible with parameter of type "const double *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9303): error: argument of type "const void *" is incompatible with parameter of type "const double *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9316): error: argument of type "const void *" is incompatible with parameter of type "const int *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9327): error: argument of type "const void *" is incompatible with parameter of type "const int *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9340): error: argument of type "const void *" is incompatible with parameter of type "const long long *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9352): error: argument of type "const void *" is incompatible with parameter of type "const long long *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9365): error: argument of type "const void *" is incompatible with parameter of type "const int *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9376): error: argument of type "const void *" is incompatible with parameter of type "const int *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9389): error: argument of type "const void *" is incompatible with parameter of type "const long long *"
    
    /usr/lib/gcc/x86_64-linux-gnu/5/include/avx512fintrin.h(9401): error: argument of type "const void *" is incompatible with parameter of type "const long long *"
    

    这个错误真难解决,因为在服务器上,一些环境不敢随便动。
    然后找来高手,把默认的gcc/g++切换到了4.9版本:

    cd /usr/bin
    gcc 再敲键盘tab键
    
     gcc
    gcc             gcc-5           gcc-7           gcc-ar-4.9      gcc-ar-7        gcc-nm-4.9      gcc-nm-7        gcc-ranlib-4.9  gcc-ranlib-7    
    gcc-4.9         gcc_7           gcc-ar          gcc-ar-5        gcc-nm          gcc-nm-5        gcc-ranlib      gcc-ranlib-5    
    g++
    g++      g++-4.9  g++-5    g++-7 
    
    
    ln -snf gcc-4.9 gcc
    ln -snf g++-4.9 g++
    

    集群上面编译训练caffe

    改了makefile里面CUDA_DIR,BLAS这些。然后加载gcc5.4。编译是通过了,但是训练的时候报错。
    说/usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1找不到。
    可是1.65.1我去/usr/lib/x86_64-linux-gnu目录下看是存在的啊。
    ldd caffe看了也是链接的
    libboost_system.so.1.65.1 => /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1
    然后在训练的sh文件加上
    export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
    也是不行!
    无解!~
    然后找来jiamin,他说编译的环境和实际运行的环境不一样! 敲命令:
    srun --gres=gpu:1 --pty bash
    进去一个环境,
    ll /usr/lib/x86_64-linux-gnu/libboost*
    ls: cannot access '/usr/lib/x86_64-linux-gnu/libboost*': No such file or directory
    这个环境下面确实没有libboost_system.so.1.65.1
    再ifconfig
    提示现在ip是192.168.80.19
    再exit退出:
    查看ip是192.168.80.2
    ip都不一样。
    然后再srun --gres=gpu:1 --pty bash
    环境下重新编译caffe。
    然后编译好的caffe链接的是
    libboost_system.so.1.67.0 => /home/nfs/admin0/apps/python/anaconda3/envs/py2.7_proc3.5/lib/libboost_system.so.1.67.0 (0x00001462e9873000)
    之前链接的是:
    libboost_system.so.1.65.1 => /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1 (0x0000152e8f6e3000)。
    然后就可以正常训练了!
    厉害啊!还有这问题,编译的环境和实际运行的环境不一样。这个问题估计只有jiamin能解决了。

    上面的就是编译的caffe-lstm,在服务器上面编译不过,报错,把gcc改成4.9解决了一个报错,然后又报错提示protobuf问题,一查又说是gcc需要5.4才能解决。可是服务器上面gcc是5.5,没有5.4。
    确实,拉到本地gcc是5.4版本的,编译可以过的。
    我不会把gcc从5.5降到5.4,在服务器上就没法继续了,于是改到集群上去试(也就是上面“集群上面编译训练caffe”那一节)。

    好记性不如烂键盘---点滴、积累、进步!
  • 相关阅读:
    wordcloud库基本介绍和使用方法
    文本词频同意问题分析
    集合
    操作系统
    操作系统的发展史
    基础练习
    random库的使用
    【量化】五日均价策略
    【量化】多只股票策略
    【量化】指数数据
  • 原文地址:https://www.cnblogs.com/yanghailin/p/14931279.html
Copyright © 2020-2023  润新知