• Tensorflow r1.12及tensorflow serving r1.12 GPU版本编译遇到的问题


    1、git clone tensorflow serving 及tensorflow代码

    2、使用 --config=cuda 编译时报错,提示 TensorFlow 未配置 GPU 支持:

    ERROR: /root/.cache/bazel/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/local_config_cuda/crosstool/BUILD:4:1:
     Traceback (most recent call last):
            File "/root/.cache/bazel/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/local_config_cuda/crosstool/BUILD", line 4
                    error_gpu_disabled()
            File "/root/.cache/bazel/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/local_config_cuda/crosstool/error_gpu_disabled.bzl", line 3, in error_gpu_disabled
                    fail("ERROR: Building with --config=c...")
    ERROR: Building with --config=cuda but TensorFlow is not configured to build with GPU support. Please re-run ./configure and enter 'Y' at the prompt to build with GPU support.
    ERROR: no such target '@local_config_cuda//crosstool:toolchain': target 'toolchain' not declared in package 'crosstool' defined by /root/.cache/bazel/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/local_config_cuda/crosstool/BUILD
    INFO: Elapsed time: 0.093s
    INFO: 0 processes.
    FAILED: Build did NOT complete successfully (0 packages loaded)

    解决办法:

    export TF_NEED_CUDA="1"

    3、看起来./configure配置全部不能生效,因此需要自己配置变量指引cuda、cudnn、nccl等所有的非默认路径

    解决办法:通过变量设置

    export PATH=$PATH:/env/bazel-0.15.0/bin
    export TF_NEED_CUDA="1"
    export CUDNN_INSTALL_PATH="/usr/local/cudnn7.3_cuda9.0"
    export CUDA_INSTALL_PATH="/usr/local/cuda-9.0"
    export TF_CUDA_VERSION="9.0"
    export TF_CUDNN_VERSION="7"
    export TF_NCCL_VERSION="2.2"
    export NCCL_INSTALL_PATH="/env/nccl_2.2.13-1+cuda9.0_x86_64"
    export TEST_TMPDIR=/home

    4、遇到nvcc检测cuda版本与设置不一致的问题。

    ERROR: no such package '@local_config_cuda//crosstool': Traceback (most recent call last):
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/org_tensorflow/third_party/gpus/cuda_configure.bzl", line 1447
                    _create_local_cuda_repository(repository_ctx)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/org_tensorflow/third_party/gpus/cuda_configure.bzl", line 1187, in _create_local_cuda_repository
                    _get_cuda_config(repository_ctx)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/org_tensorflow/third_party/gpus/cuda_configure.bzl", line 909, in _get_cuda_config
                    _cuda_version(repository_ctx, cuda_toolkit_path, c...)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/org_tensorflow/third_party/gpus/cuda_configure.bzl", line 492, in _cuda_version
                    auto_configure_fail(("CUDA version detected from nvc...)))
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/org_tensorflow/third_party/gpus/cuda_configure.bzl", line 317, in auto_configure_fail
                    fail(("\n%sCuda Configuration Error:%...)))
    
    Cuda Configuration Error: CUDA version detected from nvcc (8.0.61) does not match TF_CUDA_VERSION (9.0)
    INFO: Elapsed time: 0.785s
    INFO: 0 processes.
    FAILED: Build did NOT complete successfully (1 packages loaded)

    解决办法,修改cuda toolkit的地址:

    export CUDA_TOOLKIT_PATH="/usr/local/cuda-9.0"

    5、编译时找不到 gcc:

    ERROR: no such package '@local_config_cc//': Traceback (most recent call last):
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/bazel_tools/tools/cpp/cc_configure.bzl", line 56
                    configure_unix_toolchain(repository_ctx, cpu_value, overriden...)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/bazel_tools/tools/cpp/unix_cc_configure.bzl", line 477, in configure_unix_toolchain
                    _find_generic(repository_ctx, "gcc", "CC", overriden...)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/bazel_tools/tools/cpp/unix_cc_configure.bzl", line 459, in _find_generic
                    auto_configure_fail(msg)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/bazel_tools/tools/cpp/lib_cc_configure.bzl", line 109, in auto_configure_fail
                    fail(("\n%sAuto-Configuration Error:%...)))
    
    Auto-Configuration Error: Cannot find gcc or CC (gcc -std=gnu99); either correct your path or set the CC environment variable
    ERROR: Analysis of target '//tensorflow_serving/model_servers:tensorflow_model_server' failed; build aborted: no such package '@local_config_cc//': Traceback (most recent call last):
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/bazel_tools/tools/cpp/cc_configure.bzl", line 56
                    configure_unix_toolchain(repository_ctx, cpu_value, overriden...)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/bazel_tools/tools/cpp/unix_cc_configure.bzl", line 477, in configure_unix_toolchain
                    _find_generic(repository_ctx, "gcc", "CC", overriden...)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/bazel_tools/tools/cpp/unix_cc_configure.bzl", line 459, in _find_generic
                    auto_configure_fail(msg)
            File "/search/odin/zhangliang/_bazel_root/f71d782da17fd83c84ed6253a342a306/external/bazel_tools/tools/cpp/lib_cc_configure.bzl", line 109, in auto_configure_fail
                    fail(("\n%sAuto-Configuration Error:%...)))
    
    Auto-Configuration Error: Cannot find gcc or CC (gcc -std=gnu99); either correct your path or set the CC environment variable
    INFO: Elapsed time: 2.579s
    INFO: 0 processes.
    FAILED: Build did NOT complete successfully (4 packages loaded)

    解决办法:

    export CC=/usr/bin/gcc

    6、链接 tensorflow_model_server 时报错,提示需要使用 -fPIC 重新编译:

    ERROR: /search/odin/zhangliang/code/serving-1.14/serving/tensorflow_serving/model_servers/BUILD:356:1: Linking of rule '//tensorflow_serving/model_servers:tensorflow_model_server' failed (Exit 1)
    /usr/bin/ld: bazel-out/k8-opt/bin/tensorflow_serving/model_servers/_objs/tensorflow_model_server/tensorflow_serving/model_servers/version.o: relocation R_X86_64_32 against `.rodata' can not be used when making a shared object; recompile with -fPIC
    bazel-out/k8-opt/bin/tensorflow_serving/model_servers/_objs/tensorflow_model_server/tensorflow_serving/model_servers/version.o: could not read symbols: Bad value
    collect2: error: ld returned 1 exit status
    Target //tensorflow_serving/model_servers:tensorflow_model_server failed to build
    Use --verbose_failures to see the command lines of failed build steps.
    INFO: Elapsed time: 697.810s, Critical Path: 331.33s
    INFO: 3321 processes: 3321 local.
    FAILED: Build did NOT complete successfully

    解决办法:

    编译tensorflow_model_server_main_lib时出错,查看了一下,发现是linkstamp “version.cc”时出错,提示应该加-fPIC。

    简单的绕过办法:

    BUILD

    cc_library(
        name = "tensorflow_model_server_main_lib",
        srcs = [
            "main.cc",
        ],
        #hdrs = [
        #    "version.h",
        #],
        #linkstamp = "version.cc",
        visibility = [
            ":tensorflow_model_server_custom_op_clients",
            "//tensorflow_serving:internal",
        ],
        deps = [
            ":server_lib",
            "@org_tensorflow//tensorflow/c:c_api",
            "@org_tensorflow//tensorflow/core:lib",
            "@org_tensorflow//tensorflow/core/platform/cloud:gcs_file_system",
            "@org_tensorflow//tensorflow/core/platform/hadoop:hadoop_file_system",
            "@org_tensorflow//tensorflow/core/platform/s3:s3_file_system",
        ],
    )

    main.cc

    //#include "tensorflow_serving/model_servers/version.h"
    
    ...
    if (display_version) {
        std::cout << "TensorFlow ModelServer: " << "r1.12" << "\n"
                  << "TensorFlow Library: " << TF_Version() << "\n";
        return 0;
      }
  • 相关阅读:
    Java的Regex --正则表达式
    Java的包装类
    类的始祖Object
    abstract和interface关键字介绍
    内部类
    Accumulation Degree [换根dp,二次扫描]
    牛客练习赛61 [口胡]
    CF1334G Substring Search [bitset,乱搞]
    CF1175F The Number of Subpermutations [哈希,乱搞]
    CF793G Oleg and chess [线段树优化建边,扫描线,最大流]
  • 原文地址:https://www.cnblogs.com/zl1991/p/11465111.html
Copyright © 2020-2023  润新知