• openMP---第一篇


    openMP 处理for循环

    //////////////////////////////////////////////////////////////////////////////////////////////
    template <typename PointInT, typename PointOutT> void
    pcl::MovingLeastSquares<PointInT, PointOutT>::performProcessing (PointCloudOut &output)
    {
      // Compute the number of coefficients
      nr_coeff_ = (order_ + 1) * (order_ + 2) / 2;
    
      size_t mls_result_index = 0;
    
    #ifdef _OPENMP
      // (Maximum) number of threads
      const unsigned int threads = threads_ == 0 ? 1 : threads_;
      // Create temporaries for each thread in order to avoid synchronization
      typename PointCloudOut::CloudVectorType projected_points (threads);
      typename NormalCloud::CloudVectorType projected_points_normals (threads);
      std::vector<PointIndices> corresponding_input_indices (threads);
    #endif
    
      // For all points
    #ifdef _OPENMP
    #pragma omp parallel for schedule (dynamic,1000) num_threads (threads)
    #endif
      for (int cp = 0; cp < static_cast<int> (indices_->size ()); ++cp)
      {
        // Allocate enough space to hold the results of nearest neighbor searches
        // 
    ote resize is irrelevant for a radiusSearch ().
        std::vector<int> nn_indices;
        std::vector<float> nn_sqr_dists;
    
        // Get the initial estimates of point positions and their neighborhoods
        if (searchForNeighbors ((*indices_)[cp], nn_indices, nn_sqr_dists))
        {
          // Check the number of nearest neighbors for normal estimation (and later for polynomial fit as well)
          if (nn_indices.size () >= 3)
          {
            // This thread's ID (range 0 to threads-1)
    #ifdef _OPENMP
            const int tn = omp_get_thread_num ();
            // Size of projected points before computeMLSPointNormal () adds points
            size_t pp_size = projected_points[tn].size ();
    #else
            PointCloudOut projected_points;
            NormalCloud projected_points_normals;
    #endif
    
            // Get a plane approximating the local surface's tangent and project point onto it
            const int index = (*indices_)[cp];
    
            if (cache_mls_results_)
              mls_result_index = index; // otherwise we give it a dummy location.
    
    #ifdef _OPENMP
            computeMLSPointNormal (index, nn_indices, projected_points[tn], projected_points_normals[tn], corresponding_input_indices[tn], mls_results_[mls_result_index]);
    
            // Copy all information from the input cloud to the output points (not doing any interpolation)
            for (size_t pp = pp_size; pp < projected_points[tn].size (); ++pp)
              copyMissingFields (input_->points[(*indices_)[cp]], projected_points[tn][pp]);
    #else
            computeMLSPointNormal (index, nn_indices, projected_points, projected_points_normals, *corresponding_input_indices_, mls_results_[mls_result_index]);
    
            // Append projected points to output
            output.insert (output.end (), projected_points.begin (), projected_points.end ());
            if (compute_normals_)
              normals_->insert (normals_->end (), projected_points_normals.begin (), projected_points_normals.end ());
    #endif
          }
        }
      }
    
    #ifdef _OPENMP
      // Combine all threads' results into the output vectors
      for (unsigned int tn = 0; tn < threads; ++tn)
      {
        output.insert (output.end (), projected_points[tn].begin (), projected_points[tn].end ());
        corresponding_input_indices_->indices.insert (corresponding_input_indices_->indices.end (),
                                                      corresponding_input_indices[tn].indices.begin (), corresponding_input_indices[tn].indices.end ());
        if (compute_normals_)
          normals_->insert (normals_->end (), projected_points_normals[tn].begin (), projected_points_normals[tn].end ());
      }
    #endif
    
      // Perform the distinct-cloud or voxel-grid upsampling
      performUpsampling (output);
    }
    template <typename PointT> void
    pcl::FastBilateralFilterOMP<PointT>::applyFilter (PointCloud &output)
    {
      if (!input_->isOrganized ())
      {
        PCL_ERROR ("[pcl::FastBilateralFilterOMP] Input cloud needs to be organized.
    ");
        return;
      }
    
      copyPointCloud (*input_, output);
      float base_max = -std::numeric_limits<float>::max (),
            base_min = std::numeric_limits<float>::max ();
      bool found_finite = false;
      for (size_t x = 0; x < output.width; ++x)
      {
        for (size_t y = 0; y < output.height; ++y)
        {
          if (pcl_isfinite (output (x, y).z))
          {
            if (base_max < output (x, y).z)
              base_max = output (x, y).z;
            if (base_min > output (x, y).z)
              base_min = output (x, y).z;
            found_finite = true;
          }
        }
      }
      if (!found_finite)
      {
        PCL_WARN ("[pcl::FastBilateralFilterOMP] Given an empty cloud. Doing nothing.
    ");
        return;
      }
    #ifdef _OPENMP
    #pragma omp parallel for num_threads (threads_)
    #endif
      for (long int i = 0; i < static_cast<long int> (output.size ()); ++i)
        if (!pcl_isfinite (output.at(i).z))
          output.at(i).z = base_max;
    
      const float base_delta = base_max - base_min;
    
      const size_t padding_xy = 2;
      const size_t padding_z  = 2;
    
      const size_t small_width  = static_cast<size_t> (static_cast<float> (input_->width  - 1) / sigma_s_) + 1 + 2 * padding_xy;
      const size_t small_height = static_cast<size_t> (static_cast<float> (input_->height - 1) / sigma_s_) + 1 + 2 * padding_xy;
      const size_t small_depth  = static_cast<size_t> (base_delta / sigma_r_)   + 1 + 2 * padding_z;
    
      Array3D data (small_width, small_height, small_depth);
    #ifdef _OPENMP
    #pragma omp parallel for num_threads (threads_)
    #endif
      for (long int i = 0; i < static_cast<long int> (small_width * small_height); ++i)
      {
        size_t small_x = static_cast<size_t> (i % small_width);
        size_t small_y = static_cast<size_t> (i / small_width);
        size_t start_x = static_cast<size_t>( 
            std::max ((static_cast<float> (small_x) - static_cast<float> (padding_xy) - 0.5f) * sigma_s_ + 1, 0.f));
        size_t end_x = static_cast<size_t>( 
          std::max ((static_cast<float> (small_x) - static_cast<float> (padding_xy) + 0.5f) * sigma_s_ + 1, 0.f));
        size_t start_y = static_cast<size_t>( 
          std::max ((static_cast<float> (small_y) - static_cast<float> (padding_xy) - 0.5f) * sigma_s_ + 1, 0.f));
        size_t end_y = static_cast<size_t>( 
          std::max ((static_cast<float> (small_y) - static_cast<float> (padding_xy) + 0.5f) * sigma_s_ + 1, 0.f));
        for (size_t x = start_x; x < end_x && x < input_->width; ++x)
        {
          for (size_t y = start_y; y < end_y && y < input_->height; ++y)
          {
            const float z = output (x,y).z - base_min;
            const size_t small_z = static_cast<size_t> (static_cast<float> (z) / sigma_r_ + 0.5f) + padding_z;
            Eigen::Vector2f& d = data (small_x, small_y, small_z);
            d[0] += output (x,y).z;
            d[1] += 1.0f;
          }
        }
      }
    
      std::vector<long int> offset (3);
      offset[0] = &(data (1,0,0)) - &(data (0,0,0));
      offset[1] = &(data (0,1,0)) - &(data (0,0,0));
      offset[2] = &(data (0,0,1)) - &(data (0,0,0));
    
      Array3D buffer (small_width, small_height, small_depth);
      
      for (size_t dim = 0; dim < 3; ++dim)
      {
        for (size_t n_iter = 0; n_iter < 2; ++n_iter)
        {
          Array3D* current_buffer = (n_iter % 2 == 1 ? &buffer : &data);
          Array3D* current_data =(n_iter % 2 == 1 ? &data : &buffer);
    #ifdef _OPENMP
    #pragma omp parallel for num_threads (threads_)
    #endif
          for(long int i = 0; i < static_cast<long int> ((small_width - 2)*(small_height - 2)); ++i)
          {
            size_t x = static_cast<size_t> (i % (small_width - 2) + 1);
            size_t y = static_cast<size_t> (i / (small_width - 2) + 1);
            const long int off = offset[dim];
            Eigen::Vector2f* d_ptr = &(current_data->operator() (x,y,1));
            Eigen::Vector2f* b_ptr = &(current_buffer->operator() (x,y,1));
    
            for(size_t z = 1; z < small_depth - 1; ++z, ++d_ptr, ++b_ptr)
              *d_ptr = (*(b_ptr - off) + *(b_ptr + off) + 2.0 * (*b_ptr)) / 4.0;
          }
        }
      }
      // Note: this works because there are an even number of iterations. 
      // If there were an odd number, we would need to end with a:
      // std::swap (data, buffer);
    
      if (early_division_)
      {
        for (std::vector<Eigen::Vector2f, Eigen::aligned_allocator<Eigen::Vector2f> >::iterator d = data.begin (); d != data.end (); ++d)
          *d /= ((*d)[0] != 0) ? (*d)[1] : 1;
    
    #ifdef _OPENMP
    #pragma omp parallel for num_threads (threads_)
    #endif
        for (long int i = 0; i < static_cast<long int> (input_->size ()); ++i)
        {
          size_t x = static_cast<size_t> (i % input_->width);
          size_t y = static_cast<size_t> (i / input_->width);
          const float z = output (x,y).z - base_min;
          const Eigen::Vector2f D = data.trilinear_interpolation (static_cast<float> (x) / sigma_s_ + padding_xy,
                                                                  static_cast<float> (y) / sigma_s_ + padding_xy,
                                                                  z / sigma_r_ + padding_z);
          output(x,y).z = D[0];
        }
      }
      else
      {
    #ifdef _OPENMP
    #pragma omp parallel for num_threads (threads_)
    #endif
        for (long i = 0; i < static_cast<long int> (input_->size ()); ++i)
        {
          size_t x = static_cast<size_t> (i % input_->width);
          size_t y = static_cast<size_t> (i / input_->width);
          const float z = output (x,y).z - base_min;
          const Eigen::Vector2f D = data.trilinear_interpolation (static_cast<float> (x) / sigma_s_ + padding_xy,
                                                                  static_cast<float> (y) / sigma_s_ + padding_xy,
                                                                  z / sigma_r_ + padding_z);
          output (x,y).z = D[0] / D[1];
        }
      }
    }
    template <typename PointInT, typename PointOutT> void
    pcl::NormalEstimationOMP<PointInT, PointOutT>::computeFeature (PointCloudOut &output)
    {
      // Allocate enough space to hold the results
      // 
    ote This resize is irrelevant for a radiusSearch ().
      std::vector<int> nn_indices (k_);
      std::vector<float> nn_dists (k_);
    
      output.is_dense = true;
      // Save a few cycles by not checking every point for NaN/Inf values if the cloud is set to dense
      if (input_->is_dense)
      {
    #ifdef _OPENMP
    #pragma omp parallel for shared (output) private (nn_indices, nn_dists) num_threads(threads_)
    #endif
        // Iterating over the entire index vector
        for (int idx = 0; idx < static_cast<int> (indices_->size ()); ++idx)
        {
          Eigen::Vector4f n;
          if (this->searchForNeighbors ((*indices_)[idx], search_parameter_, nn_indices, nn_dists) == 0 ||
              !computePointNormal (*surface_, nn_indices, n, output.points[idx].curvature))
          {
            output.points[idx].normal[0] = output.points[idx].normal[1] = output.points[idx].normal[2] = output.points[idx].curvature = std::numeric_limits<float>::quiet_NaN ();
    
            output.is_dense = false;
            continue;
          }
    
          output.points[idx].normal_x = n[0];
          output.points[idx].normal_y = n[1];
          output.points[idx].normal_z = n[2];
    
          flipNormalTowardsViewpoint (input_->points[(*indices_)[idx]], vpx_, vpy_, vpz_,
                                      output.points[idx].normal[0], output.points[idx].normal[1], output.points[idx].normal[2]);
    
        }
      }
      else
      {
    #ifdef _OPENMP
    #pragma omp parallel for shared (output) private (nn_indices, nn_dists) num_threads(threads_)
    #endif
        // Iterating over the entire index vector
        for (int idx = 0; idx < static_cast<int> (indices_->size ()); ++idx)
        {
          Eigen::Vector4f n;
          if (!isFinite ((*input_)[(*indices_)[idx]]) ||
              this->searchForNeighbors ((*indices_)[idx], search_parameter_, nn_indices, nn_dists) == 0 ||
              !computePointNormal (*surface_, nn_indices, n, output.points[idx].curvature))
          {
            output.points[idx].normal[0] = output.points[idx].normal[1] = output.points[idx].normal[2] = output.points[idx].curvature = std::numeric_limits<float>::quiet_NaN ();
    
            output.is_dense = false;
            continue;
          }
    
          output.points[idx].normal_x = n[0];
          output.points[idx].normal_y = n[1];
          output.points[idx].normal_z = n[2];
    
          flipNormalTowardsViewpoint (input_->points[(*indices_)[idx]], vpx_, vpy_, vpz_,
                                      output.points[idx].normal[0], output.points[idx].normal[1], output.points[idx].normal[2]);
    
        }
      }
    }
  • 相关阅读:
    图论
    城市吸引力指数
    bzoj3529(莫比乌斯反演+离线+树状数组)
    强制关闭tomcat
    bzoj2154(莫比乌斯反演)
    等差数列求和模板
    联想的显示屏校准(困难)
    bzoj2301(莫比乌斯反演)
    莫比乌斯反演模版
    菜鸟物流的运输网络(网络流)
  • 原文地址:https://www.cnblogs.com/lovebay/p/11976053.html
Copyright © 2020-2023  润新知