openMP 处理for循环
////////////////////////////////////////////////////////////////////////////////////////////// template <typename PointInT, typename PointOutT> void pcl::MovingLeastSquares<PointInT, PointOutT>::performProcessing (PointCloudOut &output) { // Compute the number of coefficients nr_coeff_ = (order_ + 1) * (order_ + 2) / 2; size_t mls_result_index = 0; #ifdef _OPENMP // (Maximum) number of threads const unsigned int threads = threads_ == 0 ? 1 : threads_; // Create temporaries for each thread in order to avoid synchronization typename PointCloudOut::CloudVectorType projected_points (threads); typename NormalCloud::CloudVectorType projected_points_normals (threads); std::vector<PointIndices> corresponding_input_indices (threads); #endif // For all points #ifdef _OPENMP #pragma omp parallel for schedule (dynamic,1000) num_threads (threads) #endif for (int cp = 0; cp < static_cast<int> (indices_->size ()); ++cp) { // Allocate enough space to hold the results of nearest neighbor searches // ote resize is irrelevant for a radiusSearch (). std::vector<int> nn_indices; std::vector<float> nn_sqr_dists; // Get the initial estimates of point positions and their neighborhoods if (searchForNeighbors ((*indices_)[cp], nn_indices, nn_sqr_dists)) { // Check the number of nearest neighbors for normal estimation (and later for polynomial fit as well) if (nn_indices.size () >= 3) { // This thread's ID (range 0 to threads-1) #ifdef _OPENMP const int tn = omp_get_thread_num (); // Size of projected points before computeMLSPointNormal () adds points size_t pp_size = projected_points[tn].size (); #else PointCloudOut projected_points; NormalCloud projected_points_normals; #endif // Get a plane approximating the local surface's tangent and project point onto it const int index = (*indices_)[cp]; if (cache_mls_results_) mls_result_index = index; // otherwise we give it a dummy location. #ifdef _OPENMP computeMLSPointNormal (index, nn_indices, projected_points[tn], projected_points_normals[tn], corresponding_input_indices[tn], mls_results_[mls_result_index]); // Copy all information from the input cloud to the output points (not doing any interpolation) for (size_t pp = pp_size; pp < projected_points[tn].size (); ++pp) copyMissingFields (input_->points[(*indices_)[cp]], projected_points[tn][pp]); #else computeMLSPointNormal (index, nn_indices, projected_points, projected_points_normals, *corresponding_input_indices_, mls_results_[mls_result_index]); // Append projected points to output output.insert (output.end (), projected_points.begin (), projected_points.end ()); if (compute_normals_) normals_->insert (normals_->end (), projected_points_normals.begin (), projected_points_normals.end ()); #endif } } } #ifdef _OPENMP // Combine all threads' results into the output vectors for (unsigned int tn = 0; tn < threads; ++tn) { output.insert (output.end (), projected_points[tn].begin (), projected_points[tn].end ()); corresponding_input_indices_->indices.insert (corresponding_input_indices_->indices.end (), corresponding_input_indices[tn].indices.begin (), corresponding_input_indices[tn].indices.end ()); if (compute_normals_) normals_->insert (normals_->end (), projected_points_normals[tn].begin (), projected_points_normals[tn].end ()); } #endif // Perform the distinct-cloud or voxel-grid upsampling performUpsampling (output); }
template <typename PointT> void pcl::FastBilateralFilterOMP<PointT>::applyFilter (PointCloud &output) { if (!input_->isOrganized ()) { PCL_ERROR ("[pcl::FastBilateralFilterOMP] Input cloud needs to be organized. "); return; } copyPointCloud (*input_, output); float base_max = -std::numeric_limits<float>::max (), base_min = std::numeric_limits<float>::max (); bool found_finite = false; for (size_t x = 0; x < output.width; ++x) { for (size_t y = 0; y < output.height; ++y) { if (pcl_isfinite (output (x, y).z)) { if (base_max < output (x, y).z) base_max = output (x, y).z; if (base_min > output (x, y).z) base_min = output (x, y).z; found_finite = true; } } } if (!found_finite) { PCL_WARN ("[pcl::FastBilateralFilterOMP] Given an empty cloud. Doing nothing. "); return; } #ifdef _OPENMP #pragma omp parallel for num_threads (threads_) #endif for (long int i = 0; i < static_cast<long int> (output.size ()); ++i) if (!pcl_isfinite (output.at(i).z)) output.at(i).z = base_max; const float base_delta = base_max - base_min; const size_t padding_xy = 2; const size_t padding_z = 2; const size_t small_width = static_cast<size_t> (static_cast<float> (input_->width - 1) / sigma_s_) + 1 + 2 * padding_xy; const size_t small_height = static_cast<size_t> (static_cast<float> (input_->height - 1) / sigma_s_) + 1 + 2 * padding_xy; const size_t small_depth = static_cast<size_t> (base_delta / sigma_r_) + 1 + 2 * padding_z; Array3D data (small_width, small_height, small_depth); #ifdef _OPENMP #pragma omp parallel for num_threads (threads_) #endif for (long int i = 0; i < static_cast<long int> (small_width * small_height); ++i) { size_t small_x = static_cast<size_t> (i % small_width); size_t small_y = static_cast<size_t> (i / small_width); size_t start_x = static_cast<size_t>( std::max ((static_cast<float> (small_x) - static_cast<float> (padding_xy) - 0.5f) * sigma_s_ + 1, 0.f)); size_t end_x = static_cast<size_t>( std::max ((static_cast<float> (small_x) - static_cast<float> (padding_xy) + 0.5f) * sigma_s_ + 1, 0.f)); size_t start_y = static_cast<size_t>( std::max ((static_cast<float> (small_y) - static_cast<float> (padding_xy) - 0.5f) * sigma_s_ + 1, 0.f)); size_t end_y = static_cast<size_t>( std::max ((static_cast<float> (small_y) - static_cast<float> (padding_xy) + 0.5f) * sigma_s_ + 1, 0.f)); for (size_t x = start_x; x < end_x && x < input_->width; ++x) { for (size_t y = start_y; y < end_y && y < input_->height; ++y) { const float z = output (x,y).z - base_min; const size_t small_z = static_cast<size_t> (static_cast<float> (z) / sigma_r_ + 0.5f) + padding_z; Eigen::Vector2f& d = data (small_x, small_y, small_z); d[0] += output (x,y).z; d[1] += 1.0f; } } } std::vector<long int> offset (3); offset[0] = &(data (1,0,0)) - &(data (0,0,0)); offset[1] = &(data (0,1,0)) - &(data (0,0,0)); offset[2] = &(data (0,0,1)) - &(data (0,0,0)); Array3D buffer (small_width, small_height, small_depth); for (size_t dim = 0; dim < 3; ++dim) { for (size_t n_iter = 0; n_iter < 2; ++n_iter) { Array3D* current_buffer = (n_iter % 2 == 1 ? &buffer : &data); Array3D* current_data =(n_iter % 2 == 1 ? &data : &buffer); #ifdef _OPENMP #pragma omp parallel for num_threads (threads_) #endif for(long int i = 0; i < static_cast<long int> ((small_width - 2)*(small_height - 2)); ++i) { size_t x = static_cast<size_t> (i % (small_width - 2) + 1); size_t y = static_cast<size_t> (i / (small_width - 2) + 1); const long int off = offset[dim]; Eigen::Vector2f* d_ptr = &(current_data->operator() (x,y,1)); Eigen::Vector2f* b_ptr = &(current_buffer->operator() (x,y,1)); for(size_t z = 1; z < small_depth - 1; ++z, ++d_ptr, ++b_ptr) *d_ptr = (*(b_ptr - off) + *(b_ptr + off) + 2.0 * (*b_ptr)) / 4.0; } } } // Note: this works because there are an even number of iterations. // If there were an odd number, we would need to end with a: // std::swap (data, buffer); if (early_division_) { for (std::vector<Eigen::Vector2f, Eigen::aligned_allocator<Eigen::Vector2f> >::iterator d = data.begin (); d != data.end (); ++d) *d /= ((*d)[0] != 0) ? (*d)[1] : 1; #ifdef _OPENMP #pragma omp parallel for num_threads (threads_) #endif for (long int i = 0; i < static_cast<long int> (input_->size ()); ++i) { size_t x = static_cast<size_t> (i % input_->width); size_t y = static_cast<size_t> (i / input_->width); const float z = output (x,y).z - base_min; const Eigen::Vector2f D = data.trilinear_interpolation (static_cast<float> (x) / sigma_s_ + padding_xy, static_cast<float> (y) / sigma_s_ + padding_xy, z / sigma_r_ + padding_z); output(x,y).z = D[0]; } } else { #ifdef _OPENMP #pragma omp parallel for num_threads (threads_) #endif for (long i = 0; i < static_cast<long int> (input_->size ()); ++i) { size_t x = static_cast<size_t> (i % input_->width); size_t y = static_cast<size_t> (i / input_->width); const float z = output (x,y).z - base_min; const Eigen::Vector2f D = data.trilinear_interpolation (static_cast<float> (x) / sigma_s_ + padding_xy, static_cast<float> (y) / sigma_s_ + padding_xy, z / sigma_r_ + padding_z); output (x,y).z = D[0] / D[1]; } } }
template <typename PointInT, typename PointOutT> void pcl::NormalEstimationOMP<PointInT, PointOutT>::computeFeature (PointCloudOut &output) { // Allocate enough space to hold the results // ote This resize is irrelevant for a radiusSearch (). std::vector<int> nn_indices (k_); std::vector<float> nn_dists (k_); output.is_dense = true; // Save a few cycles by not checking every point for NaN/Inf values if the cloud is set to dense if (input_->is_dense) { #ifdef _OPENMP #pragma omp parallel for shared (output) private (nn_indices, nn_dists) num_threads(threads_) #endif // Iterating over the entire index vector for (int idx = 0; idx < static_cast<int> (indices_->size ()); ++idx) { Eigen::Vector4f n; if (this->searchForNeighbors ((*indices_)[idx], search_parameter_, nn_indices, nn_dists) == 0 || !computePointNormal (*surface_, nn_indices, n, output.points[idx].curvature)) { output.points[idx].normal[0] = output.points[idx].normal[1] = output.points[idx].normal[2] = output.points[idx].curvature = std::numeric_limits<float>::quiet_NaN (); output.is_dense = false; continue; } output.points[idx].normal_x = n[0]; output.points[idx].normal_y = n[1]; output.points[idx].normal_z = n[2]; flipNormalTowardsViewpoint (input_->points[(*indices_)[idx]], vpx_, vpy_, vpz_, output.points[idx].normal[0], output.points[idx].normal[1], output.points[idx].normal[2]); } } else { #ifdef _OPENMP #pragma omp parallel for shared (output) private (nn_indices, nn_dists) num_threads(threads_) #endif // Iterating over the entire index vector for (int idx = 0; idx < static_cast<int> (indices_->size ()); ++idx) { Eigen::Vector4f n; if (!isFinite ((*input_)[(*indices_)[idx]]) || this->searchForNeighbors ((*indices_)[idx], search_parameter_, nn_indices, nn_dists) == 0 || !computePointNormal (*surface_, nn_indices, n, output.points[idx].curvature)) { output.points[idx].normal[0] = output.points[idx].normal[1] = output.points[idx].normal[2] = output.points[idx].curvature = std::numeric_limits<float>::quiet_NaN (); output.is_dense = false; continue; } output.points[idx].normal_x = n[0]; output.points[idx].normal_y = n[1]; output.points[idx].normal_z = n[2]; flipNormalTowardsViewpoint (input_->points[(*indices_)[idx]], vpx_, vpy_, vpz_, output.points[idx].normal[0], output.points[idx].normal[1], output.points[idx].normal[2]); } } }