• 多线程处理N维度topk问题demo--[c++]


    问题

    -对多维度特征进行topk排序,使用 C++ 优先队列(std::priority_queue)为每个维度维护一个容量为 k 的最小堆(小顶堆),从而保留数值最大的 k 个元素.

    
    /*
    ----------------------------------
    Version    : ??
    File Name :     dm2.py
    Description :   
    Author  :       xijun1
    Email   :      
    Date    :       2018/12/7
    -----------------------------------
    Change Activity  :   2018/12/7
    -----------------------------------
    __author__ = 'xijun1'
    */
    #include <cstddef>
    #include <future>
    #include <iostream>
    #include <queue>
    #include <stdexcept>
    #include <unordered_map>
    #include <utility>
    #include <vector>
    using namespace std;
    
    /**
     * A set of independent bounded min-heaps, one per dimension.
     *
     * Every entry is a (first, second) pair; heaps are ordered by `second`, with
     * the smallest `second` at the top. The push_back overloads evict the top
     * entry whenever a heap grows past the configured capacity, so each heap ends
     * up holding the entries with the largest `second` values seen so far.
     *
     * The class contains no locking of its own.
     */
    class MinHeapUtil {
    private:
        using Entry = std::pair<unsigned int, int>;

        // Comparator that turns std::priority_queue (a max-heap by default) into a
        // min-heap keyed on Entry::second.
        struct HeapGreatcompare
        {
            bool operator()(const Entry& node_a, const Entry& node_b) const {
                return node_a.second > node_b.second;
            }
        };

        using Heap = std::priority_queue<Entry, std::vector<Entry>, HeapGreatcompare>;

        std::vector<Heap> pq_mat;      // pq_mat[k] is the heap of dimension k
        unsigned int max_topk = 0;     // per-heap capacity; 0 until setShape() is called

    public:
        /// Sets the number of dimensions and the per-dimension capacity.
        bool setShape(const int dim, const unsigned int max_topk);

        /// Number of dimensions (heaps) currently allocated.
        int getdim() const {
            return static_cast<int>(this->pq_mat.size());
        }

        /// Per-dimension capacity set by setShape(); 0 before the first call.
        int getMaxTopK() const {
            return static_cast<int>(this->max_topk);
        }

        /// Number of entries currently stored for dimension `k`.
        /// @throws std::out_of_range if `k` is not a valid dimension index.
        std::size_t getSize(int k) const {
            return this->pq_mat.at(static_cast<std::size_t>(k)).size();
        }

        /// Inserts every element of `data` into dimension `k`.
        int push_back(std::vector<std::pair<unsigned int, int> >& data, int k);

        /// Inserts one entry into dimension `k`, skipping it when it cannot make the top-k.
        int push_back(std::pair<unsigned int, int> word_prob, int k);

        /// Inserts one entry into dimension `k` unconditionally, then trims to capacity.
        /// NOTE: a call with a non-const lvalue pair is ambiguous between this overload
        /// and the by-value one above; temporaries and const pairs select the by-value one.
        int push_back(std::pair<unsigned int, int>& word_prob, int k);

        /// Removes and returns the entry with the smallest `second` in dimension `k`.
        /// @throws std::out_of_range if `k` is not a valid dimension index or the
        ///         heap for that dimension is empty.
        std::pair<unsigned int, int> get_itop(int k) {
            Heap& heap = this->pq_mat.at(static_cast<std::size_t>(k));
            if (heap.empty()) {
                // top()/pop() on an empty priority_queue is undefined behaviour.
                throw std::out_of_range("MinHeapUtil::get_itop: heap is empty");
            }
            const Entry word_prob = heap.top();
            heap.pop();
            return word_prob;
        }
    };
    
    /**
     * Configures how many per-dimension heaps exist and how many entries each keeps.
     *
     * Heaps that already exist below the new dimension count keep their contents
     * (std::vector::resize semantics); newly added heaps start empty.
     *
     * @param dim       number of dimensions; must not be negative
     * @param max_topk  maximum number of entries kept per dimension
     * @return true on success; false (leaving the object unchanged) if `dim` is negative
     */
    bool MinHeapUtil::setShape(const int dim, const unsigned  int max_topk) {
        // A negative dim would be converted to a huge unsigned size by resize().
        if (dim < 0) {
            return false;
        }
        this->pq_mat.resize(static_cast<std::size_t>(dim));
        this->max_topk = max_topk;
        return true;
    }
     /**
      * Inserts every element of `data` into the heap of dimension `k`, evicting the
      * top (smallest-`second`) entry each time the heap exceeds the capacity.
      *
      * Each element is stored with its two fields swapped: the stored entry's
      * `first` is the element's `second`, and the stored `second` is the element's
      * `first` (with the implied int/unsigned conversions).
      * NOTE(review): the single-pair overloads store the pair unswapped — confirm
      * that callers of this overload really pass pairs in the opposite field order.
      *
      * @param data  elements to insert
      * @param k     dimension index; not bounds-checked
      * @return always 0
      */
     int  MinHeapUtil::push_back(std::vector< std::pair<unsigned int, int> > &data ,int k) {
         for (auto it = data.cbegin(); it != data.cend(); ++it) {
             auto& heap = this->pq_mat[ k ];
             heap.push(std::pair<unsigned int, int>(it->second, it->first));
             // keep the heap at no more than max_topk entries
             if (heap.size() > this->max_topk) {
                 heap.pop();
             }
         }
         return 0;
     }
    /**
     * Offers one entry to the heap of dimension `k`.
     *
     * The entry is stored when the heap still has room, or when its `second` is
     * strictly greater than the current top's `second`; in the latter case the
     * top entry is evicted so the heap stays within the capacity.
     *
     * @param word_prob  entry to offer; heaps are ordered by `second`
     * @param k          dimension index; not bounds-checked
     * @return always 0
     */
    int MinHeapUtil::push_back(std::pair<unsigned int, int> word_prob, int k) {
        // With a zero capacity nothing can be kept. Returning here also prevents
        // the condition below from calling top() on an empty heap.
        if (this->max_topk == 0) {
            return 0;
        }

        auto& heap = this->pq_mat[k];
        if (heap.size() < this->max_topk || heap.top().second < word_prob.second) {
            heap.push(word_prob);
            // keep the heap at no more than max_topk entries
            if (heap.size() > this->max_topk) {
                heap.pop();
            }
        }
        return 0;
    }
    
    
    /**
     * Inserts `word_prob` into the heap of dimension `k` unconditionally, then
     * evicts the top (smallest-`second`) entry if the heap exceeds the capacity.
     *
     * @param word_prob  entry to insert (copied into the heap)
     * @param k          dimension index; not bounds-checked
     * @return always 0
     */
    int MinHeapUtil::push_back(std::pair<unsigned int, int> &word_prob, int k) {
        auto& heap = this->pq_mat[k];
        heap.push(word_prob);
        // keep the heap at no more than max_topk entries
        const bool over_capacity = heap.size() > this->max_topk;
        if (over_capacity) {
            heap.pop();
        }
        return 0;
    }
    // Global input table: maps an unsigned key to a vector of int values.
    // main() fills it once and the worker tasks only read from it.
    std::unordered_map<unsigned int, std::vector<int>> test_data;
    int  main() {
        MinHeapUtil top_words ;
    
        top_words.setShape(5,4);
        int total_size = 5;
    
    //    MinHeapUtil.push_back(std::pair<unsigned int, int>(8,2),0);
    //    MinHeapUtil.push_back(std::pair<unsigned int, int>(7,3),0);
    //    MinHeapUtil.push_back(std::pair<unsigned int, int>(6,4),0);
    //    MinHeapUtil.push_back(std::pair<unsigned int, int>(5,5),0);
    //    MinHeapUtil.push_back(std::pair<unsigned int, int>(4,6),0);
    //    MinHeapUtil.push_back(std::pair<unsigned int, int>(3,7),0);
    //    MinHeapUtil.push_back(std::pair<unsigned int, int>(2,4),0);
    //    MinHeapUtil.push_back(std::pair<unsigned int, int>(1,8),0);
        for (int j = 0; j < 1000000; ++j) {
            std::vector< int > tmp(5);
            for (int i = 0; i < 5; ++i) {
                tmp[i] = j*5 + i;
            }
            test_data[j] = std::move(tmp);
        }
        struct save_func
        {
            void operator () (int tn  ,int thread_nums,int total_size , MinHeapUtil *top_words )
            {
                std::cout<<tn <<" : "<< thread_nums<<" : "<<total_size<<std::endl;
                for (int i = tn; i < total_size ; i+=thread_nums) {
                    for ( auto word_key : test_data) {
                        //std::cout<<word_key.first;
                        top_words->push_back( std::pair<unsigned int, int>(word_key.first, word_key.second[ i ] ) , i );
                    }
                }
                std::cout << "func " << tn << std::endl;
            }
        } func;
        unsigned thread_nums = 8;
        std::vector<std::future< void >> muliti_topword_threads;
    
        for (unsigned tn = 0; tn < thread_nums; ++tn)
            muliti_topword_threads.emplace_back(
                    std::async(std::launch::async, func, (int)tn,(int)thread_nums,total_size,&top_words));
    
        for (unsigned tn = 0; tn < thread_nums; ++tn)
            muliti_topword_threads[tn].get();
    
        for (int k = 0; k < total_size; ++k) {
            size_t range = top_words.getSize( k );
            for (int i = 0; i < range; ++i) {
                auto temp = top_words.get_itop( k );
                std::cout <<"k:"<<k<<"  ( " << temp.first << "     " << temp.second << ")
    ";
            }
        }
        return 0;
    }
    
    
    
    

    --结果

    01 :  : 88 :  : 55
    
    2 : 8 : 5
    3 : 8 : 5
    4 : 8 : 5
    5 : 8 : 5
    func 5
    6 : 8 : 5
    func 6
    7 : 8 : 5
    func 7
    func 1
    func 4
    func 0
    func 2
    func 3
    k:0  ( 999996     4999980)
    k:0  ( 999997     4999985)
    k:0  ( 999998     4999990)
    k:0  ( 999999     4999995)
    k:1  ( 999996     4999981)
    k:1  ( 999997     4999986)
    k:1  ( 999998     4999991)
    k:1  ( 999999     4999996)
    k:2  ( 999996     4999982)
    k:2  ( 999997     4999987)
    k:2  ( 999998     4999992)
    k:2  ( 999999     4999997)
    k:3  ( 999996     4999983)
    k:3  ( 999997     4999988)
    k:3  ( 999998     4999993)
    k:3  ( 999999     4999998)
    k:4  ( 999996     4999984)
    k:4  ( 999997     4999989)
    k:4  ( 999998     4999994)
    k:4  ( 999999     4999999)
    
  • 相关阅读:
    最全的机器学习资源整理
    论文写作技巧
    制造业期刊-ZT
    机器学习中的算法(2)-支持向量机(SVM)基础
    定语从句
    Digital Twin的8种解读!
    详解BOM用途分类及在汽车企业中的应用
    Intellij IDEA运行报Command line is too long解法
    计算机行业各种职业技能树
    invocationCount和invocationTimeOut
  • 原文地址:https://www.cnblogs.com/gongxijun/p/10107444.html
Copyright © 2020-2023  润新知