• 利用正则表达式获取博客园随笔(四)


      我们前天讲到了需要加入多线程来缓解界面卡死的现象,那么现在就给大家介绍一个由博客园某位大牛写的线程池(实在是忘了作者是谁了)。

    线程池的代码奉上:

      /// <summary>
      /// Minimal static work dispatcher: queues <see cref="ThreadStart"/> tasks and runs them on
      /// <see cref="BackgroundWorker"/> threads, never more than the configured maximum at once.
      /// </summary>
      /// <remarks>
      /// <see cref="MyQueueUserWorkItem"/> blocks the caller until every queued task has been
      /// <em>dispatched</em>; it does not wait for the tasks to finish. Exceptions thrown by a task
      /// are captured by its BackgroundWorker and are not observed here.
      /// </remarks>
      sealed class MyThreadPool
      {
          // Guards threadStartQueue, threadsWorker and the two limits below.
          private static readonly object lockObj = new object();
          // Tasks waiting for a free worker slot.
          private static readonly Queue<ThreadStart> threadStartQueue = new Queue<ThreadStart>();
          // Tasks currently running; Count is the number of busy worker slots.
          // A List (not a HashSet) is used on purpose: two equal delegates queued twice must
          // occupy two slots, and a set would collapse them into one entry.
          private static readonly List<ThreadStart> threadsWorker = new List<ThreadStart>();
          // Maximum number of tasks allowed to run concurrently (always >= 1).
          private static int maxThreadWorkerCount = 1;
          // Lower bound applied to the maximum; not otherwise used by the dispatcher.
          private static int minThreadWorkerCount = 0;

          /// <summary>
          /// Sets the maximum number of concurrently running tasks.
          /// </summary>
          /// <param name="maxThreadCount">
          /// Requested maximum. Values below the configured minimum are raised to the minimum,
          /// and the result is never lower than 1 so queued work can always make progress.
          /// </param>
          public static void SetMaxWorkThreadCount(int maxThreadCount)
          {
              lock (lockObj)
              {
                  int effective = minThreadWorkerCount > maxThreadCount
                      ? minThreadWorkerCount
                      : maxThreadCount;
                  // A limit of zero or less would make the dispatch loop spin forever.
                  maxThreadWorkerCount = effective < 1 ? 1 : effective;
              }
          }

          /// <summary>
          /// Sets the minimum worker count, i.e. the floor later applied by
          /// <see cref="SetMaxWorkThreadCount"/>.
          /// </summary>
          /// <param name="minThreadCount">
          /// Requested minimum. Values above the current maximum are lowered to the maximum;
          /// negative values are treated as 0.
          /// </param>
          public static void SetMinWorkThreadCount(int minThreadCount)
          {
              lock (lockObj)
              {
                  int effective = minThreadCount > maxThreadWorkerCount
                      ? maxThreadWorkerCount
                      : minThreadCount;
                  minThreadWorkerCount = effective < 0 ? 0 : effective;
              }
          }

          /// <summary>
          /// Queues all given tasks and dispatches them, blocking the caller until the queue is empty.
          /// </summary>
          /// <param name="threadStartArray">Tasks to run; must not be null.</param>
          /// <exception cref="ArgumentNullException"><paramref name="threadStartArray"/> is null.</exception>
          public static void MyQueueUserWorkItem(List<ThreadStart> threadStartArray)
          {
              if (threadStartArray == null)
              {
                  throw new ArgumentNullException("threadStartArray");
              }

              AddAllThreadsToPool(threadStartArray);
              ExcuteTask();
          }

          /// <summary>
          /// Appends a single task to the pending queue.
          /// </summary>
          /// <param name="ts">Task to enqueue.</param>
          private static void AddThreadToQueue(ThreadStart ts)
          {
              lock (lockObj)
              {
                  threadStartQueue.Enqueue(ts);
              }
          }

          /// <summary>
          /// Appends every task in the list to the pending queue, in list order.
          /// </summary>
          /// <param name="threadStartArray">Tasks to enqueue.</param>
          private static void AddAllThreadsToPool(List<ThreadStart> threadStartArray)
          {
              foreach (ThreadStart threadStart in threadStartArray)
              {
                  AddThreadToQueue(threadStart);
              }
          }

          /// <summary>
          /// Dispatch loop: starts queued tasks while worker slots are free and returns once the
          /// queue is empty. Sleeps 100 ms only when every slot is busy.
          /// </summary>
          private static void ExcuteTask()
          {
              while (true)
              {
                  bool queueEmpty;
                  bool dispatched = ExcuteTaskInQueen(out queueEmpty);
                  if (queueEmpty)
                  {
                      return;
                  }
                  if (!dispatched)
                  {
                      // All slots busy: back off before polling again.
                      Thread.Sleep(100);
                  }
              }
          }

          /// <summary>
          /// Atomically checks the queue and the slot limit and, if possible, starts the next task.
          /// The emptiness check and the dequeue happen under the same lock so that concurrent
          /// callers can never dequeue from an empty queue.
          /// </summary>
          /// <param name="queueEmpty">Set to true when there was nothing left to dispatch.</param>
          /// <returns>true if a task was started; otherwise false.</returns>
          private static bool ExcuteTaskInQueen(out bool queueEmpty)
          {
              lock (lockObj)
              {
                  queueEmpty = threadStartQueue.Count == 0;
                  if (queueEmpty || threadsWorker.Count >= maxThreadWorkerCount)
                  {
                      return false;
                  }

                  ExcuteTaskByThread(threadStartQueue.Dequeue());
                  return true;
              }
          }

          /// <summary>
          /// Runs one task on a BackgroundWorker. Must be called while holding lockObj.
          /// The slot is released in a finally block on the worker thread itself rather than in
          /// RunWorkerCompleted: that event is delivered through the synchronization context
          /// captured at start, so it would never arrive while a UI thread is blocked inside
          /// <see cref="ExcuteTask"/>, and the dispatcher would stall once all slots were taken.
          /// </summary>
          /// <param name="threadStart">Task to execute.</param>
          private static void ExcuteTaskByThread(ThreadStart threadStart)
          {
              threadsWorker.Add(threadStart);
              BackgroundWorker worker = new BackgroundWorker();
              worker.DoWork += (o, e) =>
              {
                  try
                  {
                      threadStart.Invoke();
                  }
                  finally
                  {
                      lock (lockObj)
                      {
                          threadsWorker.Remove(threadStart);
                      }
                  }
              };
              worker.RunWorkerAsync();
          }
      }
    View Code

    然后呢再奉上有所修改的和新增的方法的代码:

     /// <summary>
     /// Splits a cnblogs listing page into individual post entries and schedules one parsing
     /// task per entry on <c>MyThreadPool</c>.
     /// </summary>
     /// <param name="Html">HTML source of the listing page.</param>
     /// <returns>
     /// The shared <c>results</c> list. The pool call returns once all tasks have been handed to
     /// workers, so the list may still be filling when this method returns; each finished task
     /// is reported through the callback invoked in <c>Cnblogs</c>.
     /// </returns>
     public List<CnblogsResult> getResult(string Html)
     {
         // One match per post entry: everything between the post body div and its trailing "clear" div.
         Regex regexContent = new Regex("<div class=\"post_item_body\">(?<content>.*?)<div class=\"clear\"></div>", RegexOptions.Singleline);

         // Matches() returns an empty collection when nothing matches, so no separate IsMatch scan is needed.
         MatchCollection blog = regexContent.Matches(Html);
         if (blog.Count == 0)
         {
             return results;
         }

         List<ThreadStart> StartArray = new List<ThreadStart>(blog.Count);
         int i = 1;
         foreach (Match item in blog)
         {
             // Declared inside the loop so each lambda captures its own copy.
             chuancanshu ccs = new chuancanshu();
             ccs.i = i++; // 1-based position of the entry on the page
             ccs.item = item;
             StartArray.Add(new ThreadStart(() =>
             {
                 Cnblogs(ccs);
             }));
         }

         MyThreadPool.SetMaxWorkThreadCount(5); // run at most 5 parsing tasks concurrently
         MyThreadPool.MyQueueUserWorkItem(StartArray); // blocks until every task has been dispatched
         return results;
     }
    View Code
     /// <summary>
     /// Parses one post entry (title, author, publish time, link), appends it to the shared
     /// <c>results</c> list and notifies the <c>getResults</c> callback, if one is attached.
     /// Runs on pool worker threads, so access to <c>results</c> is serialized.
     /// </summary>
     /// <param name="obj">A boxed <c>chuancanshu</c> carrying the entry's match and its rank.</param>
     private void  Cnblogs(object obj)
     {
         chuancanshu ccs = (chuancanshu)obj;
         CnblogsResult result = new CnblogsResult();

         // Extracts link, title, author and publish time from a single entry.
         Regex regexProperty = new Regex("<h3><a.*?href=\"(?<href>.*?)\".*?>(?<Title>.*?)</a></h3>.*?<a .*? class=\"lightblue\".*?>(?<Author>.*?)</a>.*?发布于.*?(?<time>.*?)<span",
             RegexOptions.Singleline);

         // A single Match call replaces the former IsMatch + Match pair.
         Match Property = regexProperty.Match(ccs.item.Value);
         if (Property.Success)
         {
             result.Title = Property.Groups["Title"].Value;
             result.Author = Property.Groups["Author"].Value;
             result.time = Property.Groups["time"].Value;
             result.href = Property.Groups["href"].Value;
             result.Rank = ccs.i;
         }

         // List<T> is not safe for concurrent writers, and several workers call this method at once.
         // An entry is added even when parsing failed, as before.
         List<CnblogsResult> snapshot;
         lock (results)
         {
             results.Add(result);
             // Hand the callback a stable copy so it never enumerates a list another worker is mutating.
             snapshot = new List<CnblogsResult>(results);
         }

         // Copy to a local first so the handler cannot become null between the check and the call.
         var handler = getResults;
         if (handler != null)
         {
             handler(snapshot);
         }
     }
    View Code
    /// <summary>
    /// Argument bundle handed to a parsing task ("chuan canshu" is pinyin for "pass parameters").
    /// </summary>
    struct chuancanshu
    {
        /// <summary>Regex match holding the raw HTML of one post entry.</summary>
        public Match item;

        /// <summary>Position of the entry on the page; stored as the result's Rank.</summary>
        public int i;
    }
    View Code

    在这里呢,我把

            List<CnblogsResult> results = new List<CnblogsResult>();

    这行代码提取出来,当作公共变量(类的成员字段)了。

    最后,奉上本例子的源码:点这里下载

  • 相关阅读:
    <转>Npoi导入导出Excel操作<载>
    将DataTable导出为Excel C#
    错误 X “X1”不包含“XX2”的定义,并且找不到可接受类型为“X1”的第一个参数的扩展方法“XX2”(是否缺少 using 指令或程序集引用?)
    错误 1 未知的服务器标记“asp:ScriptManager”。
    分析器错误消息: 类型“test.test.testx”不明确: 它可能来自程序集“F:\testProject\bin\test.test.DLL”或程序集“F:\testProject\bin\testProject.DLL”。请在类型名称中显式指定程序集。
    There are no interfaces on which a capture can be done.
    基础知识系列☞GET和POST→及相关知识
    基础知识系列☞Abstract和Virtual→及相关知识
    同源策略
    26Mybatis_一级缓存及其测试
  • 原文地址:https://www.cnblogs.com/suixingerxing/p/3236340.html
Copyright © 2020-2023  润新知