• 读取chinanews新闻列表及内容


    // RSS feed addresses published by chinanews.com, one per news category.
    string[] urilist ={ "http://www.chinanews.com/rss/scroll-news.xml", // headlines (scrolling news)
                                "http://www.chinanews.com/rss/finance.xml", // finance
                                "http://www.chinanews.com/rss/sports.xml", // sports
                                "http://www.chinanews.com/rss/ent.xml", // entertainment
                                "http://www.chinanews.com/rss/health.xml", // health
                                "http://www.chinanews.com/rss/auto.xml", // automotive
                                "http://www.chinanews.com/rss/society.xml", // society
                         };
     private void ReadNews(string uri, ushort type)
            {
                try
                {
                    string xml = NetHelper.ReadHtml(uri, Encoding.GetEncoding("gb2312"));
                    XmlDocument doc = new XmlDocument();
                    doc.LoadXml(xml);
                    Dictionary<ushort, NewsEntity> newslist = new Dictionary<ushort, NewsEntity>();
                    XmlNodeList list = doc.SelectNodes("rss/channel/item");
                    for (int i = 0; i < list.Count; i++)
                    {
                        string title = System.Helpers.XmlHelper.GetChileNode(list[i], "title").InnerText.Replace("(图)""").Replace("(组图)""").Replace("(图)""");
                        string link = System.Helpers.XmlHelper.GetChileNode(list[i], "link").InnerText;
                        string result = "";
                        int end = 0;
                        string html = NetHelper.ReadHtml(link, Encoding.Default);
                        int start = html.IndexOf("<div class=left_zw>");
                        if (start > 0)
                            end = html.IndexOf("<!--正文-->", start);
                        result = html.Substring(start, end - start);

                        int _end = 0;
                        int _start = result.IndexOf(@"<div id=""function_code_page"">");
                        if (_start > 0)
                            _end = result.IndexOf("</div>", _start);
                        string pageStr = result.Substring(_start, _end - _start);

                        result = result.Replace(pageStr, "");
                        result = Regex.Replace(result, "\r""", RegexOptions.IgnoreCase);
                        result = Regex.Replace(result, "\n""", RegexOptions.IgnoreCase);
                        result = Regex.Replace(result, "<.*?>""", RegexOptions.IgnoreCase);                    
                        result = Regex.Replace(result, @"&(.{2,6});""", RegexOptions.IgnoreCase);  
                        result = Regex.Replace(result, "\r{2,}""\r", RegexOptions.IgnoreCase);
                        result = Regex.Replace(result, "\t{2,}""\t", RegexOptions.IgnoreCase);
                        result = Regex.Replace(result, @"\s{2,}""", RegexOptions.IgnoreCase);
                        Console.WriteLine(result);
                        result = result.Trim('\r''\n').TrimEnd();
                        if (!string.IsNullOrEmpty(title) && !string.IsNullOrEmpty(result))
                        {
                            
                        }
                    }
                             
            }
                catch
                {
                    
                }

            }
  • 相关阅读:
    面试只要问到分布式,必问分布式锁
    Java编程中忽略这些细节,Bug肯定找上你
    不止承上启下,带你了解工业物联网关
    论文解读二十七:文本行识别模型的再思考
    并发高?可能是编译优化引发有序性问题
    论文解读丨LayoutLM: 面向文档理解的文本与版面预训练
    SQL优化老出错,那是你没弄明白MySQL解释计划
    SQL反模式学习笔记1 开篇
    SQL Server中自定义函数:用指定的分隔符号分割字符串
    .NET软件开发与常用工具清单
  • 原文地址:https://www.cnblogs.com/94cool/p/2218576.html
Copyright © 2020-2023  润新知