• .Net/C#: 利用反射编写通用的 rss 2.0 的 reader


    /*
    .Net/C#: 利用反射编写通用的 rss 2.0 的 reader

    最近在写一个 Simple Rss Reader
    网上找到现成代码两种:
    1.代码简单的,但不够通用 (如: 本站的一些专用 rss reader)
    2.代码复杂的,但没有足够时间去消化 (如: rssbandit)

    遂自己动手:
    由于 rss 的基本属性大家都有!
    但一些特殊不通用属性,如:
    slash:comments
    wfw:comment
    wfw:commentRss
    trackbackping
    不一定存在!
    如何处理???
    我想到了 Reflection,就此提出以下解决方案:
    1. Class RssHeader 用于表示 Rss 的头信息
     你可以为其添加新属性,原则是:
     成员变量 Field 的名称为 rss 的 XML 源对应的属性名称前加下划线,XML 属性名称含有 ":" 将其滤掉!
     如: <dc:language>zh-CHS</dc:language>
     将其映射为:
      private string _dclanguage
      public string DcLanguage
      {
       get
       {
        return this._dclanguage;
       }
      }

    2. Class RssItem 用于表示 Rss 的 Item
     添加新属性的原则同 RssHeader!

    3. 获取 rss 的 XML 源后通过递归遍历节点 (class SimpleRssReader)
     根据实际存在的 rss 属性,通过反射,"构造实例化" RssHeader 和 RssItem!
     请仔细参阅 class SimpleRssReader 的 Travel 方法!

    4. 数据库 (本文使用了 Microsoft Data Access Application Block 3.1)
     表:
     Channels (主表)
     ChannelsDetails (细表)
     字段名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
     存储过程:
     SP_AddChannel
     SP_AddChannelsDetails
     参数名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!


     命令行编译:
    csc SimpleRssReader.cs /r:C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.OracleClient.dll


    全部代码 SimpleRssReader.cs 在此下载
    https://files.cnblogs.com/Microshaoft/SimpleRssReader.rar

    */
    namespace Microshaoft
    {
     using System;
     using System.Xml;
     using System.Text;
     using System.Reflection;
     using System.Collections;
     using System.Text.RegularExpressions;

     public class RssHeader
     {
      //feed URL
      public RssHeader(string URL)
      {
       this._URL = URL;
      }

      public string Title
      {
       get
       {
        return this._title;
       }
      }

      public string Description
      {
       get
       {
        return this._description;
       }
      }

      public string Link
      {
       get
       {
        return this._link;
       }
      }

      public string Language
      {
       get
       {
        return this._language;
       }
      }

      public string Generator
      {
       get
       {
        return this._generator;
       }
      }

      public string Ttl
      {
       get
       {
        return this._ttl;
       }
      }

      public string Copyright
      {
       get
       {
        return this._copyright;
       }
      }

      public DateTime PubDate
      {
       get
       {
        return Util.ParseDateTime(this._pubDate);
       }
      }

      public string Category
      {
       get
       {
        return this._category;
       }
      }

      public DateTime LastBuildDate
      {
       get
       {
        return Util.ParseDateTime(this._lastBuildDate);
       }
      }
      public string ManagingEditor
      {
       get
       {
        return this._managingEditor;
       }
      }

      public string URL
      {
       get
       {
        return this._URL;
       }
      }

      public string DcLanguage
      {
       get
       {
        return this._dclanguage;
       }
      }

      //下面私有 Field 的值将 class SimpleRssReader 中通过反射赋值
      private string _dclanguage; //dc:language
      private string _URL;
      private string _managingEditor;
      private string _lastBuildDate;
      private string _title;
      private string _description;
      private string _link;
      private string _language;
      private string _generator;
      private string _ttl;
      private string _copyright;
      private string _pubDate;
      private string _category;
      

     }
     public class RssItem
     {
      private RssHeader _Header;

      public RssHeader Header
      {
       get
       {
        return this._Header;
       }
      }

      //下面私有 Field 的值将 class SimpleRssReader 中通过反射赋值
      private string _title;
      private string _link;
      private string _description;
      private string _category;
      private string _author;
      private string _pubDate;
      private string _comments;
      private string _guid;
      private string _slashcomments;
      private string _wfwcomment;
      private string _wfwcommentRss;
      private string _trackbackping;

      public string TrackbackPing
      {
       get
       {
        return this._trackbackping;
       }
      }

      public string WfwCommentRss
      {
       get
       {
        return this._wfwcommentRss;
       }
      }

      public string WfwComment
      {
       get
       {
        return this._wfwcomment;
       }
      }
      

      public string SlashComments
      {
       get
       {
        return this._slashcomments;
       }
      }
      public string Title
      {
       get
       {
        return this._title;
       }
      }

      public string Link
      {
       get
       {
        return this._link;
       }
      }

      public string Description
      {
       get
       {
        return this._description;
       }
      }

      public string Category
      {
       get
       {
        return this._category;
       }
      }

      public string Author
      {
       get
       {
        return this._author;
       }
      }

      public DateTime PubDate
      {
       get
       {
        return Util.ParseDateTime(this._pubDate);
       }
      }

      public string Comments
      {
       get
       {
        return this._comments;
       }
      }

      public string Guid
      {
       get
       {
        return this._guid;
       }
      }
     }
     public class SimpleRssReader
     {
      //RssHeader header 解析处理完毕事件
      public delegate void RssHeaderReceiveEventHandler(SimpleRssReader Sender, RssHeader Header);
      public event RssHeaderReceiveEventHandler RssHeaderReceive;

      //某一个 RssItem 解析处理完毕事件
      public delegate void RssItemReceiveEventHandler(SimpleRssReader Sender, RssItem Item);
      public event RssItemReceiveEventHandler RssItemReceive;

      private Type _TRS; //typeof(RssHeader)
      private Type _tri; //typeof(RssItem)

      private ArrayList _RssItemsAL;

      private RssHeader _rs;
      public RssHeader RssHeader
      {
       get
       {
        return this._rs;
       }
      }

      //用于存储所有的 RssItem
      private RssItem[] _RssItems;

      public RssItem[] RssItems
      {
       get
       {
        return this._RssItems;
       }
      }

      public void Rss(string URL)
      {
       XmlDocument xd = new XmlDocument();
       //如果效率不高可采用 WebRequest 替代
       xd.Load(URL);
       XmlNodeList xnl = xd.SelectNodes("/rss/channel");

       this._rs = new RssHeader(URL);

       this._TRS = typeof(RssHeader);
       this._tri = typeof(RssItem);

       this._RssItemsAL = new ArrayList();

       foreach (XmlNode xn in xnl)
       {
        //递归遍历
        this.Travel(xn, 0);
       }

       if (this._RssItemsAL.Count > 0)
       {
        this._RssItems = new RssItem[this._RssItemsAL.Count];
        int i = 0;
        foreach (object o in this._RssItemsAL)
        {
         this._RssItems[i++] = (RssItem) o;
        }
       }
      }

      /// <Header>
      /// 递归遍历
      /// </Header>
      /// <param name="xn">节点</param>
      /// <param name="i">项目数</param>
      private void Travel(XmlNode xn, int i)
      {
       if (xn.HasChildNodes)
       {
        foreach (XmlNode x in xn.ChildNodes)
        {
         if (x.ParentNode != null)
         {
          if (x.ParentNode.Name == "channel")
          {
           if (x.Name == "item")
           {
            i ++;
            if (i >= 1)
            {
             XmlNode node = null;
             bool b = false; //是否是 Rss Item
             RssItem ri = null;
             if (i == 1) //Header
             {
              node = xn;
              b = false;
             }
             else if (i > 1) //Item
             {
              node = x;
              b = true;
              ri = new RssItem();
             }

             foreach (XmlNode n in node.ChildNodes)
             {
              if (n.Name != "item")
              {
               if (!b) //Rss Header Header
               {
                //根据 XML 实际存在的属性,利用反射为 RssHeader 实例的私有成员赋值
                FieldInfo fi = this._TRS.GetField("_" + n.Name.Replace(":","") ,BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
                if (fi != null)
                {
                 fi.SetValue(this._rs,n.InnerText);
                }
               }
               else //Rss Item
               {
                //根据 XML 实际存在的属性,利用反射为 RssItem 实例的私有成员赋值
                FieldInfo fi = this._tri.GetField("_" + n.Name.Replace(":",""),BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
                if (fi != null)
                {
                 fi.SetValue(ri,n.InnerText);
                }
               }

              }
             }
             if (!b)
             {
              //触发 RssHeaderReceive 事件
              if (this.RssHeaderReceive != null)
              {
               this.RssHeaderReceive(this,this._rs);
              }
             }
             else
             {
              //制定 RssItem 实例的 Header/Header
              FieldInfo fi = this._tri.GetField("_Header",BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
              if (fi != null)
              {
               fi.SetValue(ri,this._rs);
              }

              //触发 RssItemReceive 事件
              if (this.RssItemReceive != null)
              {
               this.RssItemReceive(this,ri);
              }
              this._RssItemsAL.Add(ri);
             }
            }
           }
          }
         }
         if (!x.HasChildNodes)
         {
          this.Travel(x, i);
         }
        }
       }
      }
     }

     public class Util
     {
      public static DateTime ParseDateTime(string s)
      {
       DateTime dt;
       if (s == null || s.ToString().Length <= 0)
       {
        dt = DateTime.Now;
       }
       else
       {
        try
        {
         dt = DateTime.Parse(s);
        }
        catch
        {
         dt = DateTime.Now;
        }
       }
       return dt;
      }
      /// <Header>
      /// 去除 HTML tag
      /// </Header>
      /// <param name="HTML">源</param>
      /// <returns>结果</returns>
      public static string StripHTML(string HTML) //google "StripHTML" 得到
      {
       string[] Regexs =
            {
             @"<script[^>]*?>.*?</script>",
             @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
             @"([\r\n])[\s]+",
             @"&(quot|#34);",
             @"&(amp|#38);",
             @"&(lt|#60);",
             @"&(gt|#62);",
             @"&(nbsp|#160);",
             @"&(iexcl|#161);",
             @"&(cent|#162);",
             @"&(pound|#163);",
             @"&(copy|#169);",
             @"&#(\d+);",
             @"-->",
             @"<!--.*\n"
            };

       string[] Replaces =
            {
             "",
             "",
             "",
             "\"",
             "&",
             "<",
             ">",
             " ",
             "\xa1", //chr(161),
             "\xa2", //chr(162),
             "\xa3", //chr(163),
             "\xa9", //chr(169),
             "",
             "\r\n",
             ""
            };

       string s = HTML;
       for (int i = 0; i < Regexs.Length; i++)
       {
        s = new Regex(Regexs[i], RegexOptions.Multiline | RegexOptions.IgnoreCase).Replace(s, Replaces[i]);
       }
       s.Replace("<", "");
       s.Replace(">", "");
       s.Replace("\r\n", "");
       return s;
      }
     }
    }

    //测试程序
    namespace Test
    {
     using System;
     using System.Data;
     using System.Reflection;
     using System.Data.SqlClient;

     using Microshaoft;
     using Microshaoft.Data;

     class ConsoleApplication
     {
      private SqlConnection _Connection;
      public string _Channel;

      public SqlConnection Connection
      {
       set
       {
        this._Connection = value;
       }
       get
       {
        return this._Connection;
       }
      }

      static void Main()
      {
       
       string s = "http://www.ccw.com.cn/rss/news2/1.xml";
       s = "http://dzh.mop.com/topic/rss.jsp?type=28";
       s = "http://www.ccw.com.cn/rss/news2/15.xml";
       s = "http://www.cnblogs.com/rss.aspx?id=-1";
       s = "http://localhost/rss.xml";
       //s = "http://weblog.siliconvalley.com/column/dangillmor/index.xml";
       //s= "http://www.skyone.com.cn/sub/rss/list_jjsc.xml";

       ConsoleApplication a = new ConsoleApplication();

       a.Connection = new SqlConnection("server=SERVER\\PSQLKE;user id=sa;password=;database=rss");
       a.Connection.Open();

       SimpleRssReader srr = new SimpleRssReader();

       srr.RssHeaderReceive += new Microshaoft.SimpleRssReader.RssHeaderReceiveEventHandler(a.srr_RssHeaderReceive);
       srr.RssItemReceive +=new Microshaoft.SimpleRssReader.RssItemReceiveEventHandler(a.srr_RssItemReceive);

       System.Console.WriteLine("waiting ....");
       srr.Rss(s); //以后改成多线程或异步

       System.Console.WriteLine("print all rss Header and items ....");
       System.Console.ReadLine();
       System.Console.WriteLine("Header: "+ srr.RssHeader.Title);
       foreach (RssItem ri in srr.RssItems)
       {
        System.Console.WriteLine("item: " + ri.Title);
       }
       System.Console.ReadLine();

      }

      private void srr_RssHeaderReceive(SimpleRssReader Sender, RssHeader Header)
      {
       System.Console.WriteLine("Header:" + Header.Link);
       System.Console.WriteLine("Header:" + Header.Title);

       this.SaveToDataBase("SP_AddChannel",typeof(RssHeader),Header);

      }

      private void srr_RssItemReceive(SimpleRssReader Sender, RssItem Item)
      {
       System.Console.WriteLine("Item: " + Item.Title);
       System.Console.WriteLine("Item: " + Item.Link);
       System.Console.WriteLine("Item: " + Util.StripHTML(Item.Description));

       this.SaveToDataBase("SP_AddChannelsDetails",typeof(RssItem),Item);

      }
      private void SaveToDataBase(string sp, Type t,object instance)
      {
       //获取 sp 所有参数
       SqlParameter[] spa = SqlHelperParameterCache.GetSpParameterSet(this.Connection, sp);
       System.Collections.Hashtable ht = new System.Collections.Hashtable();
       
       for (int i = 0; i < spa.Length; i++)
       {
        //保存 参数名称与其位置(次序) 的关系
        ht.Add(spa[i].ParameterName.ToLower().Replace("@", ""), i);

        //相当于为存储过程的所有参数赋初值
        spa[i].Value = null;
       }

       //得到所有的属性
       PropertyInfo[] pi = t.GetProperties();
       foreach (PropertyInfo x in pi)
       {
        if (ht.ContainsKey( x.Name.ToLower()))
        {
         //根据参数(属性)名称得到参数的次序!
         int i = (int) ht[x.Name.ToLower()];
         if (spa[i].Direction == System.Data.ParameterDirection.Input || spa[i].Direction == System.Data.ParameterDirection.InputOutput)
         {
          object o;
          if (x.PropertyType.Name == "String")
          {
           o = x.GetValue(instance,null);
           if (o != null)
           {
            string s = Util.StripHTML((string) o);
            o = s;
           }
          }
          else
          {
           o = x.GetValue(instance,null);
          }
          
          spa[i].Value = o;
         }
        }
        
       }

       if (t == typeof(RssItem))
       {
        spa[0].Value = ((RssItem) instance).Header.URL;
       }

       SqlHelper.ExecuteNonQuery(this.Connection, CommandType.StoredProcedure, sp, spa);
       if (spa[spa.Length - 1].Value != System.DBNull.Value)
       {
        System.Console.WriteLine("Save to ID: {0} successful!", spa[spa.Length - 1].Value);
       }
       else
       {
        System.Console.WriteLine("save failed! may be duplicate!");
       }
      }
     }
    }

    //==========================================================================================================
    /*
    --sql Script
    if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannel]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
    drop procedure [dbo].[SP_AddChannel]
    GO

    if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannelsDetails]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
    drop procedure [dbo].[SP_AddChannelsDetails]
    GO

    if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Channels]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
    drop table [dbo].[Channels]
    GO

    if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ChannelsDetails]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
    drop table [dbo].[ChannelsDetails]
    GO

    CREATE TABLE [dbo].[Channels] (
     [ID] [int] IDENTITY (1, 1) NOT NULL ,
     [URL] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
     [Channel] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
     [Title] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
     [Description] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
     [link] [varchar] (500) COLLATE Chinese_PRC_CI_AS NULL ,
     [language] [varchar] (10) COLLATE Chinese_PRC_CI_AS NULL ,
     [generator] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
     [ttl] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
     [copyright] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
     [pubDate] [datetime] NULL ,
     [category] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
     [dclanguage] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL
    ) ON [PRIMARY]
    GO

    CREATE TABLE [dbo].[ChannelsDetails] (
     [ID] [int] IDENTITY (1, 1) NOT NULL ,
     [ChannelID] [int] NULL ,
     [title] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
     [link] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
     [description] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
     [category] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
     [author] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
     [pubDate] [datetime] NULL ,
     [comments] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
     [guid] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
     [trackbackping] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL
    ) ON [PRIMARY]
    GO

    SET QUOTED_IDENTIFIER ON
    GO
    SET ANSI_NULLS ON
    GO


    CREATE   proc SP_AddChannel
    @URL varchar(8000)
    ,@link varchar(8000)
    ,@Channel varchar(8000)
    ,@Title varchar(8000)
    ,@Image varchar(8000)
    ,@Description varchar(7999)
    ,@language varchar(8000)
    ,@generator varchar(8000)
    ,@ttl varchar(8000)
    ,@copyright varchar(8000)
    ,@pubDate datetime
    ,@category varchar(8000)
    ,@Docs varchar(8000)
    ,@ManagingEditor varchar(8000)
    ,@dclanguage varchar(8000)
    ,@ int out
    as
    set @ = 0
    insert into Channels ([URL],[Channel],[Title],[Description],[link],[language],[generator],[ttl],[copyright],[pubDate],[category],[dclanguage])
    select @URL,@Channel,@Title,@Description,@link,@language,@generator,@ttl,@copyright,@pubDate,@category,@dclanguage
    where not exists(select 1 from Channels where [URL] = @URL)
    select @ = SCOPE_IDENTITY()
    GO
    SET QUOTED_IDENTIFIER OFF
    GO
    SET ANSI_NULLS ON
    GO

    SET QUOTED_IDENTIFIER ON
    GO
    SET ANSI_NULLS ON
    GO

    CREATE     proc SP_AddChannelsDetails
    @URL varchar(8000)
    ,@Title varchar(8000)
    ,@Description varchar(7000)
    ,@link varchar(8000)
    ,@pubDate datetime
    ,@category varchar(8000)
    ,@Comments varchar(8000)
    ,@Guid varchar(8000)
    ,@trackbackping varchar(8000)
    ,@ int out
    as
    set @ = 0
    insert into ChannelsDetails ([ChannelID],[Title],[Description],[link],[pubDate],[category],[comments],[guid],[trackbackping])
    select id,@Title,@Description,@link,@pubDate,@category,@comments,isnull(@guid,@link),@trackbackping
    from Channels
    where not exists (select 1 from ChannelsDetails where guid = isnull(@guid,@link)) and URL = @URL
    select @ = SCOPE_IDENTITY()
    GO
    SET QUOTED_IDENTIFIER OFF
    GO
    SET ANSI_NULLS ON
    GO
    */

  • 相关阅读:
    使用postman解决接口之间的接口依赖
    loadrunner11安装
    记录一次搭建jmeter分布式压测环境时creatermikeystore.bat不是内部命令
    jmeter分布式压测环境搭建(jmeter版本5.1.1,jdk版本jdk1.8.0_221)
    jmeter基于windows分布式压测环境搭建
    Chrome浏览器一直请求clients1.google.com:443
    Nginx 配置支持C++
    Ubuntu Sublime Text 设置等宽字体
    取任意四边形最长边
    调用Lua脚本print(xxx)报attempt to call a nil value (global 'print')错误
  • 原文地址:https://www.cnblogs.com/Microshaoft/p/126669.html
Copyright © 2020-2023  润新知