• 正则匹配 获取QQ空间日志


    using System;   
    using System.Collections.Generic;   
    using System.Net;   
    using System.Text.RegularExpressions;   
    namespace QQ   
    {   
        /// <summary>
        /// Downloads the blog list and the individual blog posts of one QQ Zone
        /// account over HTTP and extracts their fields with regular expressions.
        /// Results are cached in <see cref="BlogList"/> and
        /// <see cref="BlogDataDictionary"/> after the first successful fetch.
        /// </summary>
        class QZone
        {
            // URL templates: {0} is the QQ number, {1} (post page only) is the blog ID.
            private const string BlogListUrlFormat = @"http://b.qzone.qq.com/cgi-bin/blognew/blog_output_toppage?uin={0}&property=GoRE&numperpage=1000&maxlen=1000&direct=1";
            private const string BlogDataUrlFormat = @"http://b.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin={0}&blogid={1}&numperpage=1000&property=GoRE";

            // Cached list of blog IDs; null until GetBlogList() has completed successfully.
            private List<long> BlogList = null;

            /// <summary>QQ number of the account whose blog is read.</summary>
            public long QNumber;

            /// <summary>
            /// Cached posts keyed by blog ID; null until
            /// <see cref="GetBlogDataDictionary"/> has completed successfully.
            /// </summary>
            public Dictionary<long, QBlogData> BlogDataDictionary = null;

            #region Regular expressions
            // The patterns do not depend on instance state, so they are compiled once
            // and shared by every QZone instance.

            // One <p class="list_tit">...</p> element per entry on the list page.
            private static readonly Regex RgxItem = new Regex("<p class=\"list_tit\">.*?</p>", RegexOptions.Singleline | RegexOptions.Compiled);
            // Text between "Blog(" and the next ")" inside a list entry.
            private static readonly Regex RgxID = new Regex("(?<=Blog\\().*?(?=\\))", RegexOptions.Singleline | RegexOptions.Compiled);
            // Text of the first <span >...</span> element on a post page.
            private static readonly Regex RgxTitle = new Regex("(?<=<span >).*?(?=</span>)", RegexOptions.Singleline | RegexOptions.Compiled);
            // NOTE(review): this only matches the literal category "个人日记"; a post in
            // any other category yields an empty Category. Confirm whether that is
            // intended or whether the pattern should capture any category name.
            private static readonly Regex RgxCategory = new Regex("(?<=Category\\(')个人日记(?='\\))", RegexOptions.Singleline | RegexOptions.Compiled);
            // Text from the opening <div id="blogDetailDiv" ...> tag up to the first </div>.
            private static readonly Regex RgxContext = new Regex("(?<=<div id=\"blogDetailDiv\".*?>).*?(?=</div>)", RegexOptions.Singleline | RegexOptions.Compiled);
            // Text following "发表于" / "转载于" ("published at" / "reposted at") up to the next '<'.
            private static readonly Regex RgxTime = new Regex("(?<=(发表|转载)于).*?(?=<)", RegexOptions.Singleline | RegexOptions.Compiled);
            #endregion

            /// <summary>Creates a reader for the given QQ number. No network access happens here.</summary>
            /// <param name="QNumber">QQ number of the account to read.</param>
            public QZone(long QNumber)
            {
                this.QNumber = QNumber;
            }

            /// <summary>
            /// Returns the IDs of the blog posts found on the account's list page,
            /// downloading and parsing the page on the first call only.
            /// </summary>
            /// <returns>The cached list of blog IDs (possibly empty).</returns>
            /// <exception cref="WebException">The list page could not be downloaded.</exception>
            public List<long> GetBlogList()
            {
                if (BlogList != null)
                {
                    return BlogList;
                }

                string listPage;
                using (WebClient client = new WebClient())
                {
                    listPage = client.DownloadString(string.Format(BlogListUrlFormat, QNumber));
                }

                // Build into a local and publish only when complete, so a failure
                // part-way through never leaves a truncated list in the cache.
                List<long> ids = new List<long>();
                foreach (Match entry in RgxItem.Matches(listPage))
                {
                    string idText = RgxID.Match(entry.Value).Value.Trim();
                    long id;
                    // Entries without a numeric ID are skipped instead of aborting
                    // the whole listing with a FormatException.
                    if (long.TryParse(idText, out id))
                    {
                        ids.Add(id);
                    }
                }

                BlogList = ids;
                return BlogList;
            }

            /// <summary>
            /// Returns every post of the account keyed by blog ID, downloading and
            /// parsing one page per post on the first call only.
            /// </summary>
            /// <remarks>
            /// Fields whose pattern does not match are left as empty strings. When the
            /// time text is missing or cannot be parsed, <c>Time</c> keeps its default
            /// value rather than aborting the fetch.
            /// </remarks>
            /// <returns>The cached dictionary of posts (possibly empty).</returns>
            /// <exception cref="WebException">A page could not be downloaded.</exception>
            public Dictionary<long, QBlogData> GetBlogDataDictionary()
            {
                List<long> ids = GetBlogList();
                if (BlogDataDictionary != null)
                {
                    return BlogDataDictionary;
                }

                // Same publish-when-complete approach as GetBlogList().
                Dictionary<long, QBlogData> posts = new Dictionary<long, QBlogData>(ids.Count);
                using (WebClient client = new WebClient())
                {
                    foreach (long BlogID in ids)
                    {
                        // A repeated ID on the list page would make Add() throw;
                        // skip it before spending a request on it.
                        if (posts.ContainsKey(BlogID))
                        {
                            continue;
                        }

                        string postPage = client.DownloadString(string.Format(BlogDataUrlFormat, QNumber, BlogID));

                        QBlogData data = new QBlogData();
                        data.BlogID = BlogID;
                        data.Title = RgxTitle.Match(postPage).Value;
                        data.Context = RgxContext.Match(postPage).Value;
                        data.Category = RgxCategory.Match(postPage).Value;

                        DateTime postedAt;
                        if (DateTime.TryParse(RgxTime.Match(postPage).Value.Trim(), out postedAt))
                        {
                            data.Time = postedAt;
                        }

                        posts.Add(BlogID, data);
                    }
                }

                BlogDataDictionary = posts;
                return BlogDataDictionary;
            }

            /// <summary>Returns a single post, loading all posts first if they are not cached yet.</summary>
            /// <param name="BlogID">ID of the post to return.</param>
            /// <returns>The post stored under <paramref name="BlogID"/>.</returns>
            /// <exception cref="KeyNotFoundException">No post with that ID was loaded.</exception>
            public QBlogData GetBlogData(long BlogID)
            {
                if (BlogDataDictionary == null)
                {
                    GetBlogDataDictionary();
                }
                return BlogDataDictionary[BlogID];
            }

            /// <summary>
            /// Entry point: fetches all posts of the QQ number given as the single
            /// command-line argument.
            /// </summary>
            /// <param name="args">Command-line arguments; exactly one numeric QQ number is expected.</param>
            static void Main(string[] args)
            {
                long qNumber;
                if (args == null || args.Length != 1 || !long.TryParse(args[0], out qNumber))
                {
                    Console.Error.WriteLine("Usage: QZone <QQ number>");
                    return;
                }

                QZone qz = new QZone(qNumber);
                qz.GetBlogDataDictionary();
            }
        }
        /// <summary>
        /// Plain data holder for one blog post extracted from a QQ Zone page.
        /// </summary>
        internal struct QBlogData
        {
            /// <summary>Numeric ID of the post.</summary>
            public long BlogID;

            /// <summary>Title text of the post.</summary>
            public string Title;

            /// <summary>Body text of the post as captured from the page.</summary>
            public string Context;

            /// <summary>Category name of the post.</summary>
            public string Category;

            /// <summary>Time stamp of the post.</summary>
            public DateTime Time;
        }
    
    }  
    
    
  • 相关阅读:
    文件传输, socketserver模块
    模拟ssh, hashlib模块, struct模块, subprocess模块
    面向对象多继承, 网络基础, 编写网络相关的程序
    主动调用其他类的成员, 特殊成员
    约束, 自定义异常, hashlib, logging
    isinstance / issubclass / type, 方法和函数, 反射
    类的成员和嵌套(建模)
    面向对象
    模块和包
    异常处理
  • 原文地址:https://www.cnblogs.com/dachie/p/1734499.html
Copyright © 2020-2023  润新知