• 网页内容扫描器


    程序简介

    使用RestSharp即时获取目标网页的内容,使用正则表达式提取需要识别的内容并对比结果。

    c#正则表达式的[贪婪]模式

    正则表达式引擎默认采用贪婪模式:只要模式允许,它就会匹配尽可能多的字符。

    如何匹配满足条件的最短字符串?

    在量词(即“重复描述字符”,如 *、+)后面添加“?”,可以将匹配模式改为非贪婪模式。例如对文本“<b>1</b><b>2</b>”,模式“<b>.*</b>”会匹配整个文本,而“<b>.*?</b>”只匹配“<b>1</b>”。

    代码

    主窗体代码

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using Wsion.ClassLib;
    using RestSharp;
    using System.Net;
    using System.Text.RegularExpressions;
    using System.Threading;
    
    namespace WebpageScanner
    {
        /// <summary>
        /// Main window of the web page scanner. While a scan is running, a worker
        /// thread periodically downloads the configured URL, extracts the first
        /// capture group of the configured regular expression and compares it with
        /// the expected target text. When the two differ, the program configured in
        /// the path text box is launched and the scan stops.
        /// </summary>
        public partial class Form1 : Form
        {
            /// <summary>Delay between two polls of the target page, in milliseconds.</summary>
            private const int PollIntervalMilliseconds = 2000;

            /// <summary>
            /// Cancellation flag owned by exactly one scan run. Giving every run its
            /// own token means that stopping and quickly restarting cannot revive a
            /// worker that is still sleeping from the previous run.
            /// </summary>
            private sealed class ScanToken
            {
                public volatile bool Cancelled;
            }

            // Worker thread of the most recently started scan (null before the first scan).
            Thread thread = null;

            // Token of the most recently started scan; only touched on the UI thread.
            private ScanToken currentScan;

            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>Starts a new scan on a background thread.</summary>
            private void button1_Click(object sender, EventArgs e)
            {
                // Make sure a previous run that is still winding down cannot continue.
                CancelCurrentScan();

                ScanToken token = new ScanToken();
                currentScan = token;

                thread = new Thread(new ThreadStart(() => process(token)));
                // Background thread: it must not keep the process alive after the form closes.
                thread.IsBackground = true;
                thread.Start();
                button1.Enabled = false;
            }

            /// <summary>Clears the log.</summary>
            private void button2_Click(object sender, EventArgs e)
            {
                textBoxLog.Text = string.Empty;
            }

            /// <summary>Stops the running scan (cooperatively) and re-enables the start button.</summary>
            private void button3_Click(object sender, EventArgs e)
            {
                CancelCurrentScan();
                button1.Enabled = true;
            }

            /// <summary>
            /// Asks the current worker, if any, to stop. The worker notices the flag
            /// at its next check; no thread is aborted.
            /// </summary>
            private void CancelCurrentScan()
            {
                if (currentScan != null)
                {
                    currentScan.Cancelled = true;
                }
            }

            /// <summary>
            /// Worker loop: polls the page until <paramref name="token"/> is cancelled,
            /// the extracted text differs from the target, or the configuration is invalid.
            /// </summary>
            /// <param name="token">Cancellation flag of the run this worker belongs to.</param>
            private void process(ScanToken token)
            {
                try
                {
                    while (!token.Cancelled)
                    {
                        string result;
                        try
                        {
                            result = FetchAndExtract(GetText(textBoxUrl), GetText(textBoxRegex));
                        }
                        catch (UriFormatException ex)
                        {
                            // Invalid URL: polling again cannot succeed, so stop the run.
                            StopAfterError(token, ex.Message);
                            return;
                        }
                        catch (ArgumentException ex)
                        {
                            // Invalid regular expression pattern.
                            StopAfterError(token, ex.Message);
                            return;
                        }

                        // The user may have stopped (and restarted) while the request was
                        // in flight; a cancelled run must not touch the UI any more.
                        if (token.Cancelled)
                        {
                            return;
                        }

                        if (result == null)
                        {
                            // Transient failure: record it and try again on the next poll.
                            AppendText(textBoxLog, FormatLogEntry("Request failed: no response body received."));
                        }
                        else
                        {
                            AppendText(textBoxLog, FormatLogEntry(result));

                            if (result != GetText(textBoxTarget))
                            {
                                Alert();
                                token.Cancelled = true;
                                SetEnabled(button1, true);
                                return;
                            }
                        }

                        Thread.Sleep(PollIntervalMilliseconds);
                    }
                }
                catch (InvalidOperationException)
                {
                    // Control.Invoke fails with InvalidOperationException (or its subclass
                    // ObjectDisposedException) once the form has been closed; there is
                    // nothing left to update, so the worker simply ends.
                }
            }

            /// <summary>
            /// Downloads <paramref name="url"/> and returns the first capture group of
            /// <paramref name="pattern"/> with line breaks replaced by spaces and trimmed.
            /// </summary>
            /// <returns>
            /// The extracted text (empty when the pattern does not match), or
            /// <c>null</c> when the response carried no body.
            /// </returns>
            /// <exception cref="UriFormatException">The URL is not a valid absolute URI.</exception>
            /// <exception cref="ArgumentException">The pattern is not a valid regular expression.</exception>
            private static string FetchAndExtract(string url, string pattern)
            {
                // Validate the pattern before spending time on the network.
                // The options are flags and must be combined with '|'; the previous
                // '&' evaluated to RegexOptions.None and silently disabled both.
                Regex regex = new Regex(pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);

                RestClient client = new RestClient();
                client.BaseUrl = new Uri(url);
                // To go through a proxy, configure it here, e.g.:
                //client.Proxy = new WebProxy("proxy.net", 8080);
                //client.Proxy.Credentials = System.Net.CredentialCache.DefaultCredentials;

                RestRequest request = new RestRequest();
                request.Method = Method.GET;
                IRestResponse response = client.Execute(request);

                byte[] rawBytes = response.RawBytes;
                if (rawBytes == null)
                {
                    return null;
                }

                string body = EncodingHelper.Instance.BytesToUnicode(rawBytes);
                return regex.Match(body).Groups[1].Value
                    .Replace('\r', ' ')
                    .Replace('\n', ' ')
                    .Trim();
            }

            /// <summary>Formats one log entry: a time stamp line, the message and a blank line.</summary>
            private static string FormatLogEntry(string message)
            {
                return string.Format("[{0}]{1}{2}{1}{1}",
                    DateTime.Now.ToLongTimeString(), Environment.NewLine, message);
            }

            /// <summary>Logs <paramref name="message"/>, ends the run and re-enables the start button.</summary>
            private void StopAfterError(ScanToken token, string message)
            {
                // A run that was already cancelled by the user must not re-enable the
                // button, because a newer run may have disabled it again.
                if (token.Cancelled)
                {
                    return;
                }

                token.Cancelled = true;
                AppendText(textBoxLog, FormatLogEntry(message));
                SetEnabled(button1, true);
            }

            /// <summary>
            /// Signals that the scanned content changed by launching the program or
            /// document configured in the path text box. Failures are written to the
            /// log instead of crashing the worker thread.
            /// </summary>
            private void Alert()
            {
                string path = GetText(textBoxPath);
                if (string.IsNullOrEmpty(path))
                {
                    AppendText(textBoxLog, FormatLogEntry("Content changed, but no alert program is configured."));
                    return;
                }

                try
                {
                    System.Diagnostics.Process.Start(path);
                }
                catch (Win32Exception ex)
                {
                    AppendText(textBoxLog, FormatLogEntry(ex.Message));
                }
                catch (System.IO.FileNotFoundException ex)
                {
                    AppendText(textBoxLog, FormatLogEntry(ex.Message));
                }
            }



            #region SetVal Template
            /*
             * Helpers for reading and changing control properties from a thread
             * other than the UI thread. Each helper marshals itself onto the
             * control's thread when required.
             */

            delegate void SetValueHandler<T>(Control control, T val);

            delegate T GetValueHandler<T>(Control control);

            /// <summary>Sets <c>control.Text</c> on the control's own thread.</summary>
            private void SetText(Control control, string val)
            {
                if (control.InvokeRequired)
                {
                    SetValueHandler<string> handler = new SetValueHandler<string>(SetText);
                    control.Invoke(handler, new object[] { control, val });
                }
                else
                {
                    control.Text = val;
                }
            }

            /// <summary>Appends <paramref name="val"/> to <c>control.Text</c> on the control's own thread.</summary>
            private void AppendText(Control control, string val)
            {
                if (control.InvokeRequired)
                {
                    SetValueHandler<string> handler = new SetValueHandler<string>(AppendText);
                    control.Invoke(handler, new object[] { control, val });
                }
                else
                {
                    // Read and write happen together on the UI thread, so no text
                    // typed or cleared in between is lost.
                    control.Text = control.Text + val;
                }
            }

            /// <summary>Reads <c>control.Text</c> on the control's own thread.</summary>
            private string GetText(Control control)
            {
                if (control.InvokeRequired)
                {
                    GetValueHandler<string> handler = new GetValueHandler<string>(GetText);
                    return (string)control.Invoke(handler, new object[] { control });
                }

                return control.Text;
            }

            /// <summary>Sets <c>control.Enabled</c> on the control's own thread.</summary>
            private void SetEnabled(Control control, bool val)
            {
                if (control.InvokeRequired)
                {
                    SetValueHandler<bool> handler = new SetValueHandler<bool>(SetEnabled);
                    control.Invoke(handler, new object[] { control, val });
                }
                else
                {
                    control.Enabled = val;
                }
            }

            #endregion


        }
    }

    以下为编码转换器部分

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Wsion.ClassLib
    {
        /// <summary>
        /// Text encoding helpers.
        /// </summary>
        /// <remarks>
        /// A .NET <see cref="string"/> is always UTF-16 in memory, so the
        /// string-to-string methods of this class cannot "re-encode" a string. They
        /// round-trip the text through the given encodings, which returns the same
        /// text except that characters not representable in those encodings are
        /// replaced by the encoding's fallback character. To decode raw bytes, use
        /// <see cref="BytesToUnicode(byte[])"/> or
        /// <see cref="BytesToUnicode(byte[], Encoding)"/>.
        /// NOTE: the "gbk" and "gb2312" code pages are built into .NET Framework;
        /// on .NET Core / .NET 5+ they are only available after
        /// CodePagesEncodingProvider has been registered.
        /// </remarks>
        public class EncodingHelper
        {
            /// <summary>Shared instance; the class holds no state.</summary>
            public static readonly EncodingHelper Instance = new EncodingHelper();

            /// <summary>
            /// Encodes <paramref name="fromString"/> with <paramref name="fromEncoding"/>,
            /// transcodes the bytes to <paramref name="toEncoding"/> and decodes them again.
            /// </summary>
            /// <param name="fromString">Text to round-trip.</param>
            /// <param name="fromEncoding">Encoding used to produce the intermediate bytes.</param>
            /// <param name="toEncoding">Encoding the bytes are converted to and decoded with.</param>
            /// <returns>The round-tripped text.</returns>
            /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
            public string EncodingConvert(string fromString, Encoding fromEncoding, Encoding toEncoding)
            {
                if (fromString == null)
                {
                    throw new ArgumentNullException("fromString");
                }
                if (fromEncoding == null)
                {
                    throw new ArgumentNullException("fromEncoding");
                }
                if (toEncoding == null)
                {
                    throw new ArgumentNullException("toEncoding");
                }

                byte[] fromBytes = fromEncoding.GetBytes(fromString);
                byte[] toBytes = Encoding.Convert(fromEncoding, toEncoding, fromBytes);

                return toEncoding.GetString(toBytes);
            }

            /// <summary>Round-trips <paramref name="gb2312String"/> from GB2312 to UTF-8.</summary>
            public string GB2312ToUtf8(string gb2312String)
            {
                Encoding fromEncoding = Encoding.GetEncoding("gb2312");
                Encoding toEncoding = Encoding.UTF8;
                return EncodingConvert(gb2312String, fromEncoding, toEncoding);
            }

            /// <summary>Round-trips <paramref name="utf8String"/> from UTF-8 to GB2312.</summary>
            public string Utf8ToGB2312(string utf8String)
            {
                Encoding fromEncoding = Encoding.UTF8;
                Encoding toEncoding = Encoding.GetEncoding("gb2312");
                return EncodingConvert(utf8String, fromEncoding, toEncoding);
            }

            /// <summary>Round-trips <paramref name="gbkString"/> from GBK to UTF-16.</summary>
            public string GbkToUnicode(string gbkString)
            {
                Encoding fromEncoding = Encoding.GetEncoding("gbk");
                Encoding toEncoding = Encoding.Unicode;
                return EncodingConvert(gbkString, fromEncoding, toEncoding);
            }

            /// <summary>Decodes GBK-encoded <paramref name="bytes"/> into a string.</summary>
            /// <param name="bytes">Raw bytes; may be <c>null</c> or empty.</param>
            /// <returns>The decoded text, or an empty string when there is nothing to decode.</returns>
            public string BytesToUnicode(byte[] bytes)
            {
                // Checked before the encoding lookup so that "nothing to decode" never
                // depends on the GBK code page being available.
                if (bytes == null || bytes.Length == 0)
                {
                    return string.Empty;
                }

                return Encoding.GetEncoding("gbk").GetString(bytes);
            }

            /// <summary>Decodes <paramref name="bytes"/> using <paramref name="sourceEncoding"/>.</summary>
            /// <param name="bytes">Raw bytes; may be <c>null</c> or empty.</param>
            /// <param name="sourceEncoding">Encoding the bytes were written in.</param>
            /// <returns>The decoded text, or an empty string when there is nothing to decode.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="sourceEncoding"/> is <c>null</c>.</exception>
            public string BytesToUnicode(byte[] bytes, Encoding sourceEncoding)
            {
                if (sourceEncoding == null)
                {
                    throw new ArgumentNullException("sourceEncoding");
                }
                if (bytes == null || bytes.Length == 0)
                {
                    return string.Empty;
                }

                return sourceEncoding.GetString(bytes);
            }
        }
    }

    目前多线程Thread.Abort()时程序偶尔会崩溃,希望得到指正。

    源码下载地址:

    http://files.cnblogs.com/files/wsion/WebpageScanner.7z

    原创博文,转载请注明出处

  • 相关阅读:
    今天是元旦啊
    [待解决]python 函数加括号和不加括号的区别
    Jupyter Notebook的快捷键列表误操作发现的新大陆
    Series选择和过滤
    做鸢尾花切片练习中的'&'问题:(&,|)和(and,or)
    报错合集
    关于随机数种子seed的问题尽量使用numpy下的seed
    pandas创建Series序列/hashable
    在jupyter notebook中插入截图
    xml反序列化时,如何生成与之对应的类文件
  • 原文地址:https://www.cnblogs.com/wsion/p/4357229.html
Copyright © 2020-2023  润新知