• C#获取网页信息并存入数据库


    1. 获取商品分类信息

    给定一个网页,获取该网页上的商品分类信息,并存入数据库。

    using Skay.WebBot;
    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Threading;
    using System.Windows.Forms;
    using Ivony.Html;
    using Ivony.Html.Parser;
    using System.Data.SqlClient;
    
    namespace catchGoods
    {
        /// <summary>
        /// Main form of the scraper sample. Clicking button1 starts a worker thread that
        /// downloads the shop home page, extracts the first- and second-level product
        /// categories, lists the first-level names in <c>listBox1</c> and stores both levels
        /// in the <c>exerciseOneSort</c> / <c>exerciseTwoSort</c> tables.
        /// </summary>
        public partial class Form1 : Form
        {
            // NOTE(review): the connection string (including the sa password) is hard-coded;
            // move it to configuration before using this outside a local demo.
            private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

            /// <summary>Worker thread started by the most recent click on button1.</summary>
            public static Thread th;

            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>Starts the category scrape on a separate thread so the UI stays responsive.</summary>
            private void button1_Click(object sender, EventArgs e)
            {
                th = new Thread(GetJDData);
                th.Start();
            }

            /// <summary>
            /// Downloads the home page, walks every <c>.categroup dl</c> group and inserts one
            /// row per first-level category and one row per second-level category link.
            /// Runs on the worker thread; UI updates are marshalled through <c>Invoke</c>.
            /// </summary>
            public void GetJDData()
            {
                // The using block guarantees the connection is closed even if a request or insert throws.
                using (SqlConnection conn = new SqlConnection(ConnectionString))
                {
                    conn.Open();

                    HttpUtility http = new HttpUtility();
                    string html = http.GetHtmlText("http://www.htluxe.com");
                    var document = new JumonyParser().Parse(html);

                    foreach (var item in document.Find(".categroup dl"))
                    {
                        var headingLink = item.FindFirst("h4 a");
                        string name = headingLink.InnerText();

                        // The category id is whatever follows the first '=' in the heading href.
                        // Skip groups whose href has no '=' instead of crashing the worker thread.
                        string[] hrefParts = headingLink.Attribute("href").Value().Split('=');
                        if (hrefParts.Length < 2)
                        {
                            continue;
                        }
                        string remark = hrefParts[1];

                        // listBox1 belongs to the UI thread, so the update has to go through Invoke.
                        this.Invoke((MethodInvoker)delegate
                        {
                            listBox1.Items.Add(name);
                        });

                        ExecuteInsert(
                            conn,
                            "insert into exerciseOneSort (className, remark) values (@className, @remark)",
                            TextParam("@className", name),
                            TextParam("@remark", remark));

                        foreach (var element in item.Find("dt p a"))
                        {
                            string nameTwo = element.InnerText();
                            string url = "http://www.htluxe.com/" + element.Attribute("href").Value();

                            ExecuteInsert(
                                conn,
                                "insert into exerciseTwoSort (className, url, idplus) values (@className, @url, @idplus)",
                                TextParam("@className", nameTwo),
                                TextParam("@url", url),
                                TextParam("@idplus", remark));
                        }
                    }
                }
            }

            /// <summary>
            /// Runs a parameterised INSERT on an open connection. Parameters are used instead of
            /// string concatenation so scraped text containing quotes or braces cannot break
            /// (or inject into) the statement.
            /// </summary>
            private static void ExecuteInsert(SqlConnection conn, string sql, params SqlParameter[] parameters)
            {
                using (SqlCommand command = new SqlCommand(sql, conn))
                {
                    command.Parameters.AddRange(parameters);
                    command.ExecuteNonQuery();
                }
            }

            /// <summary>Builds a text parameter, mapping a null string to <c>DBNull</c>.</summary>
            private static SqlParameter TextParam(string name, string value)
            {
                return new SqlParameter(name, (object)value ?? DBNull.Value);
            }
        }
    }

     完整版

    using Skay.WebBot;
    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Threading;
    using System.Windows.Forms;
    using Ivony.Html;
    using Ivony.Html.Parser;
    using System.Data.SqlClient;
    using Newtonsoft.Json.Linq;
    using Newtonsoft.Json;
    
    namespace catchGoods
    {
        /// <summary>
        /// Main form of the full scraper sample.
        /// button1 scrapes the two category levels from the shop home page into
        /// <c>exerciseOneSort</c> / <c>exerciseTwoSort</c>; button2 reads the stored
        /// second-level URLs back, walks every result page of each category and stores one
        /// row per product in <c>GoodsList</c>. Both jobs run on worker threads and report
        /// progress in <c>listBox1</c>.
        /// </summary>
        public partial class Form1 : Form
        {
            // NOTE(review): the connection string (including the sa password) is hard-coded;
            // move it to configuration before using this outside a local demo.
            private const string ConnectionString = "Data Source=.;Initial Catalog=StuTinafirst;User ID=sa;Password=123456";

            /// <summary>Worker thread started by the most recent click on button1.</summary>
            public static Thread th;

            /// <summary>Worker thread started by the most recent click on button2.</summary>
            public static Thread th2;

            // Hand-off state between threadTwo and GetJDData: the number of result pages and
            // the URL of the category currently being scraped.
            int page = 0;
            string surl;

            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>Starts the category scrape on a separate thread.</summary>
            private void button1_Click(object sender, EventArgs e)
            {
                // To re-run from scratch, clear exerciseTwoSort first ("delete from exerciseTwoSort").
                th = new Thread(GetJDDataOne);
                th.Start();
            }

            /// <summary>
            /// Downloads the home page, walks every <c>.categroup dl</c> group, shows each
            /// category in the list box and inserts one row per first-level category and one
            /// row per second-level category link.
            /// </summary>
            public void GetJDDataOne()
            {
                // The using block guarantees the connection is closed even if a request or insert throws.
                using (SqlConnection conn = new SqlConnection(ConnectionString))
                {
                    conn.Open();

                    HttpUtility http = new HttpUtility();
                    string html = http.GetHtmlText("http://www.htluxe.com");
                    var document = new JumonyParser().Parse(html);

                    foreach (var item in document.Find(".categroup dl"))
                    {
                        var headingLink = item.FindFirst("h4 a");
                        string name = headingLink.InnerText();

                        // The category id is whatever follows the first '=' in the heading href.
                        // Skip groups whose href has no '=' instead of crashing the worker thread.
                        string[] hrefParts = headingLink.Attribute("href").Value().Split('=');
                        if (hrefParts.Length < 2)
                        {
                            continue;
                        }
                        string remark = hrefParts[1];

                        // listBox1 belongs to the UI thread, so the update has to go through Invoke.
                        this.Invoke((MethodInvoker)delegate
                        {
                            listBox1.Items.Add(name + "  " + remark);
                        });

                        ExecuteInsert(
                            conn,
                            "insert into exerciseOneSort (className, remark) values (@className, @remark)",
                            TextParam("@className", name),
                            TextParam("@remark", remark));

                        foreach (var element in item.Find("dt p a"))
                        {
                            string nameTwo = element.InnerText();
                            string url = "http://www.htluxe.com/" + element.Attribute("href").Value();

                            this.Invoke((MethodInvoker)delegate
                            {
                                listBox1.Items.Add(nameTwo + "  " + url + "  " + remark);
                            });

                            ExecuteInsert(
                                conn,
                                "insert into exerciseTwoSort (className, url, idplus) values (@className, @url, @idplus)",
                                TextParam("@className", nameTwo),
                                TextParam("@url", url),
                                TextParam("@idplus", remark));
                        }
                    }
                }
            }

            /// <summary>Clears the list box and starts the product scrape on a separate thread.</summary>
            private void button2_Click(object sender, EventArgs e)
            {
                // To re-run from scratch, clear GoodsList first ("delete from GoodsList").
                listBox1.Items.Clear();
                th2 = new Thread(threadTwo);
                th2.Start();
            }

            /// <summary>
            /// Loads every stored second-level category URL, reads the page count from the
            /// category's first page and then scrapes all of its result pages via
            /// <see cref="GetJDData"/>.
            /// </summary>
            public void threadTwo()
            {
                // Scraped text may contain single quotes; all inserts below therefore use SQL
                // parameters rather than concatenation or manual quote doubling.
                DataTable categoryUrls = new DataTable();
                using (SqlConnection conn = new SqlConnection(ConnectionString))
                using (SqlDataAdapter adapter = new SqlDataAdapter("select url from exerciseTwoSort", conn))
                {
                    conn.Open();
                    adapter.Fill(categoryUrls);
                }

                foreach (DataRow row in categoryUrls.Rows)
                {
                    surl = row[0].ToString();

                    HttpUtility http = new HttpUtility();
                    string html = http.GetHtmlText(surl);
                    var document = new JumonyParser().Parse(html);

                    // The page count is taken from the part of the pager label after '/'
                    // (presumably "current/total" - TODO confirm against the live page).
                    string pagerText = document.FindFirst(".goods-page-min label").InnerText();
                    string[] pagerParts = pagerText == null ? new string[0] : pagerText.Split('/');
                    int pageCount;
                    if (pagerParts.Length < 2 || !int.TryParse(pagerParts[1], out pageCount))
                    {
                        // Unreadable pager: skip this category rather than crash the worker thread.
                        continue;
                    }

                    page = pageCount;
                    GetJDData();
                }
            }

            /// <summary>
            /// Scrapes result pages 1..<c>page</c> of the category at <c>surl</c>. For each
            /// product it reads the list-page fields, opens the detail page for the product
            /// number and star level, stores the row and shows it in the list box.
            /// </summary>
            private void GetJDData()
            {
                // Snapshot the hand-off fields so the loop works on stable values.
                string categoryUrl = surl;
                int pageCount = page;

                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    string listUrl = categoryUrl + "&price_min=0&price_max=0&page=" + pageNumber + "&sort=sort_order%20asc,last_update&order=DESC";
                    string listHtml = new HttpUtility().GetHtmlText(listUrl);
                    var listDocument = new JumonyParser().Parse(listHtml);

                    foreach (var item in listDocument.Find(".piclist li"))
                    {
                        try
                        {
                            // No manual quote escaping: the insert is parameterised, so the title
                            // is stored and displayed exactly as scraped.
                            string title = item.FindFirst(".base a").InnerText();
                            string nowPrice = item.FindFirst(".minprice").InnerText();
                            string oldPrice = item.FindFirst(".maxprice").InnerText();
                            string popular = item.FindFirst(".ratecount strong").InnerText();
                            string sales = item.FindFirst(".soldnum strong").InnerText();
                            string contents = item.FindFirst(".commentcount strong").InnerText();

                            string detailUrl = "http://www.htluxe.com/" + item.FindFirst("dt a").Attribute("href").Value();
                            string detailHtml = new HttpUtility().GetHtmlText(detailUrl);
                            var detailDocument = new JumonyParser().Parse(detailHtml);

                            // NOTE(review): the separator character was lost in the published
                            // listing (it read Split('') which does not compile). Assumed to be a
                            // half- or full-width colon between label and value - confirm against
                            // the live page. Falls back to the whole text if no separator is found.
                            string promotionText = detailDocument.FindFirst(".promotionMiddleTop p").InnerText();
                            string[] promotionParts = promotionText.Split(':', '\uFF1A');
                            string num = promotionParts.Length > 1 ? promotionParts[1] : promotionText;

                            string starLevel = detailDocument.FindFirst(".m-ratescore i").InnerText();

                            bufff(title, nowPrice, oldPrice, popular, sales, num, contents, starLevel);

                            // listBox1 belongs to the UI thread, so the update has to go through Invoke.
                            this.Invoke((MethodInvoker)delegate
                            {
                                listBox1.Items.Add(title + "  " + nowPrice + "  " + num + "  " + oldPrice + "  " + sales + "  " + popular + " " + contents + " " + starLevel);
                            });
                        }
                        catch (Exception ex)
                        {
                            // One bad product must not stop the run, but keep the reason visible.
                            MessageBox.Show("异常: " + ex.Message);
                        }
                    }
                }
            }

            /// <summary>
            /// Inserts one product row into <c>GoodsList</c>. Opens its own connection and
            /// disposes it afterwards so it goes back to the pool; the original version leaked
            /// one open connection per product.
            /// </summary>
            private static void bufff(string title, string nowPrice, string oldPrice,
                string popular, string sales, string num, string contents, string starLevel)
            {
                using (SqlConnection conn2 = new SqlConnection(ConnectionString))
                {
                    conn2.Open();
                    ExecuteInsert(
                        conn2,
                        "insert into GoodsList (name, num, sales, popular, starLevel, contents, price, oldPrice) " +
                        "values (@name, @num, @sales, @popular, @starLevel, @contents, @price, @oldPrice)",
                        TextParam("@name", title),
                        TextParam("@num", num),
                        TextParam("@sales", sales),
                        TextParam("@popular", popular),
                        TextParam("@starLevel", starLevel),
                        TextParam("@contents", contents),
                        TextParam("@price", nowPrice),
                        TextParam("@oldPrice", oldPrice));
                }
            }

            /// <summary>
            /// Runs a parameterised INSERT on an open connection. Parameters are used instead of
            /// string concatenation so scraped text containing quotes or braces cannot break
            /// (or inject into) the statement.
            /// </summary>
            private static void ExecuteInsert(SqlConnection conn, string sql, params SqlParameter[] parameters)
            {
                using (SqlCommand command = new SqlCommand(sql, conn))
                {
                    command.Parameters.AddRange(parameters);
                    command.ExecuteNonQuery();
                }
            }

            /// <summary>Builds a text parameter, mapping a null string to <c>DBNull</c>.</summary>
            private static SqlParameter TextParam(string name, string value)
            {
                return new SqlParameter(name, (object)value ?? DBNull.Value);
            }
        }
    }
  • 相关阅读:
    scrapy-redis使用以及剖析
    完全理解 Python 迭代对象、迭代器、生成器
    Python操作 RabbitMQ、Redis、Memcache、SQLAlchemy
    HTTP状态码
    Py西游攻关之RabbitMQ、Memcache、Redis
    Django contenttypes 应用
    cookie和session 以及Django中应用
    RESTful规范
    rest_framework框架的认识
    vue 总结
  • 原文地址:https://www.cnblogs.com/Tinamei/p/5162163.html
Copyright © 2020-2023  润新知