• 多站点图片爬虫框架


    调用方法:

     protected void Page_Load(object sender, EventArgs e)
        {
            IDatabaseDAO objBook = DAOFactory.CreateBookDAO();

            string sql = string.Format("select top 1 * from BookInfo with(nolock) where Img is null or img=''");
            DataTable dt = objBook.ExecuteDataSetSQL(sql).Tables[0];
            StringBuilder sb = new StringBuilder();

            for (int i = 0; i < dt.Rows.Count; i++)
            {
                NewImg ni = new NewImg(ImgSpec.imgWidth, ImgSpec.imgHeight);
                ni.filltype = 0;
                ImgManager im = new ImgManager(ni, dt.Rows[i]["Title"].ToString(), 1);
                string msg = "";
                List<NewImg> lstni = new List<NewImg>();
                lstni = im.getImgList(ref msg);
                if (lstni.Count > 0)
                {
                    NewImg ni2 = lstni[0];
                    ni2.imgid = int.Parse(dt.Rows[i]["bid"].ToString());
                    imgSave(ni2);
                }
                else
                {
                    if (sb.Length > 0) sb.Append("|");
                    sb.Append(HttpUtility.UrlEncode(dt.Rows[i]["Title"].ToString()));
                }
                if (sb.Length > 100)
                {
                    configSrv.configSoapClient csc = new configSrv.configSoapClient();
                    csc.addWordToQueue(sb.ToString(), "Google", 2239);//2239是用来识别哪个站点发起的,可以是任意整数
                    sb = new StringBuilder();
                }
                Thread.Sleep(1000);
            }
            if (sb.Length > 0)
            {
                configSrv.configSoapClient csc = new configSrv.configSoapClient();
                csc.addWordToQueue(sb.ToString(), "Google", 2239);//2239是用来识别哪个站点发起的,可以是任意整数
            }

        }

        private void imgSave(NewImg ni)
        {
            try
            {
                IDatabaseDAO objBook = DAOFactory.CreateBookDAO();
                //string sql = "update BookInfo set Img=@Img,ImgThumb=@ImgThumb where bid=@bid";
                string sql = "update BookInfo set Img=@Img where bid=@bid";
                SqlParameter[] para = {
                                          new SqlParameter("@Img",SqlDbType.VarChar),
                                          new SqlParameter("@ImgThumb",SqlDbType.VarChar),
                                          new SqlParameter("@bid",SqlDbType.Int)
                                          };
                string subPath = "\\Images\\{0}×{1}\\{2}";
                string Img = string.Format(subPath, ni.imgwidth, ni.imghight, ni.imgid + ni.ext);
                string imgPath = Sxmobi.FileHelper.GetMapPath("~") + Img;
                Sxmobi.FileHelper.EnsureDir(Path.GetDirectoryName(imgPath));

                File.Copy(ni.generatePath, imgPath,true);
                File.Delete(ni.generatePath);//删除temp文件

                string ImgThumb = string.Format(subPath, ImgSpec.imgThumbWidth, ImgSpec.imgThumbHeight, ni.imgid + ni.ext);
                ni.imgwidth = ImgSpec.imgThumbWidth;//图片新宽
                ni.imghight = ImgSpec.imgThumbHeight;//图片新高
                ni.generatePath = Sxmobi.FileHelper.GetMapPath("~") + ImgThumb;
                ImgManager.ImgMgrLocal(ni);
                if (File.Exists(ni.srcPath))
                    File.Delete(ni.srcPath);//删除temp文件

                para[0].Value = Img;
                para[1].Value = ImgThumb;
                para[2].Value = ni.imgid;
                objBook.ExecuteNonQuerySQL(sql, para);
            }
            catch (Exception ex)
            {
                Sxmobi.LogHelper.Error(this.GetType().ToString(), ex.Message, ex);
            }
        }

    (以上所涉及的类均可在下面爬虫框架的打包文件中找到)

    爬虫框架下载地址:http://download.csdn.net/detail/dasihg/4133312

  • 相关阅读:
    vscode已有64位版本。
    git代码回滚的两种选择
    代码维护的问题
    草珊瑚理解IFC(inline formatting context)
    nodejs的dependency.md
    如何实现在H5里调起高德地图APP?(下)
    如何实现在H5里调起高德地图APP?(上)
    酸奶妈妈再次回归~
    【从零开始学】如何在安卓平台上实现定位?
    如何将四大名著和地图相结合?
  • 原文地址:https://www.cnblogs.com/dashi/p/4034717.html
Copyright © 2020-2023  润新知