1.站点地图构建:
/// <summary>
/// Action result that writes a <see cref="Sitemap"/> to the HTTP response as XML.
/// By: rhythmk.cnblogs.com
/// </summary>
public class SitemapResult : ActionResult
{
    /// <summary>
    /// Creates a result that will serialize the given sitemap.
    /// </summary>
    /// <param name="sitemap">The sitemap to write to the response.</param>
    public SitemapResult(Sitemap sitemap)
    {
        this.Sitemap = sitemap;
    }

    /// <summary>
    /// The sitemap serialized by <see cref="ExecuteResult"/>.
    /// </summary>
    public Sitemap Sitemap { get; private set; }

    /// <summary>
    /// Sets the response content type to "text/xml" and serializes
    /// <see cref="Sitemap"/> to the response output writer.
    /// </summary>
    /// <param name="context">The controller context whose response receives the XML.</param>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    public override void ExecuteResult(ControllerContext context)
    {
        if (context == null)
        {
            throw new ArgumentNullException("context");
        }

        var response = context.HttpContext.Response;
        response.ContentType = "text/xml";

        XmlSerializer serializer = new XmlSerializer(typeof(Sitemap));
        serializer.Serialize(response.Output, this.Sitemap);
    }
}

/// <summary>
/// Sitemap entity: a list of <see cref="SitemapUrl"/> entries serialized under a
/// root "urlset" element in the sitemaps.org 0.9 namespace.
/// By: rhythmk.cnblogs.com
/// </summary>
[XmlRoot(ElementName = "urlset", Namespace = "http://www.sitemaps.org/schemas/sitemap/0.9")]
[Serializable]
public class Sitemap : List<SitemapUrl>
{
    /// <summary>
    /// Serializes this sitemap as XML to the supplied writer.
    /// </summary>
    /// <param name="writer">Destination text writer; it is left open for the caller.</param>
    [XmlInclude(typeof(SitemapUrl))]
    public void Serialize(TextWriter writer)
    {
        XmlSerializer serializer = new XmlSerializer(typeof(Sitemap));

        // The XmlTextWriter is intentionally not disposed: closing it would also
        // close the caller-owned TextWriter underneath.
        XmlTextWriter xmlTextWriter = new XmlTextWriter(writer);
        serializer.Serialize(xmlTextWriter, this);
    }
}

/// <summary>
/// A single sitemap entry, serialized as a "url" element.
/// </summary>
[XmlRoot(ElementName = "url")]
[XmlType(TypeName = "url")]
[Serializable]
public class SitemapUrl
{
    // Format written to the "lastmod" element.
    private const string LastModifiedFormat = "yyyy-MM-dd";

    private DateTime lastModified;

    /// <summary>
    /// Page URL, serialized as "loc".
    /// </summary>
    [XmlElement(ElementName = "loc")]
    public string Location { get; set; }

    /// <summary>
    /// Last modification date, serialized as "lastmod" in yyyy-MM-dd form.
    /// Reading the property while no date has been set stores and returns the current date.
    /// </summary>
    /// <remarks>
    /// The getter formats with the invariant culture so the output is always a
    /// Gregorian date, regardless of the thread's current culture. The setter first
    /// accepts that exact format culture-independently, then falls back to
    /// current-culture parsing so callers that pass culture-formatted strings keep working.
    /// </remarks>
    [XmlElement(ElementName = "lastmod")]
    public string LastModified
    {
        get
        {
            if (DateTime.MinValue.Equals(this.lastModified))
            {
                this.lastModified = DateTime.Now;
            }

            return this.lastModified.ToString(
                LastModifiedFormat,
                System.Globalization.CultureInfo.InvariantCulture);
        }
        set
        {
            DateTime parsed;
            bool isCanonical = DateTime.TryParseExact(
                value,
                LastModifiedFormat,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out parsed);

            // Fallback keeps the original behavior (including its exceptions for
            // null or unparseable input).
            this.lastModified = isCanonical ? parsed : DateTime.Parse(value);
        }
    }

    /// <summary>
    /// Expected change frequency, serialized as "changefreq".
    /// </summary>
    [XmlElement(ElementName = "changefreq")]
    public ChangeFrequency ChangeFrequency { get; set; }

    /// <summary>
    /// Relative priority of this URL, serialized as "priority".
    /// </summary>
    [XmlElement(ElementName = "priority")]
    public double Priority { get; set; }
}

/// <summary>
/// Values for the "changefreq" element. Member names are kept lowercase because
/// they are written to the XML as-is.
/// </summary>
public enum ChangeFrequency
{
    always,
    hourly,
    daily,
    weekly,
    monthly,
    yearly,
    never
}
使用:
/// <summary>
/// Builds the XML sitemap for one page of examinations.
/// </summary>
/// <param name="subjectID">Subject identifier passed through to GetExamList; defaults to 0.</param>
/// <param name="pageIndex">Page index passed through to GetExamList; defaults to 1.</param>
/// <returns>A <see cref="SitemapResult"/> containing one URL entry per item returned by GetExamList.</returns>
public ActionResult Index(int subjectID = 0, int pageIndex = 1)
{
    // Number of entries requested per sitemap page.
    int pageSize = 2000;

    // Filled in by GetExamList through the ref parameter; not used further here.
    int total = 0;
    var list = GetExamList(subjectID, pageIndex, pageSize, ref total);

    // Every entry shares the same "last modified" value: today's date.
    // SitemapUrl.LastModified parses this string back into a DateTime.
    var time = DateTime.Now.ToLongDateString();

    Sitemap site = new Sitemap();
    if (list != null)
    {
        foreach (var p in list)
        {
            // Percent-encode the title so spaces, '&', '#' and non-ASCII characters
            // cannot break the query string or produce an invalid sitemap URL.
            string title = Uri.EscapeDataString(Convert.ToString(p.ExaminationTitle) ?? string.Empty);

            site.Add(new SitemapUrl()
            {
                ChangeFrequency = ChangeFrequency.weekly,
                LastModified = time,
                Location = string.Format("http://rhythmk/home/view/{0}?t={1}", p.ExaminationID, title),
                Priority = 0.7
            });
        }
    }

    return new SitemapResult(site);
}
2.判断是否为搜索引擎:
/// <summary>
/// Determines whether a request comes from a search-engine crawler by matching the
/// user agent against the regular expression stored in the "SpiderKey" app setting.
/// </summary>
/// <param name="useragent">The User-Agent string of the request; may be null or empty.</param>
/// <returns>
/// true when the user agent matches the configured pattern (case-insensitive);
/// false when the user agent is null/empty or no pattern is configured.
/// </returns>
public static bool IsSearchEngine(string useragent)
{
    if (string.IsNullOrEmpty(useragent))
    {
        return false;
    }

    // Example setting:
    // spiderkey=@"Googlebot|Feedfetcher-Google|Baiduspider|Yahoo\s*\!\s*Slurp|YodaoBot|Sosoimagespider|Sosospider|Sogou\s*Web\s*Sprider"
    // NOTE(review): "Sprider" in the example looks like a typo for "Spider" - confirm against the real config.
    string spiderKey = System.Configuration.ConfigurationManager.AppSettings["SpiderKey"];

    // A missing setting used to throw NullReferenceException, and an empty pattern
    // would match every user agent; treat both as "not a search engine".
    if (string.IsNullOrEmpty(spiderKey))
    {
        return false;
    }

    // The static IsMatch uses the framework's regex cache, so the pattern is not
    // re-parsed (or re-compiled) on every request.
    return System.Text.RegularExpressions.Regex.IsMatch(
        useragent,
        spiderKey,
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}

/// <summary>
/// Renders the "SearchView" view for search-engine crawlers and the default view
/// for all other visitors.
/// </summary>
/// <returns>The view result chosen for the current request.</returns>
public ActionResult View()
{
    bool engine = IsSearchEngine(this.HttpContext.Request.ServerVariables["Http_User_Agent"]);

    // NOTE(review): "entity" is not defined in this snippet; presumably the model is loaded elsewhere.
    // Crawlers and regular visitors get different views of the same data.
    if (engine)
    {
        return View("SearchView", entity);
    }

    return View(entity);
}
3. 检验是否成功:
修改浏览器 Http_User_Agent 值,以达到模拟搜索引擎爬虫浏览效果。
以火狐为例:
3.1地址栏键入:about:config 回车
3.2 设置:general.useragent.override–>"Baiduspider" 。可以达到模拟百度爬虫效果。
火狐默认:Mozilla/5.0 (Windows; U; Windows NT 6.0; zh-CN) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.29 Safari/525.19
设置完成后,可以浏览 http://www.docin.com/p-259119935.html 这个网址,与其他未设置的浏览器比较页面的浏览效果。