因为需要,自己写了个批量查询qs的小软件。从网站中抓出需要的数据,格式化显示:
对字符串进行检测处理,先用Replace函数去掉字符串的空格,再用正则表达式匹配,返回匹配的字符串,如果没有匹配,则返回空字符串:
/// <summary>
/// Checks whether the input contains a well-formed QS number.
/// </summary>
/// <param name="waitregexstr">Raw user input; space characters are removed before matching.</param>
/// <returns>
/// The first substring consisting of "QS" (either case) followed by twelve digits,
/// or an empty string when the input contains no such substring.
/// </returns>
private string CheckText(string waitregexstr)
{
    const string qsPattern = "[qQ][sS][0-9]{12}";

    string compact = waitregexstr.Replace(" ", "");
    Match found = Regex.Match(compact, qsPattern);

    // Match.Value is the empty string when the match did not succeed.
    return found.Value;
}
获取网页内容。这部分我还是不太会,拿了别人的代码。不过它其实就是用HttpWebRequest和HttpWebResponse的各个函数,配合Stream来获取网页内容。
#region GetWebContent: download the page at a URL and return its text
// These fields are kept because members outside this excerpt may still reference them.
// cbuffer and byteRead are no longer used by GetWebContent itself.
HttpWebRequest httpReq;
HttpWebResponse httpResp;
string strBuff = "";
char[] cbuffer = new char[256];
int byteRead = 0;

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body decoded as UTF-8.
/// </summary>
/// <param name="url">Address to fetch; space characters are removed before it is parsed.</param>
/// <returns>The complete response body of this request only.</returns>
/// <exception cref="UriFormatException">The cleaned-up URL is not a valid URI.</exception>
/// <exception cref="WebException">The request failed or the server returned an error status.</exception>
public string GetWebContent(string url)
{
    url = url.Replace(" ", "");
    Uri httpURL = new Uri(url);

    // HttpWebRequest has no public constructor; instances come from WebRequest.Create.
    httpReq = (HttpWebRequest)WebRequest.Create(httpURL);
    httpResp = (HttpWebResponse)httpReq.GetResponse();

    // Dispose the response, stream and reader even if reading throws, so the
    // underlying connection is released.
    using (httpResp)
    using (Stream respStream = httpResp.GetResponseStream())
    using (StreamReader respStreamReader = new StreamReader(respStream, Encoding.UTF8))
    {
        // Overwrite rather than append: the previous implementation kept adding each
        // page to strBuff, so later calls returned every earlier page as well and the
        // caller's first regex match was always the first page fetched.
        strBuff = respStreamReader.ReadToEnd();
    }

    return strBuff;
}
#endregion
定义从网页内容中获取特定字符串的函数。调用GetWebContent获取网页源码后,对字符串进行多次处理:首先用正则表达式(Regex.Match)匹配出需要的那一段内容,再用Replace函数去掉多余的HTML标签,得到自己所要显示的字符串。这部分如果有大牛有更好的方案,请指教,感谢!
/// <summary>
/// Fetches the detail page for one QS number and reduces the first
/// "&lt;h3&gt; ... &lt;/table&gt;" section of it to plain text.
/// </summary>
/// <param name="textboxtext">QS number used to build the page URL (upper-cased before use).</param>
/// <returns>
/// The section text with the known HTML tags stripped, or an empty string when the
/// section is not found or any exception occurs while fetching or processing.
/// </returns>
private string operastr(string textboxtext)
{
    try
    {
        // Invariant casing: the value becomes part of a URL, so it must not depend on the UI culture.
        string url = "避嫌,这里去掉了网址" + textboxtext.ToUpperInvariant() + ".htm";
        string webcontent = GetWebContent(url);

        // [\s\S] matches any character including line breaks, so the lazy match can
        // span the multi-line block from the heading to the end of the table.
        Match section = Regex.Match(webcontent, @"<h3>[\s\S]*?</table>");
        if (!section.Success)
        {
            return "";
        }

        // NOTE(review): the " " literals below are reproduced as published. Some of them
        // may originally have been escape sequences such as "\n" that were lost when the
        // article was rendered - confirm against the real project before relying on them.
        return section.Value
            .Replace("<td class=\"info-field\"><div>", " ")
            .Replace("</div></td>", "")
            .Replace("<td class=\"info-value\"><div style=\"\">", ":")
            .Replace(" ", "")
            .Replace("</tr>", "")
            .Replace("<tr>", "")
            .Replace("</tbody></table>", "")
            .Replace("<table class=\"info-table\"><tbody>", "")
            .Replace("<h3>", "")
            .Replace("</h3>", "")
            .Replace(" ", "");
    }
    catch (Exception)
    {
        // Best effort by design: any failure is reported to the caller as "no data".
        return "";
    }
    finally
    {
        // Close the loading indicator on every path, including failures.
        loadingGrid.IsOpen = false;
    }
}
定义多行字符串的处理方法。根据TextBox的LineCount属性与GetLineText方法,逐行对字符串进行处理:
/// <summary>
/// Processes a multi-line text box: validates every line as a QS number, queries each
/// one and appends the results to the label.
/// </summary>
/// <param name="tb">Text box whose lines are read one by one.</param>
/// <param name="lb">Label that receives the results; new results are appended to existing content.</param>
private void MutilpleStr(TextBox tb, Label lb)
{
    for (int i = 0; i < tb.LineCount; i++)
    {
        string rawLine = tb.GetLineText(i);

        // Skip blank lines (for example the empty line after a trailing line break)
        // instead of rejecting the whole batch because of them.
        if (string.IsNullOrWhiteSpace(rawLine))
        {
            continue;
        }

        string linestr = CheckText(rawLine);
        if (linestr == "")
        {
            wrong("qs格式错误!");
            return;
        }

        // Query with the validated QS number, not the raw line: the raw text may still
        // carry spaces, a line terminator or surrounding characters that would corrupt the URL.
        string result = operastr(linestr);

        if (lb.Content == null)
        {
            lb.Content = result;
        }
        else
        {
            lb.Content += " ------------------------------------";
            lb.Content += " " + result;
        }
    }
}
主体。根据用户输入的qs编号行数,分为单行与多行两种情况分别处理:
// Dispatch on the number of lines the user entered: an empty box is rejected,
// several lines go through the batch path, a single line is queried directly.
if (mutipleText.Text == "")
{
    wrong("文本框不能为空!");
}
else if (this.mutipleText.LineCount > 1)
{
    loadingGrid.IsOpen = true;
    mutipleText.BorderBrush = Brushes.White;
    try
    {
        MutilpleStr(mutipleText, my);
    }
    finally
    {
        // MutilpleStr returns early on a format error; make sure the loading
        // indicator never stays open in that case.
        loadingGrid.IsOpen = false;
    }
}
else if (this.mutipleText.LineCount == 1)
{
    string qs = CheckText(mutipleText.Text);
    if (qs.Length < 1)
    {
        wrong("请输入正确的qs!");
        return;
    }

    mutipleText.BorderBrush = Brushes.White;

    // Query with the validated QS number rather than the raw text, so stray
    // characters around the number cannot end up in the URL.
    my.Content = operastr(qs);
}
最新测试发现:连续抓取多个qs时,返回的结果会全部是同一个qs的数据,初步判断是抓取速度太快导致的,有空我再改善一下。
项目已经上传: http://files.cnblogs.com/files/ssvip/qs.rar