• C#网页采集


      /// <summary>
            /// Extracts the distinct values captured by the named group "key" from every
            /// match of <paramref name="rex"/> found in <paramref name="urlValue"/>.
            /// </summary>
            /// <param name="rex">
            /// Regular expression pattern, applied case-insensitively. It should define a
            /// named group "key"; if the group is missing or did not participate in a
            /// match, that match contributes an empty string.
            /// </param>
            /// <param name="urlValue">Text to search.</param>
            /// <returns>
            /// The captured "key" values in order of first appearance with duplicates
            /// removed (compared case-sensitively); an empty array when nothing matches.
            /// </returns>
            private string[] rexID(string rex, string urlValue)
            {
                ArrayList keys = new ArrayList();
                Regex pattern = new Regex(rex, RegexOptions.IgnoreCase);

                foreach (Match match in pattern.Matches(urlValue))
                {
                    // Read the group straight from the original match. Re-running the
                    // pattern on the matched substring (as the previous version did,
                    // and without IgnoreCase) loses lookaround context and fails on
                    // matches that only succeeded because of case-insensitivity.
                    string key = match.Groups["key"].Value;

                    // De-duplicate on the value that is actually stored; comparing the
                    // full match text against stored keys never detected duplicates.
                    if (!keys.Contains(key))
                    {
                        keys.Add(key);
                    }
                }

                return (string[])keys.ToArray(typeof(string));
            }
    放下电子产品,每天进步一点点
  • 相关阅读:
    UVA10361
    △UVA10494
    △UVA465
    △UVA10106
    △UVA424
    阶乘的精确值
    小学生算术
    UVA156
    △UVA120
    linux应用之ntpdate命令联网同步时间
  • 原文地址:https://www.cnblogs.com/vienna/p/3514856.html
Copyright © 2020-2023  润新知