• 移除重复字符的几个算法简单比较


       /// <summary>
       /// Demonstrates several ways of removing duplicated characters from a string.
       /// Every RemoveDuplicatedChar_N method returns the distinct characters of its
       /// input; null, empty and single-character inputs are returned unchanged.
       /// </summary>
       class Program
       {
           /// <summary>
           /// Reads a text file and runs methods 1, 2 and 6 on its content.
           /// The results are not inspected here; presumably the calls exist so that
           /// their running times can be compared.
           /// </summary>
           static void Main(string[] args)
           {
               // Verbatim string: the backslash is a literal path separator.
               string s = File.ReadAllText(@"e:\test.txt");
               Program p = new Program();
               string r = p.RemoveDuplicatedChar_1(s);
               r = p.RemoveDuplicatedChar_2(s);
               r = p.RemoveDuplicatedChar_6(s);
           }

           /// <summary>
           /// Since this is C#, the first idea is to rely on what the class library
           /// already offers. HashSet is the sensible choice; note that the type is
           /// only available from .NET 3.5 on.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// The distinct characters of <paramref name="s"/> in order of first
           /// occurrence, or <paramref name="s"/> itself when it is null or shorter
           /// than two characters.
           /// </returns>
           public string RemoveDuplicatedChar_1(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();
               HashSet<char> seen = new HashSet<char>();
               int newIndex = 0;

               for (int i = 0; i < charArray.Length; i++)
               {
                   // Add returns false when the character is already in the set.
                   // Compacting by hand (instead of enumerating the set) keeps the
                   // first-occurrence order without depending on the unspecified
                   // enumeration order of HashSet.
                   if (seen.Add(charArray[i]))
                   {
                       charArray[newIndex] = charArray[i];
                       newIndex++;
                   }
               }

               return new string(charArray, 0, newIndex);
           }

           /// <summary>
           /// The LINQ Distinct operator can do the job as well.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// The characters produced by Distinct, or <paramref name="s"/> itself
           /// when it is null or shorter than two characters.
           /// </returns>
           public string RemoveDuplicatedChar_2(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               return new string(s.Distinct().ToArray());
           }

           // Distinct is implemented with a linked list and hashing; the
           // implementation is reproduced below for anyone who is interested.
           /****
                   [__DynamicallyInvokable]
                   public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source)
                   {
                       if (source == null)
                       {
                           throw Error.ArgumentNull("source");
                       }
                       return DistinctIterator<TSource>(source, null);
                   }

                   private static IEnumerable<TSource> DistinctIterator<TSource>(IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
                   {
                       Set<TSource> iteratorVariable0 = new Set<TSource>(comparer);
                       foreach (TSource iteratorVariable1 in source)
                       {
                           if (iteratorVariable0.Add(iteratorVariable1))
                           {
                               yield return iteratorVariable1;
                           }
                       }
                   }

                   public bool Add(TElement value)
                   {
                       return !this.Find(value, true);
                   }

                   private bool Find(TElement value, bool add)
                   {
                       int hashCode = this.InternalGetHashCode(value);
                       for (int i = this.buckets[hashCode % this.buckets.Length] - 1; i >= 0; i = this.slots[i].next)
                       {
                           if ((this.slots[i].hashCode == hashCode) && this.comparer.Equals(this.slots[i].value, value))
                           {
                               return true;
                           }
                       }
                       if (add)
                       {
                           int freeList;
                           if (this.freeList >= 0)
                           {
                               freeList = this.freeList;
                               this.freeList = this.slots[freeList].next;
                           }
                           else
                           {
                               if (this.count == this.slots.Length)
                               {
                                   this.Resize();
                               }
                               freeList = this.count;
                               this.count++;
                           }
                           int index = hashCode % this.buckets.Length;
                           this.slots[freeList].hashCode = hashCode;
                           this.slots[freeList].value = value;
                           this.slots[freeList].next = this.buckets[index] - 1;
                           this.buckets[index] = freeList + 1;
                       }
                       return false;
                   }
            ****/

           /// <summary>
           /// Do not forget that this is an exercise, so here come the assumptions.
           /// First assumption: every character lies in the range 'a'-'z', so the
           /// 26 letters can be mapped onto the bits of one 32-bit integer.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// The distinct characters of <paramref name="s"/> in order of first
           /// occurrence, or <paramref name="s"/> itself when it is null or shorter
           /// than two characters.
           /// </returns>
           /// <exception cref="ArgumentException">
           /// A character outside 'a'-'z' was found (only checked for inputs of
           /// two or more characters).
           /// </exception>
           public string RemoveDuplicatedChar_3(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();
               Int32 flags = 0;
               int newIndex = 0;

               for (int i = 0; i < charArray.Length; i++)
               {
                   char c = charArray[i];
                   if (c < 'a' || c > 'z')
                   {
                       throw new ArgumentException("char should be in range(a-z)");
                   }

                   // c - 'a' is 0..25 after the range check, so it always fits
                   // into the 32 bits of flags.
                   int mask = 1 << (c - 'a');
                   if ((flags & mask) == 0)
                   {
                       // First occurrence: keep it by compacting in place.
                       charArray[newIndex] = c;
                       newIndex++;
                       flags |= mask;
                   }
               }

               return new string(charArray, 0, newIndex);
           }

           /// <summary>
           /// The 256 character codes 0-255 can be mapped onto eight 32-bit integers.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// The distinct characters of <paramref name="s"/> in order of first
           /// occurrence, or <paramref name="s"/> itself when it is null or shorter
           /// than two characters.
           /// </returns>
           /// <exception cref="ArgumentException">
           /// A character with a code above 255 was found (only checked for inputs
           /// of two or more characters).
           /// </exception>
           public string RemoveDuplicatedChar_4(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();
               Int32[] flags = new Int32[8];
               int newIndex = 0;

               for (int i = 0; i < charArray.Length; i++)
               {
                   char c = charArray[i];
                   if (c > 255)
                   {
                       throw new ArgumentException("char should be in ASCII");
                   }

                   // The code selects one of the eight words (c / 32) and one bit
                   // inside that word (c % 32).
                   int index = c / 32;
                   int mask = 1 << (c % 32);
                   if ((flags[index] & mask) == 0)
                   {
                       charArray[newIndex] = c;
                       newIndex++;
                       flags[index] |= mask;
                   }
               }

               return new string(charArray, 0, newIndex);
           }

           /// <summary>
           /// Looks familiar: the same idea as method 4, with one bool per
           /// character code instead of one bit.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// The distinct characters of <paramref name="s"/> in order of first
           /// occurrence, or <paramref name="s"/> itself when it is null or shorter
           /// than two characters.
           /// </returns>
           /// <exception cref="ArgumentException">
           /// A character with a code above 255 was found (only checked for inputs
           /// of two or more characters).
           /// </exception>
           public string RemoveDuplicatedChar_5(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();
               bool[] flags = new bool[256];
               int newIndex = 0;

               for (int i = 0; i < charArray.Length; i++)
               {
                   char c = charArray[i];
                   if (c > 255)
                   {
                       throw new ArgumentException("char should be in ASCII");
                   }

                   if (!flags[c])
                   {
                       charArray[newIndex] = c;
                       newIndex++;
                       flags[c] = true;
                   }
               }

               return new string(charArray, 0, newIndex);
           }

           /// <summary>
           /// O(n^2) implementation that uses no lookup table or advanced data
           /// structure, only the working copy of the characters. It does not care
           /// whether the text is ASCII, which makes it the only hand-written
           /// variant here that works for arbitrary characters; on large strings,
           /// however, it should be far slower than methods 1 and 2. A running-time
           /// comparison follows the listing.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// The distinct characters of <paramref name="s"/> in order of first
           /// occurrence, or <paramref name="s"/> itself when it is null or shorter
           /// than two characters.
           /// </returns>
           public string RemoveDuplicatedChar_6(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();

               // The first character is always kept, so the kept prefix starts
               // with length 1 and scanning starts at index 1.
               int newIndex = 1;

               for (int i = 1; i < charArray.Length; i++)
               {
                   int j = 0;
                   for (j = 0; j < newIndex; j++)
                   {
                       if (charArray[i] == charArray[j])
                       {
                           break;
                       }
                   }

                   // No duplicate was found in the already-kept prefix, so append
                   // the character at the end of that prefix.
                   if (j == newIndex)
                   {
                       charArray[newIndex] = charArray[i];
                       newIndex++;
                   }
               }

               return new string(charArray, 0, newIndex);
           }
       }

    在处理百万级别的文本时,1,2,6的运行时间比较:

    [图片:方法1、2、6的运行时间比较]

    /****
    对于纯ASCII的大字符串,给出了5个方法的时间比较如下图
    可以看出4,5在时间效率上超过了1和2
    所以在处理混合型字符串时,是否应该考虑综合应用这些算法呢?
    答案是明显的
    ****/

    [图片:纯ASCII大字符串下方法1–5的运行时间比较]

  • 相关阅读:
    记某农信社面试
    sublime2 c++的一些使用配置
    switch case
    sizeof与strlen的区别 浅谈
    求最大公约数
    Android基础控件——PopupWindow模仿ios底部弹窗
    Android基础控件——ListView实现时间轴效果
    Android基础控件——CardView的使用、仿支付宝银行卡
    Android基础控件——AlertDialog、ProgressDialog实现单选对话框、多选对话框、进度条对话框、输入框对话框
    Android基础——适配安卓6.0新权限系统
  • 原文地址:https://www.cnblogs.com/dancewithautomation/p/3493772.html
Copyright © 2020-2023  润新知