• Finder(文件内容搜索工具)


    搜索文件夹内容的小工具

    Github

    两种搜索模式的算法:

    BoyerMooreSearch.cs

    using System.Threading;
    using System.Collections.Generic;
    using System.Linq;
    
    namespace Finder.Algorithms
    {
        /// <summary>
        /// An implemention of Boyer-Moore algorithm.
        /// <para/>author : Ornithopter
        /// </summary>
        class BoyerMooreSearch : SearchBase
        {
            /// <summary>
            /// 
            /// </summary>
            /// <param name="source"></param>
            /// <param name="pattern"></param>
            /// <param name="token"></param>
            /// <returns>An array of matched index</returns>
            public int[] Search(string source, string pattern, CancellationToken token)
            {
                var matchIndexes = new List<int>();
    
                // step increasment.
                int delta;
    
                // prepare a map providing delta for each char in pattern string.
                var deltaMap = CreateDeltaMap(pattern);
    
                // start searching.
                for (var i = pattern.Length - 1; i < source.Length; i += delta)
                {
                    token.ThrowIfCancellationRequested();
                    // find next match and update delta.
                    if (FindNext(source, pattern, i, deltaMap, token, out delta))
                    {
                        // add to result list if found.
                        matchIndexes.Add(i - (pattern.Length - 1));
                    }
                }
                return matchIndexes.ToArray();
            }
    
            private static bool Match(string source, int[] deltaMap, string pattern, CancellationToken token)
            {
                // step increasment.
                int delta;
    
                // start searching.
                for (var i = pattern.Length - 1; i < source.Length; i += delta)
                {
                    token.ThrowIfCancellationRequested();
                    // find next match and update delta.
                    if (FindNext(source, pattern, i, deltaMap, token, out delta))
                    {
                        return true;
                    }
                }
                return false;
            }
    
            /// <summary>
            /// Find the next matched index and update delte at the same time.
            /// </summary>
            /// <param name="source"></param>
            /// <param name="pattern"></param>
            /// <param name="start"></param>
            /// <param name="deltaMap"></param>
            /// <param name="delta"></param>
            /// <returns>true if found one, otherwise false.</returns>
            private static bool FindNext(string source, string pattern, int start, int[] deltaMap, CancellationToken token, out int delta)
            {
                int i = pattern.Length - 1,
                    index = 0;
    
                // start comparing from the last char in pattern.
                while (source[start - index] == pattern[i - index])
                {
                    token.ThrowIfCancellationRequested();
                    if (index != pattern.Length - 1)
                    {
                        index++;
                    }
                    else
                    {
                        // matchs to the end. So it's a search result.
                        delta = pattern.Length;
                        return true;
                    }
                }
    
                // found one dismatched char at (start - index), get delta from map.
                var c = source[start - index];
                delta = /*c > 128 ? 0 : */deltaMap[c];
    
                if (delta == 0)
                {
                    // this means the source[start] char is the last char in pattern
                    // and only appears once. So delta should be the length of pattern.
                    delta = pattern.Length;
                }
                return false;
            }
    
            static int[] CreateDeltaMap(string pattern)
            {
                const int alphabetSize = 0xffff;
                var patternLength = pattern.Length;
                var deltaMap = new int[alphabetSize];
    
                // initialize the map.
                for (var i = 0; i < alphabetSize; i++)
                {
                    deltaMap[i] = patternLength;
                }
    
                // start from 0, which means any duplicated char will only have
                // the index nearest to the end.
                for (var i = 0; i < patternLength; i++)
                {
                    var index = pattern[i];
                    deltaMap[index] = patternLength - i - 1;
                }
                return deltaMap;
            }
    
            protected override void Build(CancellationToken token)
            {
                //throw new NotImplementedException();
            }
    
            public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token)
            {
                var fileList = FileList;
    
                var deltaMap = CreateDeltaMap(keyword);
    
                return fileList.Where(filePath => Match(ReadContent(filePath), deltaMap, keyword, token)).ToArray();
            }
        }
    }
    View Code

    Trie.cs

    using System.Threading;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    
    namespace Finder.Algorithms
    {
        class Trie : SearchBase
        {
            class Node
            {
                public Dictionary<char, Node> Next { get; private set; }
                public Dictionary<int, bool> FileIndex { get; private set; }
    
                public Node()
                {
                    FileIndex = new Dictionary<int, bool>();
                    Next = new Dictionary<char, Node>();
                }
            }
    
            private static readonly HashSet<char> Ignorance = new HashSet<char>
            {
                '[',
                '(',
                '{',
            };
    
            private static readonly HashSet<char> Spliters = new HashSet<char>
            {
                ' ',
                ',',
                '
    ',
                '
    ',
                ';',
                ']',
                ')',
                '}',
            };
    
            private Node _root;
    
    
            protected override void Build(CancellationToken token)
            {
                _root = new Node();
    
                var index = 0;
                foreach (var filePath in FileList)
                {
                    token.ThrowIfCancellationRequested();
                    Build(index++, ReadContent(filePath));
                }
            }
    
            private void Build(int fileIndex, string content)
            {
                var node = _root;
                foreach (var t in content)
                {
                    var c = t;
    
                    if (Ignorance.Contains(c))
                    {
                        continue;
                    }
                    if (Spliters.Contains(c))
                    {
                        node.FileIndex[fileIndex] = true;
                        node = _root;
                        continue;
                    }
    
                    /*if (!Char.IsLetter(c))
                        continue;*/
    
                    if (Char.IsUpper(c))
                        c = Char.ToLower(c);
    
                    Node next;
                    if (!node.Next.TryGetValue(c, out next))
                    {
                        next = new Node();
                        node.Next[c] = next;
                    }
                    node = next;
                    node.FileIndex[fileIndex] = false;
                }
                node.FileIndex[fileIndex] = true;
            }
    
            private string[] Search(string keyword, bool matchWholeWord, CancellationToken token)
            {
                var node = _root;
                foreach (var t in keyword)
                {
                    token.ThrowIfCancellationRequested();
                    var c = t;
                    if (Char.IsUpper(c))
                        c = Char.ToLower(c);
    
                    Node next;
                    if (!node.Next.TryGetValue(c, out next))
                    {
                        return new string[0];
                    }
                    node = next;
                }
    
                if (matchWholeWord)
                {
                    return node.FileIndex.Where(_ => _.Value).Select(_ => FileList[_.Key]).ToArray();
                }
                return node.FileIndex.Select(_ => FileList[_.Key]).ToArray();
            }
    
            public const string ConfigMatchWholeWord = "ConfigMatchWholeWord";
    
            public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token)
            {
                var matchWholeWord = (bool)config[ConfigMatchWholeWord];
                return Search(keyword, matchWholeWord, token);
            }
        }
    }
    View Code
  • 相关阅读:
    快速幂模板
    部分有关素数的题
    POJ 3624 Charm Bracelet (01背包)
    51Nod 1085 背包问题 (01背包)
    POJ 1789 Truck History (Kruskal 最小生成树)
    HDU 1996 汉诺塔VI
    HDU 2511 汉诺塔X
    HDU 2175 汉诺塔IX (递推)
    HDU 2077 汉诺塔IV (递推)
    HDU 2064 汉诺塔III (递推)
  • 原文地址:https://www.cnblogs.com/ornithopter/p/3812497.html
Copyright © 2020-2023  润新知