相信使用过Everything的人都对其超快的搜索速度印象非常深刻。它的主要原理是通过扫描NTFS磁盘的USN Journal来读取文件列表,而不是逐级遍历磁盘目录;由于USN Journal非常小,因此能实现快速搜索。在CodePlex上也有人对这个功能进行了.Net的封装:MFT Scanner in VB.NET。
由于.Net程序的Dll基本上是通用的,在C#中也可以直接使用它。今天发现了有人将其翻译成了C#的版本《使用MFT Scanner遍巡USN Journal,快速找出磁碟內的所有檔案》,使用起来更为方便了。不过,原文貌似有点Bug,编译不过去,这里我改了一下,附录如下:
/// <summary>
/// Lists the files of a volume by asking the file system for its USN records
/// (FSCTL_ENUM_USN_DATA) instead of walking the directory tree, then rebuilding
/// each file's full path from the parent references found in those records.
/// </summary>
/// <remarks>
/// The scanner keeps its volume handle and read buffer in instance fields, so a
/// single instance must not run two scans at the same time.
/// </remarks>
public class MFTScanner
{
    private static readonly IntPtr INVALID_HANDLE_VALUE = new IntPtr(-1);

    private const uint GENERIC_READ = 0x80000000;
    private const int FILE_SHARE_READ = 0x1;
    private const int FILE_SHARE_WRITE = 0x2;
    private const int OPEN_EXISTING = 3;
    private const int FILE_READ_ATTRIBUTES = 0x80;
    private const int FILE_NAME_IINFORMATION = 9;
    private const int FILE_FLAG_BACKUP_SEMANTICS = 0x2000000;
    private const int FILE_OPEN_FOR_BACKUP_INTENT = 0x4000;
    private const int FILE_OPEN_BY_FILE_ID = 0x2000;
    private const int FILE_OPEN = 0x1;
    private const int OBJ_CASE_INSENSITIVE = 0x40;
    private const int FSCTL_ENUM_USN_DATA = 0x900b3;

    // Size of the read buffer handed to DeviceIoControl (64KB).
    private const int ReadBufferSize = 65536;

    // Each output buffer starts with an 8-byte value (the next start reference
    // number); the USN records follow it.
    private const int NextReferenceSize = 8;

    /// <summary>Input structure for FSCTL_ENUM_USN_DATA.</summary>
    [StructLayout(LayoutKind.Sequential)]
    private struct MFT_ENUM_DATA
    {
        public long StartFileReferenceNumber;
        public long LowUsn;
        public long HighUsn;
    }

    /// <summary>
    /// Fixed-size head of one record in the output buffer. The file name is not
    /// part of this struct; it is read separately using
    /// <see cref="FileNameOffset"/> and <see cref="FileNameLength"/>.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    private struct USN_RECORD
    {
        public int RecordLength;
        public short MajorVersion;
        public short MinorVersion;
        public long FileReferenceNumber;
        public long ParentFileReferenceNumber;
        public long Usn;
        public long TimeStamp;
        public int Reason;
        public int SourceInfo;
        public int SecurityId;
        public FileAttributes FileAttributes;
        public short FileNameLength;
        public short FileNameOffset;
    }

    // NOTE(review): the three structs below and the two ntdll imports further
    // down are not referenced anywhere in this class. Several of their members
    // are declared as int where the native side presumably expects a
    // pointer-sized value - verify against the native headers before using
    // them, especially in a 64-bit process.
    [StructLayout(LayoutKind.Sequential)]
    private struct IO_STATUS_BLOCK
    {
        public int Status;
        public int Information;
    }

    [StructLayout(LayoutKind.Sequential)]
    private struct UNICODE_STRING
    {
        public short Length;
        public short MaximumLength;
        public IntPtr Buffer;
    }

    [StructLayout(LayoutKind.Sequential)]
    private struct OBJECT_ATTRIBUTES
    {
        public int Length;
        public IntPtr RootDirectory;
        public IntPtr ObjectName;
        public int Attributes;
        public int SecurityDescriptor;
        public int SecurityQualityOfService;
    }

    // DeviceIoControl overload typed for an MFT_ENUM_DATA input buffer.
    [DllImport("kernel32.dll", ExactSpelling = true, SetLastError = true, CharSet = CharSet.Auto)]
    private static extern bool DeviceIoControl(IntPtr hDevice, int dwIoControlCode, ref MFT_ENUM_DATA lpInBuffer, int nInBufferSize, IntPtr lpOutBuffer, int nOutBufferSize, ref int lpBytesReturned, IntPtr lpOverlapped);

    [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Auto)]
    private static extern IntPtr CreateFile(string lpFileName, uint dwDesiredAccess, int dwShareMode, IntPtr lpSecurityAttributes, int dwCreationDisposition, int dwFlagsAndAttributes, IntPtr hTemplateFile);

    [DllImport("kernel32.dll", ExactSpelling = true, SetLastError = true, CharSet = CharSet.Auto)]
    private static extern Int32 CloseHandle(IntPtr lpObject);

    [DllImport("ntdll.dll", ExactSpelling = true, SetLastError = true, CharSet = CharSet.Auto)]
    private static extern int NtCreateFile(ref IntPtr FileHandle, int DesiredAccess, ref OBJECT_ATTRIBUTES ObjectAttributes, ref IO_STATUS_BLOCK IoStatusBlock, int AllocationSize, int FileAttribs, int SharedAccess, int CreationDisposition, int CreateOptions, int EaBuffer, int EaLength);

    [DllImport("ntdll.dll", ExactSpelling = true, SetLastError = true, CharSet = CharSet.Auto)]
    private static extern int NtQueryInformationFile(IntPtr FileHandle, ref IO_STATUS_BLOCK IoStatusBlock, IntPtr FileInformation, int Length, int FileInformationClass);

    private IntPtr m_hCJ;       // volume handle; IntPtr.Zero when no scan is running
    private IntPtr m_Buffer;    // unmanaged read buffer; IntPtr.Zero when not allocated
    private int m_BufferSize;

    private string m_DriveLetter;

    /// <summary>One file or directory found on the volume.</summary>
    private class FSNode
    {
        public long FRN;
        public long ParentFRN;
        public string FileName;

        public bool IsFile;

        public FSNode(long lFRN, long lParentFSN, string sFileName, bool bIsFile)
        {
            FRN = lFRN;
            ParentFRN = lParentFSN;
            FileName = sFileName;
            IsFile = bIsFile;
        }
    }

    /// <summary>
    /// Opens the raw volume for reading and remembers the drive letter.
    /// </summary>
    /// <param name="szDriveLetter">Drive in "C:" form, without a trailing backslash.</param>
    /// <returns>The handle returned by CreateFile (INVALID_HANDLE_VALUE on failure).</returns>
    private IntPtr OpenVolume(string szDriveLetter)
    {
        m_DriveLetter = szDriveLetter;

        // A volume is opened through the Win32 device namespace: \\.\C:
        return CreateFile(@"\\.\" + szDriveLetter, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, IntPtr.Zero, OPEN_EXISTING, 0, IntPtr.Zero);
    }

    /// <summary>
    /// Closes the volume handle and frees the read buffer. Safe to call more
    /// than once and after a failed open.
    /// </summary>
    private void Cleanup()
    {
        // Only close a handle that was actually opened; a failed CreateFile
        // leaves INVALID_HANDLE_VALUE here, which must not be passed on.
        if (m_hCJ != IntPtr.Zero && m_hCJ != INVALID_HANDLE_VALUE)
        {
            CloseHandle(m_hCJ);
        }
        m_hCJ = IntPtr.Zero;

        if (m_Buffer != IntPtr.Zero)
        {
            Marshal.FreeHGlobal(m_Buffer);
            m_Buffer = IntPtr.Zero;
        }
    }

    /// <summary>
    /// Enumerates the full paths of all files (not directories) on a volume.
    /// </summary>
    /// <param name="szDriveLetter">
    /// Drive to scan, e.g. "C:" or "C:\"; trailing backslashes are removed.
    /// </param>
    /// <returns>
    /// A lazily evaluated sequence of paths. The volume is read in full when the
    /// first element is requested; the handle and buffer are released before the
    /// first path is yielded.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="szDriveLetter"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A scan is already running on this instance.</exception>
    /// <exception cref="IOException">The volume could not be opened.</exception>
    public IEnumerable<String> EnumerateFiles(string szDriveLetter)
    {
        if (szDriveLetter == null)
        {
            throw new ArgumentNullException("szDriveLetter");
        }

        // correct path: "C:\" -> "C:"
        szDriveLetter = szDriveLetter.TrimEnd('\\');

        Dictionary<long, FSNode> dicFRNLookup = ReadAllRecords(szDriveLetter);

        // Resolve all paths for files
        foreach (FSNode oFSNode in dicFRNLookup.Values)
        {
            if (oFSNode.IsFile)
            {
                yield return BuildFullPath(oFSNode, dicFRNLookup, szDriveLetter);
            }
        }
    }

    /// <summary>
    /// Reads every record the volume reports and indexes it by file reference
    /// number. All unmanaged resources are released before returning.
    /// </summary>
    private Dictionary<long, FSNode> ReadAllRecords(string szDriveLetter)
    {
        // Checked outside the try block on purpose: if another scan owns the
        // buffer, failing here must not run Cleanup() and free it.
        if (m_Buffer != IntPtr.Zero)
        {
            throw new InvalidOperationException("invalid buffer");
        }

        var dicFRNLookup = new Dictionary<long, FSNode>();

        try
        {
            // Allocate a buffer to use for reading records.
            m_BufferSize = ReadBufferSize;
            m_Buffer = Marshal.AllocHGlobal(m_BufferSize);

            m_hCJ = OpenVolume(szDriveLetter);
            if (m_hCJ == INVALID_HANDLE_VALUE)
            {
                string errorMsg = "Couldn't open handle to the volume.";
                if (!IsAdministrator())
                {
                    errorMsg += "Current user is not administrator";
                }

                throw new IOException(errorMsg);
            }

            var mft = new MFT_ENUM_DATA();
            mft.StartFileReferenceNumber = 0;
            mft.LowUsn = 0;
            mft.HighUsn = long.MaxValue;

            int dwRetBytes = 0;

            // A failed call ends the enumeration (this is also how the end of
            // the data is reported); so does a reply carrying no records.
            while (DeviceIoControl(m_hCJ, FSCTL_ENUM_USN_DATA, ref mft, Marshal.SizeOf(typeof(MFT_ENUM_DATA)), m_Buffer, m_BufferSize, ref dwRetBytes, IntPtr.Zero)
                   && dwRetBytes > NextReferenceSize)
            {
                // IntPtr.Add keeps the arithmetic pointer-sized, so this also
                // works when the buffer lives above 2GB in a 64-bit process.
                IntPtr pUsnRecord = IntPtr.Add(m_Buffer, NextReferenceSize);
                int remaining = dwRetBytes - NextReferenceSize;

                while (remaining > 0)
                {
                    var usnRecord = (USN_RECORD)Marshal.PtrToStructure(pUsnRecord, typeof(USN_RECORD));

                    // A non-positive or oversized length would loop forever or
                    // walk past the data that was returned.
                    if (usnRecord.RecordLength <= 0 || usnRecord.RecordLength > remaining)
                    {
                        break;
                    }

                    // FileNameLength is in bytes; the name is UTF-16.
                    string fileName = Marshal.PtrToStringUni(IntPtr.Add(pUsnRecord, usnRecord.FileNameOffset), usnRecord.FileNameLength / 2);
                    bool bIsFile = (usnRecord.FileAttributes & FileAttributes.Directory) == 0;

                    // Indexer instead of Add: a repeated reference number
                    // replaces the earlier entry rather than throwing.
                    dicFRNLookup[usnRecord.FileReferenceNumber] = new FSNode(usnRecord.FileReferenceNumber, usnRecord.ParentFileReferenceNumber, fileName, bIsFile);

                    // Advance to the next record in the buffer.
                    pUsnRecord = IntPtr.Add(pUsnRecord, usnRecord.RecordLength);
                    remaining -= usnRecord.RecordLength;
                }

                // The first 8 bytes are the reference number to continue from.
                mft.StartFileReferenceNumber = Marshal.ReadInt64(m_Buffer, 0);
            }
        }
        finally
        {
            Cleanup();
        }

        return dicFRNLookup;
    }

    /// <summary>
    /// Builds "drive\dir\...\file" by following parent references until a
    /// parent is not present in the lookup table.
    /// </summary>
    private static string BuildFullPath(FSNode oFSNode, Dictionary<long, FSNode> dicFRNLookup, string szDriveLetter)
    {
        string sFullPath = oFSNode.FileName;
        FSNode current = oFSNode;
        FSNode parent;

        // The self-reference test stops an entry that names itself as its own
        // parent from looping forever.
        while (current.ParentFRN != current.FRN && dicFRNLookup.TryGetValue(current.ParentFRN, out parent))
        {
            sFullPath = string.Concat(parent.FileName, @"\", sFullPath);
            current = parent;
        }

        return string.Concat(szDriveLetter, @"\", sFullPath);
    }

    /// <summary>
    /// Reports whether the current Windows user is in the built-in
    /// Administrator role.
    /// </summary>
    public static bool IsAdministrator()
    {
        using (WindowsIdentity identity = WindowsIdentity.GetCurrent())
        {
            WindowsPrincipal principal = new WindowsPrincipal(identity);
            return principal.IsInRole(WindowsBuiltInRole.Administrator);
        }
    }
}
原文还提供了一个扩展方法,方便我们获取某个磁盘下的所有的文件名。
/// <summary>
/// Extension methods that expose <see cref="MFTScanner"/> on <see cref="DriveInfo"/>.
/// </summary>
public static class DriveInfoExtension
{
    /// <summary>
    /// Enumerates the files of a drive by passing <c>drive.Name</c> to a new
    /// <see cref="MFTScanner"/>.
    /// </summary>
    /// <param name="drive">The drive to scan.</param>
    /// <returns>The sequence produced by <c>MFTScanner.EnumerateFiles</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="drive"/> is null.</exception>
    public static IEnumerable<String> EnumerateFiles(this DriveInfo drive)
    {
        // Fail with a named argument instead of a NullReferenceException.
        if (drive == null)
        {
            throw new ArgumentNullException("drive");
        }

        return new MFTScanner().EnumerateFiles(drive.Name);
    }
}
需要注意的是,读取USN Journal需要管理员权限,因此使用这个类的程序必须以管理员身份运行才能正常工作。
另外,这个类封装得也略为简单,只读取了文件名,实际上还可以读取文件大小、属性等常用信息,稍微修改一下代码就能很容易地获取这些属性。通过它们可以非常方便地写出一些分析磁盘空间占用的程序,这里就不举例了。