很多TX的硬盘上有很多很多很多的各种图片,这其中不知有多少重复的,浪费了空间不说,还减少了其他MM图片的数量,哈哈。
在ACDSEE9中有一个查找重复图片的功能,但是删除是要一个一个确认,太麻烦,就不揣简陋,花半个多小时搞了个简单的控制台删除重复文件工具。
也许对某些TX可以派点用场。
比较方法:
1、如果文件长度不同,视作不同
2、如果长度相同,则比较其MD5值:如果MD5不同,视作不同文件;如果MD5相同,视作重复文件
主函数,用于指定待扫描的文件夹:
/// <summary>
/// Scans a hard-coded picture folder (including sub-folders), groups the files
/// by length, computes an MD5 digest for every file whose length is shared with
/// at least one other file, and then deletes every file whose digest duplicates
/// an earlier one, keeping the first file of each digest group.
/// </summary>
/// <remarks>
/// Relies on a <c>_list</c> member declared outside this method (presumably a
/// static FileInfoEntryList field - confirm against the enclosing class).
/// Deletion is performed without asking for confirmation.
/// </remarks>
static void ScanDupFiles ( )
{
    Stopwatch sw = new Stopwatch( );

    // Phase 1: collect every file below the root folder.
    sw.Start( );
    _list.Append( @"C:\图片文件夹", true );
    sw.Stop( );
    Console.WriteLine( "累计耗时 {0:###,##0} 毫秒,扫描了 {1} 个文件,总长度{2} M!", sw.ElapsedMilliseconds, _list.FileCount, _list.FileTotalLength / ( 1024 * 1024 ) );

    // Phase 2: group by file length. The stopwatch is resumed (Start continues
    // from the accumulated value) so the figure printed really is cumulative.
    Console.WriteLine( "下面开始按文件尺寸分类" );
    sw.Start( );
    Dictionary<long, List<FileInfoEntry>> groupsBySize = _list.CreateDupLengthFileGroup( );
    sw.Stop( );
    Console.WriteLine( "累计耗时 {0:###,##0} 毫秒,共计有 {1} 种长度的文件!", sw.ElapsedMilliseconds, groupsBySize.Count );

    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // The stopwatch stays paused while waiting for the user so that idle time
    // is not counted as processing time.
    Console.WriteLine( "针对重复尺寸文件,计算其MD5值,用于下一步判断是否重复的依据,Press any key ..." );
    Console.ReadLine( );

    // Phase 3: only files that share their length with another file can be
    // duplicates, so only those get an MD5 digest.
    sw.Start( );
    foreach ( KeyValuePair<long, List<FileInfoEntry>> pair in groupsBySize ) {
        if ( pair.Value.Count > 1 ) {
            Console.WriteLine( "\t发现{0}个长度为{1:#,##0}的重复文件,下面计算其 MD5 ", pair.Value.Count, pair.Key );
            foreach ( FileInfoEntry fi in pair.Value ) {
                fi.CreateMD5( );
                Console.WriteLine( "\t\t{0}\t{1}", fi.MD5, fi.FileName );
            }
        }
    }
    Dictionary<string, List<FileInfoEntry>> groupsByMd5 = _list.CreateDupMd5FileGroup( );
    sw.Stop( );
    Console.WriteLine( "累计耗时 {0:###,##0} 毫秒,共计有 {1} 个不同的MD5文件清单!", sw.ElapsedMilliseconds, groupsByMd5.Count );

    // Phase 4: delete duplicates, keeping the first entry of every digest group.
    // Starting at index 1 makes single-entry groups a no-op.
    foreach ( KeyValuePair<string, List<FileInfoEntry>> pair in groupsByMd5 ) {
        for ( int i = 1; i < pair.Value.Count; i++ ) {
            string path = pair.Value[ i ].FullPath;
            try {
                File.Delete( path );
            } catch ( IOException ex ) {
                // File in use: report it and carry on with the remaining duplicates.
                Console.WriteLine( "\t删除 {0} 失败: {1}", path, ex.Message );
            } catch ( UnauthorizedAccessException ex ) {
                // Read-only file or missing permission: report and carry on.
                Console.WriteLine( "\t删除 {0} 失败: {1}", path, ex.Message );
            }
        }
    }
    Console.ReadLine( );
}
关键的2个类,其一:文件信息类。用于登记文件信息,包括名称、长度,还有一个MD5摘要值
/// <summary>
/// Describes a single file: its name, full path, extension, length and,
/// once <see cref="CreateMD5"/> has been called, its MD5 digest.
/// </summary>
public class FileInfoEntry
{
    #region Fields
    // Both separators are recognised when a raw path string is split.
    private static readonly char[] PathSeparators = new char[] { '\\', '/' };

    private string _FileName;   // File name without the directory part
    private string _FullPath;   // Full path of the file
    private string _FileType;   // FileInfo.Extension as reported; null when built from a raw path
    private long _FileLength;   // File length (FileInfo.Length, or the caller-supplied value)
    private string _MD5;        // Upper-case hex MD5 digest; null until CreateMD5 is called
    #endregion

    #region Properties
    /// <summary>File name without its directory.</summary>
    public string FileName
    {
        get { return _FileName; }
    }

    /// <summary>Full path of the file.</summary>
    public string FullPath
    {
        get { return _FullPath; }
    }

    /// <summary>
    /// Value of <see cref="FileInfo.Extension"/> for entries built from a
    /// <see cref="FileInfo"/>; null for entries built from a path string.
    /// </summary>
    public string FileType
    {
        get { return _FileType; }
    }

    /// <summary>Length of the file.</summary>
    public long FileLength
    {
        get { return _FileLength; }
    }

    /// <summary>
    /// MD5 digest as 32 upper-case hex characters, or null while
    /// <see cref="CreateMD5"/> has not been called.
    /// </summary>
    public string MD5
    {
        get { return _MD5; }
    }
    #endregion

    /// <summary>
    /// Builds an entry from file-system metadata.
    /// </summary>
    /// <param name="fi">The file to describe.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fi"/> is null.</exception>
    public FileInfoEntry ( FileInfo fi )
    {
        if ( fi == null ) {
            throw new ArgumentNullException( "fi" );
        }
        _FullPath = fi.FullName;
        _FileName = fi.Name;    // FileInfo already knows the name without the directory
        _FileLength = fi.Length;
        _FileType = fi.Extension;
        _MD5 = null;
    }

    /// <summary>
    /// Builds an entry from a path and an already known length, without
    /// touching the file system.
    /// </summary>
    /// <param name="fileFullPath">Full path of the file.</param>
    /// <param name="fileLength">Length of the file.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fileFullPath"/> is null.</exception>
    public FileInfoEntry ( string fileFullPath, long fileLength )
    {
        if ( fileFullPath == null ) {
            throw new ArgumentNullException( "fileFullPath" );
        }
        _FullPath = fileFullPath;
        _FileName = ExtractFileName( fileFullPath );
        _FileLength = fileLength;
        _FileType = null;
        _MD5 = null;
    }

    /// <summary>
    /// Computes the MD5 digest of the file at <see cref="FullPath"/> and stores
    /// it in <see cref="MD5"/> as upper-case hex.
    /// </summary>
    /// <remarks>
    /// Exceptions raised while opening or reading the file are not caught here.
    /// </remarks>
    public void CreateMD5 ( )
    {
        byte[] digest;
        // The type is fully qualified on purpose: inside this class the simple
        // name "MD5" in an expression binds to the string property above.
        using ( Stream input = new FileStream( _FullPath, FileMode.Open, FileAccess.Read, FileShare.Read ) )
        using ( HashAlgorithm hasher = System.Security.Cryptography.MD5.Create( ) ) {
            digest = hasher.ComputeHash( input );
        }

        StringBuilder hex = new StringBuilder( digest.Length * 2 );
        for ( int i = 0; i < digest.Length; i++ ) {
            hex.AppendFormat( "{0:X2}", digest[ i ] );
        }
        _MD5 = hex.ToString( );
    }

    /// <summary>
    /// Returns the part of <paramref name="fullPath"/> after the last path
    /// separator, or the whole string when it contains no separator.
    /// </summary>
    private static string ExtractFileName ( string fullPath )
    {
        int separator = fullPath.LastIndexOfAny( PathSeparators );
        // separator + 1 skips the separator itself so the name has no leading slash.
        return separator >= 0 ? fullPath.Substring( separator + 1 ) : fullPath;
    }
}
其二:FileInfoEntryList
用于文件信息项的容器。
用于文件的比较:当长度相同时,就计算MD5,然后比较MD5
/// <summary>
/// Container of <see cref="FileInfoEntry"/> items collected from one or more
/// folders, with helpers that group the entries by length or by MD5 digest.
/// </summary>
public class FileInfoEntryList
{
    private readonly List<FileInfoEntry> _FileLists;

    /// <summary>The underlying list of entries (the live list, not a copy).</summary>
    public List<FileInfoEntry> FileLists
    {
        get { return ( _FileLists ); }
    }

    /// <summary>
    /// Returns the entry at <paramref name="index"/>, or null when the index
    /// is outside the list.
    /// </summary>
    public FileInfoEntry this[ int index ]
    {
        get
        {
            if ( index < 0 || index >= _FileLists.Count ) {
                return ( null );
            }
            return ( _FileLists[ index ] );
        }
    }

    /// <summary>Number of entries collected so far.</summary>
    public int FileCount
    {
        get { return ( _FileLists.Count ); }
    }

    /// <summary>Sum of <see cref="FileInfoEntry.FileLength"/> over all entries.</summary>
    public long FileTotalLength
    {
        get
        {
            long total = 0;
            foreach ( FileInfoEntry entry in _FileLists ) {
                total += entry.FileLength;
            }
            return ( total );
        }
    }

    /// <summary>Creates an empty list.</summary>
    public FileInfoEntryList ( )
    {
        _FileLists = new List<FileInfoEntry>( );
    }

    /// <summary>
    /// Adds every file found directly in <paramref name="folderName"/> to the
    /// list and, when <paramref name="includeSubFolder"/> is true, recurses
    /// into each sub-folder.
    /// </summary>
    /// <param name="folderName">Folder to scan.</param>
    /// <param name="includeSubFolder">True to scan sub-folders as well.</param>
    /// <remarks>
    /// Exceptions thrown by the Directory and FileInfo calls are not caught
    /// here, so they end the scan and reach the caller.
    /// </remarks>
    public void Append ( string folderName, bool includeSubFolder )
    {
        string[] files = Directory.GetFiles( folderName );
        Console.WriteLine( "Scanning {0} files in {1} ...", files.Length, folderName );
        foreach ( string file in files ) {
            _FileLists.Add( new FileInfoEntry( new FileInfo( file ) ) );
        }

        if ( !includeSubFolder ) {
            return;
        }
        foreach ( string folder in Directory.GetDirectories( folderName ) ) {
            Append( folder, includeSubFolder );
        }
    }

    /// <summary>
    /// Groups all entries by file length.
    /// </summary>
    /// <returns>
    /// A dictionary keyed by length; each value lists the entries of that
    /// length in the order they were collected.
    /// </returns>
    public Dictionary<long, List<FileInfoEntry>> CreateDupLengthFileGroup ( )
    {
        Dictionary<long, List<FileInfoEntry>> result = new Dictionary<long, List<FileInfoEntry>>( );
        foreach ( FileInfoEntry entry in _FileLists ) {
            AddToGroup( result, entry.FileLength, entry );
        }
        return ( result );
    }

    /// <summary>
    /// Groups entries by MD5 digest. Entries whose digest is null or empty
    /// (no digest computed) are left out.
    /// </summary>
    /// <returns>
    /// A dictionary keyed by digest; each value lists the entries with that
    /// digest in the order they were collected.
    /// </returns>
    public Dictionary<string, List<FileInfoEntry>> CreateDupMd5FileGroup ( )
    {
        Dictionary<string, List<FileInfoEntry>> result = new Dictionary<string, List<FileInfoEntry>>( );
        foreach ( FileInfoEntry entry in _FileLists ) {
            string key = entry.MD5;
            if ( !string.IsNullOrEmpty( key ) ) {
                AddToGroup( result, key, entry );
            }
        }
        return ( result );
    }

    /// <summary>
    /// Appends <paramref name="entry"/> to the bucket stored under
    /// <paramref name="key"/>, creating the bucket on first use.
    /// </summary>
    private static void AddToGroup<TKey> ( Dictionary<TKey, List<FileInfoEntry>> groups, TKey key, FileInfoEntry entry )
    {
        List<FileInfoEntry> bucket;
        // TryGetValue does the lookup once instead of ContainsKey + indexer.
        if ( !groups.TryGetValue( key, out bucket ) ) {
            bucket = new List<FileInfoEntry>( );
            groups.Add( key, bucket );
        }
        bucket.Add( entry );
    }
}
对于有水印的相同文件就无能为力了,欢迎大家批评指正