在之前的blog中,曾经写到过关于搜索本地文件的技术文章
如:
下面说说如何在 Python 中使用多线程来搜索本地文件。
利用多个线程来处理搜索任务,我们可以发现它很快……
========================================================
下面是代码部分:
========================================================
# A parallelized "find(1)" using worker threads.

# This demonstrates the use of a work queue and worker threads.
# It really does do more stats/sec when using multiple threads,
# although the improvement is only about 20-30 percent.
# (That was 8 years ago.  In 2002, on Linux, I can't measure
# a speedup. :-( )

# I'm too lazy to write a command line parser for the full find(1)
# command line syntax, so the predicate it searches for is wired-in,
# see function selector() below.  (It currently searches for files with
# world write permission.)

# Usage: parfind.py [-w nworkers] [directory] ...
# Default nworkers is 4


import sys
import getopt
import time
import os
import threading
from collections import deque
from stat import *
import _thread as thread  # kept from the original listing; WorkQ uses threading


# Work queue class.  Usage:
#     wq = WorkQ()
#     wq.addwork(func, (arg1, arg2, ...))  # one or more calls
#     wq.run(nworkers)
# The work is done when wq.run() completes.
# The function calls executed by the workers may add more work.

class WorkQ:
    """A queue of (func, args) jobs drained by a pool of worker threads.

    Invariants (all state is read and written only while holding _cond):
      - len(work) is the number of jobs ready to be taken
      - busy is the number of jobs currently being executed
      - the run is finished when work is empty and busy is 0
    """

    def __init__(self):
        # One condition variable guards both `work` and `busy`, and is what
        # idle workers sleep on while waiting for new jobs or for the end.
        self._cond = threading.Condition()
        self.work = deque()
        self.busy = 0

    def addwork(self, func, args):
        """Queue the call func(*args); may be called from inside a job."""
        with self._cond:
            self.work.append((func, args))
            # One new job can be served by one sleeping worker.
            self._cond.notify()

    def _getwork(self):
        """Return the next (func, args) job, or None once everything is done.

        Blocks while the queue is empty but some job is still running,
        because a running job may still add more work.
        """
        with self._cond:
            while not self.work:
                if self.busy == 0:
                    return None
                self._cond.wait()
            self.busy += 1
            return self.work.popleft()

    def _donework(self):
        """Record that one job finished; wake everybody if it was the last."""
        with self._cond:
            self.busy -= 1
            if self.busy == 0 and not self.work:
                # Nothing queued and nothing running: sleeping workers must
                # wake up so that _getwork() can hand them None.
                self._cond.notify_all()

    def _worker(self):
        """Execute jobs until _getwork() reports that the queue is drained."""
        while True:
            job = self._getwork()
            if job is None:
                break
            func, args = job
            try:
                func(*args)
            finally:
                # Always give the slot back, even when the job raised;
                # otherwise `busy` never reaches 0 and the others hang.
                self._donework()

    def run(self, nworkers):
        """Process all queued work with nworkers threads, caller included.

        Returns immediately when no work has been queued.  Otherwise
        returns after the queue is drained and the helper threads exited.
        """
        if not self.work:
            return  # Nothing to do
        helpers = [threading.Thread(target=self._worker, daemon=True)
                   for _ in range(nworkers - 1)]
        for helper in helpers:
            helper.start()
        self._worker()  # the calling thread is the last worker
        for helper in helpers:
            helper.join()


# Main program

def main(argv=None):
    """Parse the command line, search every directory and report the time.

    argv is the list of command line arguments without the program name;
    it defaults to sys.argv[1:].
    """
    if argv is None:
        argv = sys.argv[1:]
    nworkers = 4
    # 'w:' means "-w takes an argument"; the option spec itself carries
    # no leading '-'.
    opts, args = getopt.getopt(argv, 'w:')
    for opt, arg in opts:
        if opt == '-w':
            nworkers = int(arg)
    if not args:
        args = [os.curdir]

    wq = WorkQ()
    for dir in args:
        wq.addwork(find, (dir, selector, wq))

    t1 = time.perf_counter()
    wq.run(nworkers)
    t2 = time.perf_counter()

    sys.stderr.write('Total time %r sec.\n' % (t2-t1))


# The predicate -- defines what files we look for.
# Feel free to change this to suit your purpose

def selector(dir, name, fullname, stat):
    """Return True for world writable files that are not symlinks.

    Only `stat` is inspected; it must be indexable by ST_MODE, as the
    result of os.lstat() is.
    """
    mode = stat[ST_MODE]
    return (mode & S_IWOTH) != 0 and not S_ISLNK(mode)


# The find procedure -- calls wq.addwork() for subdirectories

def find(dir, pred, wq):
    """Report the entries of `dir` accepted by `pred`; queue subdirectories.

    pred is called as pred(dir, name, fullname, lstat_result).  Matching
    paths and error messages are written to stdout, one per line.
    Directories that are mount points are not descended into.
    """
    try:
        names = os.listdir(dir)
    except OSError as msg:
        # A single write per line keeps output from different worker
        # threads from being interleaved in the middle of a line.
        sys.stdout.write('%r : %s\n' % (dir, msg))
        return
    # os.listdir() never returns '.' or '..', so no filtering is needed.
    for name in names:
        fullname = os.path.join(dir, name)
        try:
            stat = os.lstat(fullname)
        except OSError as msg:
            sys.stdout.write('%r : %s\n' % (fullname, msg))
            continue
        if pred(dir, name, fullname, stat):
            sys.stdout.write('%s\n' % (fullname,))
        if S_ISDIR(stat[ST_MODE]) and not os.path.ismount(fullname):
            wq.addwork(find, (fullname, pred, wq))


# Call the main program

if __name__ == '__main__':
    main()
更多信息:http://www.oschina.net/code/explore/Python-3.1.3/Demo/threads/find.py
========================================================
More reading, and English is important.
I'm Hongten
大哥哥大姐姐,觉得有用打赏点哦!多多少少没关系,一分也是对我的支持和鼓励。谢谢。
Hongten博客排名在100名以内。粉丝过千。
Hongten出品,必是精品。
E | hongtenzone@foxmail.com B | http://www.cnblogs.com/hongten
========================================================