TimeCacheMap是Twitter Storm里面一个类, Storm使用它来保存那些最近活跃的对象,并且可以自动删除那些已经过期的对象。
不过在 Storm 0.8 之后,TimeCacheMap 被弃用了,取而代之的是 RotatingMap。
RotatingMap与TimeCacheMap的区别如下:
- 1. 前者(RotatingMap)去掉了自动清理的线程,让用户自己控制何时清理过期数据:调用 rotate() 方法,即删掉尾部最旧的桶,并在头部加一个新桶。
- 2.前者get,put等方法都不加锁了,需要用户自己控制锁
总之就是提供了更大的自由度,让开发者去控制这个数据结构!下面先具体分析TimeCacheMap,而后RotatingMap就一目了然了
下面我直接在 TimeCacheMap 的源码中加上注释来分析:
package backtype.storm.utils;

import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import backtype.storm.utils.Time;

/**
 * Expires keys that have not been updated in the configured number of seconds.
 * The algorithm used will take between expirationSecs and
 * expirationSecs * (1 + 1 / (numBuckets-1)) to actually expire the message.
 *
 * <p>Where the two bounds come from: the cleaner thread rotates the buckets once
 * every expirationSecs / (numBuckets-1), and a key written into the head bucket
 * is dropped by the numBuckets-th rotation that follows the write.
 * <ul>
 *   <li>If the key is put right after a rotation, it waits numBuckets full
 *       periods: expirationSecs / (numBuckets-1) * numBuckets
 *       = expirationSecs * (1 + 1 / (numBuckets-1)).</li>
 *   <li>If the key is put right before a rotation, the first period is almost
 *       entirely skipped: expirationSecs / (numBuckets-1) * numBuckets
 *       - expirationSecs / (numBuckets-1) = expirationSecs.</li>
 * </ul>
 *
 * <p>get, put, remove, containsKey, and size take O(numBuckets) time to run.
 *
 * <p>The advantage of this design is that the expiration thread only locks the object
 * for O(1) time (it just unlinks the last bucket and links a new first one), meaning
 * the object is essentially always available for gets/puts.
 *
 * @deprecated in favor of the non-threaded RotatingMap. The design is still worth
 *             studying even though the class has been superseded.
 */
@Deprecated
public class TimeCacheMap<K, V> {
    // this default ensures things expire at most 50% past the expiration time
    private static final int DEFAULT_NUM_BUCKETS = 3;

    /** Callback through which expired entries are handed back to the owner of the map. */
    public static interface ExpiredCallback<K, V> {
        public void expire(K key, V val);
    }

    // Entries are spread over several buckets; a linked list is used because the
    // rotation only touches the two ends of the list.
    private final LinkedList<HashMap<K, V>> _buckets;

    private final Object _lock = new Object();
    private final Thread _cleaner;
    private final ExpiredCallback<K, V> _callback;

    /**
     * Creates the map and starts its background cleaner thread.
     *
     * @param expirationSecs lower bound, in seconds, on how long an untouched key survives
     * @param numBuckets     number of buckets; must be at least 2
     * @param callback       invoked for every expired entry, or null for no notification
     * @throws IllegalArgumentException if numBuckets is less than 2
     */
    public TimeCacheMap(int expirationSecs, int numBuckets, ExpiredCallback<K, V> callback) {
        if (numBuckets < 2) {
            throw new IllegalArgumentException("numBuckets must be >= 2");
        }
        // Pre-create one empty bucket per slot.
        _buckets = new LinkedList<HashMap<K, V>>();
        for (int i = 0; i < numBuckets; i++) {
            _buckets.add(new HashMap<K, V>());
        }

        _callback = callback;
        final long expirationMillis = expirationSecs * 1000L;
        final long sleepTime = expirationMillis / (numBuckets - 1);
        _cleaner = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    while (true) {
                        final Map<K, V> dead;
                        Time.sleep(sleepTime);
                        synchronized (_lock) {
                            // Drop the last bucket (it holds the oldest data) and
                            // put a fresh, empty bucket at the head.
                            dead = _buckets.removeLast();
                            _buckets.addFirst(new HashMap<K, V>());
                        }
                        // The callback runs outside the lock, so it does not block gets/puts.
                        if (_callback != null) {
                            for (Entry<K, V> entry : dead.entrySet()) {
                                _callback.expire(entry.getKey(), entry.getValue());
                            }
                        }
                    }
                } catch (InterruptedException ex) {
                    // Interruption is how cleanup() stops this thread: leave the loop
                    // and restore the flag instead of silently discarding it.
                    Thread.currentThread().interrupt();
                }
            }
        });
        // Daemon thread: it must not keep the JVM alive on its own.
        _cleaner.setDaemon(true);
        _cleaner.start();
    }

    public TimeCacheMap(int expirationSecs, ExpiredCallback<K, V> callback) {
        this(expirationSecs, DEFAULT_NUM_BUCKETS, callback);
    }

    public TimeCacheMap(int expirationSecs) {
        this(expirationSecs, DEFAULT_NUM_BUCKETS);
    }

    public TimeCacheMap(int expirationSecs, int numBuckets) {
        this(expirationSecs, numBuckets, null);
    }

    /** Returns true if any bucket currently holds the key. */
    public boolean containsKey(K key) {
        synchronized (_lock) {
            for (HashMap<K, V> bucket : _buckets) {
                if (bucket.containsKey(key)) {
                    return true;
                }
            }
            return false;
        }
    }

    /** Returns the value from the first bucket that holds the key, or null if none does. */
    public V get(K key) {
        synchronized (_lock) {
            for (HashMap<K, V> bucket : _buckets) {
                if (bucket.containsKey(key)) {
                    return bucket.get(key);
                }
            }
            return null;
        }
    }

    /** Stores the entry in the head bucket, which restarts the key's expiration. */
    public void put(K key, V value) {
        synchronized (_lock) {
            Iterator<HashMap<K, V>> it = _buckets.iterator();
            HashMap<K, V> bucket = it.next();
            // Write the new value into the first (youngest) bucket...
            bucket.put(key, value);
            // ...and remove any older copy of the key from the remaining buckets.
            while (it.hasNext()) {
                bucket = it.next();
                bucket.remove(key);
            }
        }
    }

    /** Removes the key from the first bucket that holds it and returns its value, or null. */
    public Object remove(K key) {
        synchronized (_lock) {
            for (HashMap<K, V> bucket : _buckets) {
                if (bucket.containsKey(key)) {
                    return bucket.remove(key);
                }
            }
            return null;
        }
    }

    /** Returns the total number of entries across all buckets. */
    public int size() {
        synchronized (_lock) {
            int size = 0;
            for (HashMap<K, V> bucket : _buckets) {
                size += bucket.size();
            }
            return size;
        }
    }

    /**
     * Stops the cleaner thread.
     *
     * <p>The name is misleading: this does not purge any entries. It interrupts the
     * cleaner thread, which ends its loop, so after this call entries are no longer
     * expired at all ("neverCleanup" would describe it better).
     */
    public void cleanup() {
        // The cleaner's loop is wrapped in a catch for InterruptedException, so
        // interrupting it while it sleeps makes the thread leave the loop and finish.
        _cleaner.interrupt();
    }
}
RotatingMap 的源码几乎和 TimeCacheMap 一样,只是去掉了清理线程和锁,并加了一个 rotate() 方法,由开发者自己调用来清理过期数据:
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Bucketed map whose entries expire through explicit calls to {@link #rotate()}.
 * In the Storm source tree this class is declared in package
 * {@code backtype.storm.utils}.
 *
 * <p>Entries are always written to the head bucket. Each rotate() call drops the
 * last bucket and adds an empty head bucket, so a key that is not put again is
 * handed back by the numBuckets-th rotate() call that follows its last put.
 * When rotate() is called is entirely up to the caller; there is no timer or
 * background thread in this class.
 *
 * <p>get, put, remove, containsKey, and size take O(numBuckets) time to run.
 *
 * <p>This class performs no synchronization; callers sharing an instance between
 * threads have to provide their own locking.
 */
public class RotatingMap<K, V> {
    // default number of buckets used when the caller does not specify one
    private static final int DEFAULT_NUM_BUCKETS = 3;

    /** Callback through which expired entries are handed back to the owner of the map. */
    public static interface ExpiredCallback<K, V> {
        public void expire(K key, V val);
    }

    // Head of the list is the youngest bucket, tail is the oldest.
    private final LinkedList<HashMap<K, V>> _buckets;

    private final ExpiredCallback<K, V> _callback;

    /**
     * @param numBuckets number of buckets; must be at least 2
     * @param callback   invoked by rotate() for every expired entry, or null
     * @throws IllegalArgumentException if numBuckets is less than 2
     */
    public RotatingMap(int numBuckets, ExpiredCallback<K, V> callback) {
        if (numBuckets < 2) {
            throw new IllegalArgumentException("numBuckets must be >= 2");
        }
        _buckets = new LinkedList<HashMap<K, V>>();
        for (int i = 0; i < numBuckets; i++) {
            _buckets.add(new HashMap<K, V>());
        }

        _callback = callback;
    }

    public RotatingMap(ExpiredCallback<K, V> callback) {
        this(DEFAULT_NUM_BUCKETS, callback);
    }

    public RotatingMap(int numBuckets) {
        this(numBuckets, null);
    }

    /**
     * Drops the oldest bucket and adds a new empty head bucket.
     *
     * @return the entries of the dropped bucket, after each of them has been
     *         passed to the callback (if one was supplied)
     */
    public Map<K, V> rotate() {
        Map<K, V> dead = _buckets.removeLast();
        _buckets.addFirst(new HashMap<K, V>());
        if (_callback != null) {
            for (Entry<K, V> entry : dead.entrySet()) {
                _callback.expire(entry.getKey(), entry.getValue());
            }
        }
        return dead;
    }

    /** Returns true if any bucket currently holds the key. */
    public boolean containsKey(K key) {
        for (HashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return true;
            }
        }
        return false;
    }

    /** Returns the value from the first bucket that holds the key, or null if none does. */
    public V get(K key) {
        for (HashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return bucket.get(key);
            }
        }
        return null;
    }

    /** Stores the entry in the head bucket and removes the key from all older buckets. */
    public void put(K key, V value) {
        Iterator<HashMap<K, V>> it = _buckets.iterator();
        HashMap<K, V> bucket = it.next();
        bucket.put(key, value);
        // Only one copy of a key may exist, and it must be in the youngest bucket.
        while (it.hasNext()) {
            bucket = it.next();
            bucket.remove(key);
        }
    }

    /** Removes the key from the first bucket that holds it and returns its value, or null. */
    public Object remove(K key) {
        for (HashMap<K, V> bucket : _buckets) {
            if (bucket.containsKey(key)) {
                return bucket.remove(key);
            }
        }
        return null;
    }

    /** Returns the total number of entries across all buckets. */
    public int size() {
        int size = 0;
        for (HashMap<K, V> bucket : _buckets) {
            size += bucket.size();
        }
        return size;
    }
}