顺序锁与读写自旋锁非常类似,只是赋予了写者较高的优先级:即使在读者正在读的时候也允许写者继续运行。这样的好处是写者不用等待(除非另一个写者正在写),缺点是有些时候读者不得不反复多次读相同的数据直到它获得有效的副本。
每个顺序锁就是包括两个字段的seqlock_t
结构:一个类型为spinlock_t
的lock
字段和一个整型类型的sequence
字段,第二个字段是一个顺序计数器。每个读者都必须在读数据前后两次读顺序计数器,并检查两次读到的值是否相同,如果不相同,说明新的写者已经开始写并增加了顺序计数器,因此暗示读者刚读到的数据是无效的。
以下是Linux-2.6.30版本实现seqlock.h代码:
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H
/*
* Reader/writer consistent mechanism without starving writers. This type of
* lock for data where the reader wants a consistent set of information
* and is willing to retry if the information changes. Readers never
* block but they may have to retry if a writer is in
* progress. Writers do not wait for readers.
*
* This is not as cache friendly as brlock. Also, this will not work
* for data that contains pointers, because any writer could
* invalidate a pointer that a reader was following.
*
* Expected reader usage:
* do {
* seq = read_seqbegin(&foo);
* ...
* } while (read_seqretry(&foo, seq));
*
*
* On non-SMP the spin locks disappear but the writer still needs
* to increment the sequence variables because an interrupt routine could
* change the state of the data.
*
* Based on x86_64 vsyscall gettimeofday
* by Keith Owens and Andrea Arcangeli
*/
#include <linux/spinlock.h>
#include <linux/preempt.h>
typedef struct {
unsigned sequence; // 顺序计数器
spinlock_t lock; //锁
} seqlock_t;
/*
* These macros triggered gcc-3.x compile-time problems. We think these are
* OK now. Be cautious.
*/
#define __SEQLOCK_UNLOCKED(lockname)
{ 0, __SPIN_LOCK_UNLOCKED(lockname) }
#define SEQLOCK_UNLOCKED
__SEQLOCK_UNLOCKED(old_style_seqlock_init)
// 顺序锁初始化
#define seqlock_init(x)
do {
(x)->sequence = 0;
spin_lock_init(&(x)->lock);
} while (0)
#define DEFINE_SEQLOCK(x)
seqlock_t x = __SEQLOCK_UNLOCKED(x)
/* Lock out other writers and update the count.
* Acts like a normal spin_lock/unlock.
* Don't need preempt_disable() because that is in the spin_lock already.
*/
static inline void write_seqlock(seqlock_t *sl) //写加锁
{
spin_lock(&sl->lock); //利用自旋锁加锁
++sl->sequence; //增加顺序计数器,值总为奇数,表明写者正在写
smp_wmb();
}
static inline void write_sequnlock(seqlock_t *sl) //写解锁
{
smp_wmb();
sl->sequence++; //递增顺序计数器,写结束时再递增一次,值总为偶数,表明写者结束写操作了。
spin_unlock(&sl->lock);
}
static inline int write_tryseqlock(seqlock_t *sl)
{
int ret = spin_trylock(&sl->lock);
if (ret) {
++sl->sequence;
smp_wmb();
}
return ret;
}
/* Start of read calculation -- fetch last complete writer token */
static __always_inline unsigned read_seqbegin(const seqlock_t *sl) //开始读
{
unsigned ret;
repeat:
ret = sl->sequence;
smp_rmb();
// 奇偶数判断,如果是奇数,说明写者还没有完成写操作,读者需要重复读取顺序计数器直到值为偶数,说明写者操作完成了。
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
}
return ret;
}
/*
* Test if reader processed invalid data.
*
* If sequence value changed then writer changed data while in section.
*/
static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start) // 再次读取顺序计数器,与之前的做对比,检测是否有写者修改
{
smp_rmb();
return (sl->sequence != start);
}
/*
* Version using sequence counter only.
* This can be used when code has its own mutex protecting the
* updating starting before the write_seqcountbeqin() and ending
* after the write_seqcount_end().
*/
// 顺序锁简化版本,如果外部代码实现了锁保证
typedef struct seqcount {
unsigned sequence;
} seqcount_t;
#define SEQCNT_ZERO { 0 }
#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0)
/* Start of read using pointer to a sequence counter only. */
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
unsigned ret;
repeat:
ret = s->sequence;
smp_rmb();
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
}
return ret;
}
/*
* Test if reader processed invalid data because sequence number has changed.
*/
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
smp_rmb();
return s->sequence != start;
}
/*
* Sequence counter only version assumes that callers are using their
* own mutexing.
*/
static inline void write_seqcount_begin(seqcount_t *s)
{
s->sequence++;
smp_wmb();
}
static inline void write_seqcount_end(seqcount_t *s)
{
smp_wmb();
s->sequence++;
}
/*
* Possible sw/hw IRQ protected versions of the interfaces.
*/
#define write_seqlock_irqsave(lock, flags)
do { local_irq_save(flags); write_seqlock(lock); } while (0)
#define write_seqlock_irq(lock)
do { local_irq_disable(); write_seqlock(lock); } while (0)
#define write_seqlock_bh(lock)
do { local_bh_disable(); write_seqlock(lock); } while (0)
#define write_sequnlock_irqrestore(lock, flags)
do { write_sequnlock(lock); local_irq_restore(flags); } while(0)
#define write_sequnlock_irq(lock)
do { write_sequnlock(lock); local_irq_enable(); } while(0)
#define write_sequnlock_bh(lock)
do { write_sequnlock(lock); local_bh_enable(); } while(0)
#define read_seqbegin_irqsave(lock, flags)
({ local_irq_save(flags); read_seqbegin(lock); })
#define read_seqretry_irqrestore(lock, iv, flags)
({
int ret = read_seqretry(lock, iv);
local_irq_restore(flags);
ret;
})
#endif /* __LINUX_SEQLOCK_H */
用法示例:
/*
写者:
write_seqlock(seq);
修改keyvalue
write_sequnlock(seq);
读者:
do {
seq = read_seqbegin(&foo);
读取keyvalue
} while (read_seqretry(&foo, seq));
*/
可关注微信公众号:让我思考一下,与我交流互鉴。