• PostgreSQL 在何处真正开始写数据


    基本关系是:

    在 BackgroundWriterMain 的主循环中会调用 BgBufferSync(),完整的调用链为:BgBufferSync --> SyncOneBuffer --> FlushBuffer --> smgrwrite

    看代码:

    /*
     * Main entry point for bgwriter process
     *
     * This is invoked from AuxiliaryProcessMain, which has already created the
     * basic execution environment, but not enabled signals yet.
     *
     * NOTE: this listing is abridged; every "……" marks code that was left
     * out of the excerpt.
     */
    void
    BackgroundWriterMain(void)
    {
        ……
        /*
         * Loop forever
         */
        for (;;)
        {
            ……

            /*
             * Do one cycle of dirty-buffer writing.  The value returned by
             * BgBufferSync() is saved in can_hibernate; per BgBufferSync's
             * header comment it is true when it is appropriate for the
             * bgwriter to enter low-power hibernation.
             */
            can_hibernate = BgBufferSync();
            ……
        }
    }

    再看:

    /*
     * BgBufferSync -- Write out some dirty buffers in the pool.
     *
     * This is called periodically by the background writer process.
     *
     * Returns true if it's appropriate for the bgwriter process to go into
     * low-power hibernation mode.  (This happens if the strategy clock sweep
     * has been "lapped" and no buffer allocations have occurred recently,
     * or if the bgwriter has been effectively disabled by setting
     * bgwriter_lru_maxpages to 0.)
     *
     * NOTE: this listing is abridged; every "……" marks code that was left
     * out of the excerpt, including the declarations and setup of the
     * variables used by the loop below.
     */
    bool
    BgBufferSync(void)
    {
        ……
        /*
         * Execute the LRU scan.  The loop stops when the scan budget
         * (num_to_scan) is used up or when reusable_buffers has reached
         * upcoming_alloc_est.
         */
        while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
        {
            /*
             * Process exactly one buffer per iteration.  The second argument
             * is SyncOneBuffer's skip_recently_used flag.
             */
            int    buffer_state = SyncOneBuffer(next_to_clean, true);

            /* Advance the scan position, wrapping to 0 at NBuffers and
             * counting each wraparound in next_passes. */
            if (++next_to_clean >= NBuffers)
            {
                next_to_clean = 0;
                next_passes++;
            }
            num_to_scan--;

            if (buffer_state & BUF_WRITTEN)
            {
                /* A buffer that was written is counted as reusable too. */
                reusable_buffers++;
                /* Stop this cycle once bgwriter_lru_maxpages buffers have
                 * been written, and record that the limit was hit. */
                if (++num_written >= bgwriter_lru_maxpages)
                {
                    BgWriterStats.m_maxwritten_clean++;
                    break;
                }
            }
            else if (buffer_state & BUF_REUSABLE)
                /* Not written, but reported reusable by SyncOneBuffer. */
                reusable_buffers++;
        }
        ……
    }

    再看:

    /*
     * SyncOneBuffer -- handle exactly one buffer on behalf of a sync pass.
     *
     * With skip_recently_used set, a buffer that is currently pinned or that
     * has a nonzero usage count is left untouched, since such a buffer is not
     * a replacement candidate.
     *
     * The result is a bitmask built from:
     *    BUF_REUSABLE: the buffer's pin count and usage count are both 0,
     *        so it is available for replacement.
     *    BUF_WRITTEN: the buffer was passed to FlushBuffer.
     *
     * (BUF_WRITTEN can be reported even though FlushBuffer found the buffer
     * already clean once it was locked; callers tolerate that inaccuracy.)
     *
     * Note: caller must have done ResourceOwnerEnlargeBuffers.
     */
    static int
    SyncOneBuffer(int buf_id, bool skip_recently_used)
    {
        volatile BufferDesc *desc = &BufferDescriptors[buf_id];
        int            status = 0;
        bool        in_use;

        /*
         * Decide whether the buffer needs writing while holding only the
         * header lock.  The buffer content lock is not required for this
         * check as long as access methods mark pages dirty *before* logging
         * the change with XLogInsert(): a buffer dirtied right after we look
         * belongs to a log record that lies after our checkpoint.redo, so we
         * are under no obligation to write it.
         */
        LockBufHdr(desc);

        in_use = (desc->refcount != 0 || desc->usage_count != 0);
        if (!in_use)
            status |= BUF_REUSABLE;

        /*
         * Two reasons to leave without writing: the caller asked us to skip
         * recently-used buffers and this one is in use, or the buffer is not
         * both valid and dirty (i.e. there is nothing to write).
         */
        if ((in_use && skip_recently_used) ||
            !((desc->flags & BM_VALID) && (desc->flags & BM_DIRTY)))
        {
            UnlockBufHdr(desc);
            return status;
        }

        /*
         * Pin the buffer, take its content lock in shared mode, and flush.
         * (FlushBuffer does nothing if the buffer turned clean by the time
         * it is locked.)
         */
        PinBuffer_Locked(desc);
        LWLockAcquire(desc->content_lock, LW_SHARED);

        FlushBuffer(desc, NULL);

        LWLockRelease(desc->content_lock);
        UnpinBuffer(desc, true);

        status |= BUF_WRITTEN;
        return status;
    }

    再看:

    /*
     * FlushBuffer
     *        Physically write out a shared buffer.
     *
     * NOTE: the buffer contents are only handed to the kernel here; the
     * actual write to disk happens whenever the kernel decides to do it.
     * That is acceptable because the changes can be redone from WAL, but
     * the data must still be forced to disk with fsync before WAL can be
     * checkpointed.
     *
     * The caller must hold a pin on the buffer and a share-lock on its
     * contents.  (A share-lock does not stop hint-bit updates, so the page
     * may change while the write is in progress; this is assumed not to
     * invalidate what gets written.)
     *
     * Pass the caller's smgr reference for the buffer's relation as the
     * second parameter if it has one, otherwise NULL.  With NULL, the
     * relation is marked "transient" so that its kernel-level file
     * descriptors are closed when the current transaction, if any, ends.
     */
    static void
    FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
    {
        ErrorContextCallback write_errctx;
        XLogRecPtr    flush_upto;
        instr_time    t_begin;
        instr_time    t_elapsed;

        /*
         * Take the buffer's io_in_progress lock.  A false result from
         * StartBufferIO means somebody else already flushed the buffer, so
         * there is nothing left for us to do.
         */
        if (!StartBufferIO(buf, false))
            return;

        /* Push an error-context entry so ereport() can identify the buffer */
        write_errctx.previous = error_context_stack;
        write_errctx.callback = shared_buffer_write_error_callback;
        write_errctx.arg = (void *) buf;
        error_context_stack = &write_errctx;

        /* No smgr relation supplied: open it ourselves and mark it transient */
        if (!reln)
        {
            reln = smgropen(buf->tag.rnode, InvalidBackendId);
            smgrsettransient(reln);
        }

        TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
                                            buf->tag.blockNum,
                                            reln->smgr_rnode.node.spcNode,
                                            reln->smgr_rnode.node.dbNode,
                                            reln->smgr_rnode.node.relNode);

        /*
         * Basic WAL rule: log records must reach disk before the data-file
         * changes they describe.  So flush XLOG up to this buffer's LSN
         * before writing the page.
         */
        flush_upto = BufferGetLSN(buf);
        XLogFlush(flush_upto);

        /*
         * From here on it is safe to write the buffer.  Nobody else can have
         * written it while we were flushing the log, because we hold the
         * io_in_progress lock.
         */

        /*
         * Clear BM_JUST_DIRTIED under the header lock so that a change to
         * the block during the write can be detected afterwards.
         */
        LockBufHdr(buf);
        buf->flags &= ~BM_JUST_DIRTIED;
        UnlockBufHdr(buf);

        if (track_io_timing)
            INSTR_TIME_SET_CURRENT(t_begin);

        smgrwrite(reln,
                  buf->tag.forkNum,
                  buf->tag.blockNum,
                  (char *) BufHdrGetBlock(buf),
                  false);

        /* Account the time spent in the write when I/O timing is enabled */
        if (track_io_timing)
        {
            INSTR_TIME_SET_CURRENT(t_elapsed);
            INSTR_TIME_SUBTRACT(t_elapsed, t_begin);
            pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(t_elapsed));
            INSTR_TIME_ADD(pgBufferUsage.blk_write_time, t_elapsed);
        }

        pgBufferUsage.shared_blks_written++;

        /*
         * End the io_in_progress state and mark the buffer clean, unless
         * BM_JUST_DIRTIED got set again in the meantime.
         */
        TerminateBufferIO(buf, true, 0);

        TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(buf->tag.forkNum,
                                           buf->tag.blockNum,
                                           reln->smgr_rnode.node.spcNode,
                                           reln->smgr_rnode.node.dbNode,
                                           reln->smgr_rnode.node.relNode);

        /* Pop our entry off the error context stack */
        error_context_stack = write_errctx.previous;
    }

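    可以看到,循环里每次只处理并写出一个 buffer(SyncOneBuffer 一次只针对一个缓冲区),乍看有些奇怪。也许设计者是故意的:有一点脏数据就写一点?另外,按 FlushBuffer 注释的说明,smgrwrite 只是把缓冲区内容交给操作系统内核,真正写到磁盘的时机由内核决定。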

  • 相关阅读:
    使用xtrabackup对MySQL进行备份和恢复
    魔棒工具RegionGrow算法简介
    编程之美扫雷篇
    从繁体字到书法
    谈读书如何才能提升你的工作能力
    魔兽争霸拼图照片一张
    做你心目中的达文西
    六一儿童节的礼物那些游戏中你不知道的玩法
    DIY手工制作Rhombicuboctahedron
    ul样式与jquery1.4.1冲突
  • 原文地址:https://www.cnblogs.com/gaojian/p/2737470.html
Copyright © 2020-2023  润新知