• 如何寻找bug(6)


    继续在第117~170行之间添加DEBUG_TEXT,出错范围缩小到第133~165行。

    133    DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest::execute calling dc->getFragment for L1Id "<< m_level1Id<< std::endl);//已被打印
    134    for (std::vector<DataChannel*>::iterator dc=chanStartIter; dc!=chanEndIter; dc++) {
    135       DEBUG_TEXT(DFDB_ROSCORE, 20, "FragmentRequest::execute calling dc->getFragment for L1Id "<< m_level1Id<< std::endl);
    136       EventFragment* subFragment = ((*dc)->getFragment(m_ticket[index])) ;
    137       if (subFragment != 0) {
    138          partsReceived++;
    139          DEBUG_TEXT(DFDB_ROSCORE, 20, "FragmentRequest::execute calling builder->appendFragment for L1Id "<< m_level1Id<< std::endl    );
    140          m_builder->appendFragment(m_eventFragment,subFragment);
    141          TS_RECORD(TS_H1,2350);
    142 
    143          fragmentOk=subFragment->fragmentReady();
    144 
    145          s_mutex->lock() ;
    146          delete subFragment;
    147          s_mutex->unlock() ;
    148       }
    149       else {
    150          DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest for L1Id "<< m_level1Id << " missing data aborting\n");
    151          fragmentOk=false;
    152          TS_RECORD(TS_H1,2360);
    153       }
    154 
    155       index++;
    156    }
    157 
    158 
    159    bool retired=false;
    160    if (!fragmentOk) {
    161      retired=checkAge(s_maxAge);
    162    }
    163 
    164 
    165    DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest::execute L1Id:"<< m_level1Id<< ",fragmentOk="<< fragmentOk<<",retired="<<retired<<",partsReceived="<<partsReceived<< std::endl);//未被打印

    先在log文件中找到requestOk:-1,确定是L1Id为196635的事件出现问题。

    [lhaaso@cmm03node01 part_dk_ef]$ grep "FragmentRequest::execute calling dc->getFragment for L1Id 196635" ROS-Eth-00_cmm03node01_1487253328.out
    Debug(13,140405022635776): FragmentRequest::execute calling dc->getFragment for L1Id 196635
    [lhaaso@cmm03node01 part_dk_ef]$ grep "Debug(13,140405022635776): FragmentRequest::execute L1Id:196635" ROS-Eth-00_cmm03node01_1487253328.out
    [lhaaso@cmm03node01 part_dk_ef]$ 
    

    继续加打印缩小范围,定位到是在第136行出现的问题:

    136       EventFragment* subFragment = ((*dc)->getFragment(m_ticket[index])) ;

    PCMemoryDataChannel.cpp 中 PCMemoryDataChannel::getFragment() 第200行的调试信息已被打印出来,因此猜测错误出现在其后的try块里。继续缩小范围,定位到第203行出现问题。
    198     if((intptr_t)ed == EventInputManager::EIM_MAYCOME || (intptr_t)ed == EventInputManager::EIM_NEVERTOCOME)
    199     {
    200     DEBUG_TEXT(DFDB_ROSFM, 8, "EIM_MAYCOME || EIM_NEVERTOCOME " << ticket << std::endl);
    201       try
    202       {
    203   fragment = new ROBFragment(m_memoryPool, ticket, m_sourceIdentifier, 0); //create an empty ROB fragment //出现问题的代码
    204   Buffer *mem_buffer = fragment->buffer();                                 //get the Buffer of the ROB fragment
    205   Buffer::page_iterator mem_page_i = mem_buffer->begin();
    206   MemoryPage *mem_page = const_cast<MemoryPage *>(*mem_page_i);            //get the memory page of the buffer
    207   mem_page->lock();
    208 
    209   evDesc_t *ed = m_eventInputManager->getEventDescriptor(mem_page);        //get a pointer to the event descriptor
    210   ed->L1id = ticket;                                                       //set the L1ID
    211   m_eventInputManager->createEvent(ed);                                    //Insert the event into the Event Input Manager
    212 
    213   if ((intptr_t)ed == EventInputManager::EIM_MAYCOME)
    214   {
    215     m_statistics->fragmentsMissed++;
    216     fragment->setStatus(EventFragment::STATUS_MAYCOME);
    217     DEBUG_TEXT(DFDB_ROSFM, 10, "PCMemoryDataChannel::getFragment: Fragment for L1ID " << ticket << " has not yet arrived");
    218   }
    219 
    220   if ((intptr_t)ed == EventInputManager::EIM_NEVERTOCOME)
    221   {
    222     m_statistics->fragmentsLost++;
    223     fragment->setStatus(EventFragment::STATUS_LOST);
    224     DEBUG_TEXT(DFDB_ROSFM, 10, "PCMemoryDataChannel::getFragment: Fragment for L1ID " << ticket << " does not exist");
    225           CREATE_ROS_EXCEPTION(ex1, CoreException, PCMEMCHAN_LOST, "\n L1ID = " << ticket << ", ROL physical addr = " << physicalAddress());
    226           ers::warning(ex1);
    227   }

    查看ROBFragment.cpp中ROBFragment的构造函数如下:

    198 /********************************************************************************************/
    199 ROBFragment::ROBFragment(MemoryPool* mempool, u_int level1Id, u_int sourceId, u_int runNumber)
    200 /********************************************************************************************/
    201 {
    202   DEBUG_TEXT(DFDB_ROSEF, 8 , "Lost event " << level1Id << " begin to created"); //已被打印
    203   // This constructor is for the (hopefully) rare case that a ROD fragment 
    204   // does not get delivered by the ROL and the FragmentManager has to 
    205   // return an empty ROB fragment
    206 
    207   m_buffer = new Buffer(mempool); //出现问题代码行
    208 
    209   // Build the ROB header
    210   DEBUG_TEXT(DFDB_ROSEF, 8, "calling initialiseHeader "<<level1Id << std::endl); //未被打印
    211   initialiseHeader(sourceId, STATUS_TIMEOUT);
    212 
    213   DEBUG_TEXT(DFDB_ROSEF, 8, "ROBFragment::ROBFragment(lost): s_formatVersionNumber for ROD header is " << s_formatVersionNumber <<     " " << level1Id << std::endl);
    214   // Build the ROD header
    215   m_rodheader = new(m_buffer) RODFragment::RODHeader;
    216   DEBUG_TEXT(DFDB_ROSEF, 8, "m_rodheader is at " << m_rodheader << " " << level1Id << std::endl);
    217   m_rodheader->startOfHeaderMarker = s_rodMarker;
    218   m_rodheader->headerSize          = sizeof(RODFragment::RODHeader) / sizeof (u_int);
    219   m_rodheader->formatVersionNumber = s_rodformatVersionNumber;
    220   //The source ID of the ROD header should not be identical to that of the ROB header. As we don't know
    221   //it (without additional tricks in the FM) I duplicate it anyway. FIXME
    222   m_rodheader->sourceIdentifier    = sourceId & 0xffffff;
    223   m_rodheader->level1Id            = level1Id;
    224   m_rodheader->bunchCrossingId     = 0;
    225   m_rodheader->level1TriggerType   = 0;
    226   m_rodheader->detectorEventType   = 0;
    227   m_rodheader->runNumber           = runNumber;
    228 
    229   // Build the ROD body (just one status word)
    230   m_rodbody = new(m_buffer) u_int[1];
    231   *m_rodbody = STATUS_TIMEOUT;    // Error status
    232   DEBUG_TEXT(DFDB_ROSEF, 8, "m_rodbody is at " << m_rodbody << " " << level1Id << std::endl);
    233 
    234   // Build the ROD trailer
    235   m_rodtrailer = new(m_buffer) RODFragment::RODTrailer;
    236   DEBUG_TEXT(DFDB_ROSEF, 8, "m_rodtrailer is at " << m_rodtrailer << " " << level1Id << std::endl);
    237   m_rodtrailer->numberOfStatusElements = 1;
    238   m_rodtrailer->numberOfDataElements   = 0;
    239   m_rodtrailer->statusBlockPosition    = 0;
    240 
    241   // Generic ROB header   
    242   int rodsize = RODFragment::s_rodheaderSize + 1 + RODFragment::s_rodtrailerSize;
    243   m_header->generic.totalFragmentsize  = s_robheaderSize + rodsize;
    244 
    245   //No ROB trailer. crc_flag is 0
    246 
    247   m_rodFragmentExists = 1;
    248   DEBUG_TEXT(DFDB_ROSEF, 8 , "Lost event " << level1Id << " created");
    249 }

    查看../../ROSBufferManagement/src/Buffer.cpp 

    121 Buffer::Buffer(MemoryPool *memoryPool)
    122   : m_memoryPool(memoryPool),
    123     m_size(0),
    124     m_lastMemoryPage(m_memoryPool->getPage()),
    125     m_pageSize(m_memoryPool->pageSize()),
    126     m_numberOfPages(1),
    127     m_current(0),
    128     m_reserved(0)
    129 {
    130   m_pages[0]=m_lastMemoryPage;
    131 }

    ../../ROSMemoryPool/ROSMemoryPool/MemoryPool.h

    121   inline MemoryPage * MemoryPool::getPage()
    122   {
    123     if (m_freeIndex >= m_noPages)
    124       throw MemoryPoolException(MemoryPoolException::NOPAGESAVAILABLE);
    125 
    126     MemoryPage *rc = (*m_pageVector)[m_freeIndex];
    127     (*m_pageVector)[m_freeIndex] = 0;
    128     m_freeIndex++;
    129     return rc;
    130   }

    出现问题的原因:

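    FragmentRequest::execute()执行时,getFragment没有找到对应L1id的ROBFragment,于是要构造一个空的ROBFragment;其构造函数中的new Buffer(mempool)会调用MemoryPool::getPage(),程序就是在getPage时报错的。(从上面的代码看,getPage在m_freeIndex >= m_noPages、即内存池已没有空闲页时会抛出MemoryPoolException::NOPAGESAVAILABLE,推测这里就是这种情况,有待进一步确认。)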

    (批注:为什么会出现找不到ROB的情况呢?因为ROS是在数据到齐的情况下才会向L2SV发送消息,然后消息传递到SFI, SFI才会向ROS请求对应L1id的数据,这个时候缺少ROB从逻辑上说是不对的。所以怀疑检查数据完整性的逻辑是不是有问题。)

  • 相关阅读:
    Docker中查看Mysql数据库中的各环境参数
    Hbase shell 输入无法使用退格键删除解决办法
    HBase启动时报错:/bin/java: No such file or directory6/bin/../bin/hbase: line 412: /usr/local/jdk1.8.0_152/bin/java
    SSH无密码验证
    详解分布式应用程序协调服务Zookeeper
    zookeeper的原理及使用
    Hadoop、Yarn和vcpu资源的配置
    一文让您全面了解清楚HBase数据库的所有知识点,值得收藏!
    基于Docker一键部署大规模Hadoop集群及设计思路
    PHP ServerPush (推送) 技术的探讨【转】
  • 原文地址:https://www.cnblogs.com/zengtx/p/6407906.html
Copyright © 2020-2023  润新知