• 提取奖励办数据中人员信息(自用)


    2016年

    #coding=utf-8
    import re
    from pymongo import MongoClient
    
    # Connection setup: local MongoDB server, database "nosta".
    client = MongoClient("localhost", 27017)
    db = client["nosta"]
    collection1 = db["nosta_2016"]  # read from: project documents
    collection2 = db["2016_list"]   # written to: one document per person line
    db.authenticate("zty","zty")

    # Patterns are compiled once, outside the loops.  Each capture group keeps
    # the Chinese field label together with its value, and every field except
    # the award one only matches when it is followed by a space.
    SIMPLE_FIELDS = (
        ('name', re.compile(u'(姓名:.*?) .*')),
        ('duty', re.compile(u'.* (行政职务:.*?) .*')),
        ('title', re.compile(u'.* (技术职称:.*?) .*')),
        ('unit', re.compile(u'.* (工作单位:.*?) .*')),
    )
    TECH_CONTRIBUTION = re.compile(u'.* (对本项目技术创造性贡献:.*?) .*')
    ACADEMIC_CONTRIBUTION = re.compile(u'.* (对本项目主要学术贡献:.*?) .*')
    AWARD = re.compile(u'.* (曾获国家科技奖励情况:.*)')


    def _first_group(pattern, text):
        """Return group 1 of the first match of ``pattern`` in ``text``, or ''."""
        found = pattern.search(text)
        return found.group(1) if found else ''


    n = 0
    projection = {"project_name":1, "project_content":1, "_id":0}
    for n, project in enumerate(collection1.find({}, projection), 1):
        # Progress output: running count of processed project documents.
        print(n)

        # Skip projects that have no (or an empty) list of main contributors.
        people = project['project_content'].get(u'主要完成人')
        if not people:
            continue

        for entry in people:
            person = {'project_name': project['project_name']}
            for key, pattern in SIMPLE_FIELDS:
                person[key] = _first_group(pattern, entry)
            # The "technical" contribution field wins; the "academic" one is
            # only used when the former is absent.
            person['contribution'] = (
                _first_group(TECH_CONTRIBUTION, entry)
                or _first_group(ACADEMIC_CONTRIBUTION, entry)
            )
            person['award'] = _first_group(AWARD, entry)
            collection2.insert(person)

    2017、2018年

    #coding=utf-8
    import re
    from pymongo import MongoClient
    
    # Connection setup: local MongoDB server, database "nosta".
    client = MongoClient("localhost", 27017)
    db = client["nosta"]
    collection1 = db["nosta_2017"]  # read from: project documents
    collection2 = db["2017_list"]   # written to: one document per person
    db.authenticate("zty","zty")

    # Field labels that get a space inserted in front of them, in this order,
    # so that the space-delimited patterns below can separate the fields.
    FIELD_LABELS = (
        u'排名:',
        u'行政职务:',
        u'技术职称:',
        u'工作单位:',
        u'完成项目时所在单位:',
        u'对本项目技术创造性贡献:',
        u'对本项目主要学术贡献:',
        u'曾获国家科技奖励情况:',
    )

    # Patterns are compiled once, outside the loops.  Each capture group keeps
    # the Chinese field label together with its value, and every field except
    # the award one only matches when it is followed by a space.
    SIMPLE_FIELDS = (
        ('name', re.compile(u'(姓名:.*?) .*')),
        ('duty', re.compile(u'.* (行政职务:.*?) .*')),
        ('title', re.compile(u'.* (技术职称:.*?) .*')),
        ('unit', re.compile(u'.* (工作单位:.*?) .*')),
    )
    TECH_CONTRIBUTION = re.compile(u'.* (对本项目技术创造性贡献:.*?) .*')
    ACADEMIC_CONTRIBUTION = re.compile(u'.* (对本项目主要学术贡献:.*?) .*')
    AWARD = re.compile(u'.* (曾获国家科技奖励情况:.*)')


    def _first_group(pattern, text):
        """Return group 1 of the first match of ``pattern`` in ``text``, or ''."""
        found = pattern.search(text)
        return found.group(1) if found else ''


    n = 0
    projection = {"project_name":1, "project_content":1, "_id":0}
    for n, project in enumerate(collection1.find({}, projection), 1):
        # Progress output: running count of processed project documents.
        print(n)

        # A missing key is treated like the empty-list placeholder: skip it.
        choice = project['project_content'].get(u'主要完成人:', [])
        if choice == []:
            continue

        # The contributors arrive as one string; each person's section starts
        # with the name label, so split on it and drop the text before the
        # first occurrence.
        for chunk in choice.split(u'姓名:')[1:]:
            for label in FIELD_LABELS:
                chunk = chunk.replace(label, u' ' + label)
            # Put back the name label that the split consumed.
            entry = u'姓名:' + chunk

            person = {'project_name': project['project_name']}
            for key, pattern in SIMPLE_FIELDS:
                person[key] = _first_group(pattern, entry)
            # The "technical" contribution field wins; the "academic" one is
            # only used when the former is absent.
            person['contribution'] = (
                _first_group(TECH_CONTRIBUTION, entry)
                or _first_group(ACADEMIC_CONTRIBUTION, entry)
            )
            person['award'] = _first_group(AWARD, entry)
            collection2.insert(person)
  • 相关阅读:
    使用 WebSphere Adapter for SAP Software V7.5 配置 SAP 系统和客户端之间的安全网络通信 (SNC)
    在 ubuntu 12.04 上安装 redmine
    配置nat稳定网络防病毒
    利用 Replication Handler 备份索引
    .NET 4.5对Base Class Library做出改善
    redmine 和 gitolite 的整合
    IBM Power7 服务器 Hypervisor 内存使用情况研究
    Word域代码的显示
    转载:深入分析MFC文档视图
    VIM常用指令
  • 原文地址:https://www.cnblogs.com/zhangtianyuan/p/9438347.html
Copyright © 2020-2023  润新知