• 世界500强


    import psycopg2
    import json
    from news_project.config.sql_log import log
    from news_project.middlewares import Deal_Content

    class NewsProjectPipeline(object):
        """Item pipeline that stores scraped news items in PostgreSQL.

        The method names follow the Scrapy item-pipeline hooks
        (``open_spider`` / ``process_item`` / ``close_spider``).  A single
        database connection is opened per spider run and reused for every
        item, instead of reconnecting (and leaking a connection) per item.
        """

        # (table column, item key) pairs written for every stored item, in
        # the order they appear in the INSERT statement.
        _COLUMN_FIELDS = (
            ("type_cn", "type_cn"),
            ("source", "news"),
            ("level2", "id"),
            ("level1", "pid"),
            ("event_time", "time"),
            ("title", "title"),
            ("url", "title_url"),
            ("content", "content"),
            ("lable", "tags"),
            ("type_no", "type_no"),
        )
        _INSERT_TEMPLATE = "INSERT INTO bjzs_big_data.baoji_news({columns}) VALUES ({values})"

        def _connect(self):
            """Open a new connection from the ``log()`` settings and store it on ``self.conn``."""
            settings = self.l = log()
            self.conn = psycopg2.connect(
                database=settings.database,
                user=settings.user,
                password=settings.password,
                host=settings.host,
                port=settings.port,
            )
            return self.conn

        def _ensure_connection(self):
            """Return an open connection, reconnecting if none exists or it was closed."""
            conn = getattr(self, 'conn', None)
            # psycopg2 connections expose ``closed`` (0 while the connection is open).
            if conn is None or conn.closed:
                conn = self._connect()
            return conn

        def open_spider(self, spider):
            """Open the database connection used for the whole spider run."""
            self._connect()

        def process_item(self, item, spider):
            """Normalize one item and insert it into ``bjzs_big_data.baoji_news``.

            Returns the normalized item as a plain ``dict`` after a successful
            insert, or ``0`` when the item has no content (nothing is stored).
            Any exception raised by the insert is re-raised after the
            transaction has been rolled back.
            """
            item = dict(item)

            # Normalize the time format.
            item['time'] = Deal_Content().handleTime(item['time'], item['title_url'])
            print("item*************************************///////////////////////", item['time'])

            # Store empty strings as NULL.
            for key, value in item.items():
                if value == "":
                    item[key] = None

            if item.get('type_cn') is None:
                item['type_cn'] = "行业新闻"
            if item.get('type_no') is None:
                item['type_no'] = 16

            # Items without content are not stored.  The ``0`` return value is
            # kept for backward compatibility.
            # NOTE(review): Scrapy pipelines normally raise DropItem here - confirm.
            if item.get('content') is None:
                return 0

            # Two storage forms: with or without an association id.
            columns = list(self._COLUMN_FIELDS)
            if item.get('association_id'):
                columns.append(("association_id", "association_id"))

            # Only fixed column names are interpolated; values stay parameterized.
            sql = self._INSERT_TEMPLATE.format(
                columns=",".join(column for column, _ in columns),
                values=",".join(["%s"] * len(columns)),
            )
            params = tuple(item[field] for _, field in columns)

            conn = self._ensure_connection()
            cur = conn.cursor()
            try:
                cur.execute(sql, params)
                # Commit the insert.
                conn.commit()
            except Exception:
                # A failed statement leaves the transaction aborted; roll back
                # so the shared connection stays usable for the next item.
                conn.rollback()
                raise
            finally:
                cur.close()

            return item

        def close_spider(self, spider):
            """Close the database connection if one is still open."""
            conn = getattr(self, 'conn', None)
            if conn is not None and not conn.closed:
                conn.close()

  • 相关阅读:
    2019-09-09 memcache
    2019-08-26 linux
    springmvc 实体与文件同时提交时需要注意的地方
    mysql linux转win平台 遇到的坑
    使用Redis为注册中心的Dubbo微服务架构(基于SpringBoot)
    基于SpringBoot+Redis的Session共享与单点登录
    Docker运行oracle12c注意事项
    mac MyEclipse2017 CI10安装破解心得
    zookeeper,hadoop安装部署其实与防火墙无关
    VirtualBox复制的虚拟机无法获取IP解决办法
  • 原文地址:https://www.cnblogs.com/yuanjia8888/p/10233834.html
Copyright © 2020-2023  润新知