# Fetch the page; requests reads a 2-tuple timeout as (connect, read) seconds.
web_page_info = requests.get(url=url_base, headers=headers, timeout=(3, 7))

# Copy two columns of kw_df into plain Python lists.
# NOTE(review): "keyword" and "keywords" are read as two different columns
# here -- confirm both exist in kw_df.
names = kw_df["keyword"].tolist()
document = kw_df["keywords"].tolist()
# Keyword strings that are matched against kw_df.keyword with isin(); rows
# whose keyword appears in this list are left out of kw_dele.
# Every entry is quoted so the list is a valid literal (entries such as
# 'Web 3.0' and 'Gas 费' contain spaces and are single keywords).
# Duplicates from the original notes are kept; they do not affect isin().
# NOTE(review): '项⽬进展' appears to be spelled with the Kangxi-radical form
# of its second character -- confirm it matches the data, since isin()
# compares strings exactly.
use_str = [
    '项⽬进展', 'Gas', 'eBay', 'Amazon', 'Web 3.0', '项⽬进展', '期权', '流动性挖矿',
    '矿工', '算力', '节点', '私钥', '观点', 'Gas 费', '技术', '委内瑞拉', '美联储',
    '东加勒比中央银行', '巴哈马',
    # NOTE(review): the original had no separator across this line break;
    # '巴哈马' and '公链' are assumed to be separate entries.
    '公链', '项⽬进展', '游戏', '公链', '钱包', '跨链', '基础设施', '市场', '推特',
    '黑客攻击', '溢价', '观点', '政策法规', '监管', '扩容', '债券', 'USDT', '技术',
    '空投', '股票',
    # ... further entries were elided in the original notes.
]
# Look at the first entry of ``document``: one comma-separated keyword string.
document[0]
# -> '以太坊, DeFi, DEX, Gas, NFT, Loot, 项⽬进展'

# A bare split on ',' keeps the space that follows each comma.
document[0].split(',')
# -> ['以太坊', ' DeFi', ' DEX', ' Gas', ' NFT', ' Loot', ' 项⽬进展']

# Stripping spaces from each piece yields clean keywords.
[piece.strip(' ') for piece in document[0].split(',')]
# -> ['以太坊', 'DeFi', 'DEX', 'Gas', 'NFT', 'Loot', '项⽬进展']
# Keep only the rows whose keyword is NOT one of the use_str entries.
kw_dele = kw_df[~kw_df.keyword.isin(use_str)]

# The backslashes in the character class matter: "[\u4e00-\u9fa5]" is the
# code-point range U+4E00..U+9FA5, whereas "[u4e00-u9fa5]" would match the
# literal characters u/4/e/9/f/a/5 plus the ASCII range '0'-'u'.

# One match per single Chinese character.
s = kw_dele['keyword'].str.findall(r"([\u4e00-\u9fa5])")

# One match per run of consecutive Chinese characters. "+" is used rather
# than "*" so findall does not also return an empty string at every position
# where no Chinese character is present. This rebinds ``s``.
s = kw_dele['keyword'].str.findall(r"([\u4e00-\u9fa5]+)")

# Remove repeated rows from item_sum in place, keeping the first occurrence.
item_sum.drop_duplicates(subset=None, keep='first', inplace=True)
def timestamp_2_standard_time(timestamp, unit='s'):
    """Convert a Unix timestamp to a ``YYYY-MM-DD`` date string.

    :param timestamp: The timestamp, as a number or a numeric string.
        With ``unit='s'`` it is passed through ``int()`` (a standard second
        timestamp has 10 digits, e.g. ``1631404800``); otherwise it is
        converted with ``float()`` and divided by 1000, i.e. treated as
        milliseconds.
    :param unit: ``'s'`` for seconds. Any other value selects the
        millisecond branch.
    :return: The date formatted with ``'%Y-%m-%d'``, e.g. ``'2021-09-12'``.

    NOTE(review): the seconds branch renders the date in UTC, while the
    millisecond branch uses the machine's local time zone. This is kept as
    is so existing callers see the same results; confirm which one is
    intended.

    Example::

        timestamp_2_standard_time(1631404800)  # '2021-09-12'
    """
    if unit == 's':
        seconds = int(timestamp)
        # Timezone-aware replacement for the deprecated utcfromtimestamp();
        # the formatted date is the same.
        utc_time = datetime.datetime.fromtimestamp(
            seconds, tz=datetime.timezone.utc
        )
        return utc_time.strftime("%Y-%m-%d")

    # Convert before dividing so numeric strings are accepted here as well,
    # matching what int() allows in the seconds branch.
    seconds = float(timestamp) / 1000
    return time.strftime("%Y-%m-%d", time.localtime(seconds))
# Quick check: 1631404800 s is 2021-09-12 00:00:00 UTC, so this prints 2021-09-12.
print(timestamp_2_standard_time(1631404800, unit='s'))