May all the gentle and lovely people find happiness in the end ❤
Today's summary:

Lines of code | ~400 |
---|---|
Blog posts | 1 |
Study time | about 6 hours |
Topics covered | scraping epidemic data with Python; AcWing daily problem |
Tomorrow's plan:

Morning | Intro to Python data analysis |
---|---|
Afternoon | Intro to Python data analysis |
Evening | AcWing daily problem |
Details:
Collecting the latest day's epidemic data
```python
import requests
from bs4 import BeautifulSoup
import re
import json

# Fetch the DXY epidemic report page
response = requests.get("http://ncov.dxy.cn/ncovh5/view/pneumonia")
home_page = response.content.decode()

# The per-country data is embedded in a <script> tag on the page
soup = BeautifulSoup(home_page, 'lxml')
script = soup.find(id="getListByCountryTypeService2true")
text = script.string

# Pull the JSON array out of the script text
json_str = re.findall(r'\[.+\]', text)[0]
# print(json_str)
last_day_corona_virus = json.loads(json_str)
print(last_day_corona_virus)

# ensure_ascii=False keeps Chinese text readable in the output file
with open('last_day_corona_virus.json', 'w', encoding='utf-8') as fp:
    json.dump(last_day_corona_virus, fp, ensure_ascii=False)
```
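A quick way to sanity-check the saved file is to load it back and peek at the first record. The exact keys depend on what the live page returns, so this sketch only inspects them rather than assuming field names:

```python
import json

# Load the file written above and inspect its shape
with open('last_day_corona_virus.json', encoding='utf-8') as fp:
    records = json.load(fp)

print(len(records), 'country records')
print(sorted(records[0].keys()))  # discover the available fields
```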
Refactored into a class:
```python
import requests
from bs4 import BeautifulSoup
import re
import json


class CoronaVirusSpider(object):
    def __init__(self):
        self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

    def get_content_from_url(self, url):
        """
        Fetch a URL and return the decoded response body.
        :param url: the URL to request
        :return: response body as a string
        """
        response = requests.get(url)
        return response.content.decode()

    def parse_home_page(self, home_page):
        """
        Parse the home page and return the embedded data as Python objects.
        :param home_page: home page HTML
        :return: parsed Python data
        """
        soup = BeautifulSoup(home_page, 'lxml')
        script = soup.find(id="getListByCountryTypeService2true")
        text = script.string
        # Pull the JSON array out of the script text
        json_str = re.findall(r'\[.+\]', text)[0]
        data = json.loads(json_str)
        return data

    def save(self, data, path):
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(data, fp, ensure_ascii=False)

    def crawl_last_day_corona_virus(self):
        """
        Collect the latest day's epidemic data for each country.
        """
        home_page = self.get_content_from_url(self.home_url)
        last_day_corona_virus = self.parse_home_page(home_page)
        self.save(last_day_corona_virus, 'last_day_corona_virus.json')

    def run(self):
        self.crawl_last_day_corona_virus()


if __name__ == '__main__':
    spider = CoronaVirusSpider()
    spider.run()
```
Collecting each country's epidemic data since January 23
```python
import requests
from bs4 import BeautifulSoup
import re
import json
from tqdm import tqdm


class CoronaVirusSpider(object):
    def __init__(self):
        self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

    def get_content_from_url(self, url):
        """
        Fetch a URL and return the decoded response body.
        :param url: the URL to request
        :return: response body as a string
        """
        response = requests.get(url)
        return response.content.decode()

    def parse_home_page(self, home_page):
        """
        Parse the home page and return the embedded data as Python objects.
        :param home_page: home page HTML
        :return: parsed Python data
        """
        soup = BeautifulSoup(home_page, 'lxml')
        script = soup.find(id="getListByCountryTypeService2true")
        text = script.string
        # Pull the JSON array out of the script text
        json_str = re.findall(r'\[.+\]', text)[0]
        data = json.loads(json_str)
        return data

    def save(self, data, path):
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(data, fp, ensure_ascii=False)

    def crawl_last_day_corona_virus(self):
        """
        Collect the latest day's epidemic data for each country.
        """
        home_page = self.get_content_from_url(self.home_url)
        last_day_corona_virus = self.parse_home_page(home_page)
        self.save(last_day_corona_virus, 'last_day_corona_virus.json')

    def crawl_corona_virus(self):
        """
        Collect each country's epidemic data since January 23.
        """
        with open('last_day_corona_virus.json', encoding='utf-8') as fp:
            last_day_corona_virus = json.load(fp)

        corona_virus = []
        for country in tqdm(last_day_corona_virus, 'Collecting per-country data since Jan 23'):
            # Each country record links to its full daily time series
            statistic_data_url = country['statisticsData']
            statistic_data_json_str = self.get_content_from_url(statistic_data_url)
            statistic_data = json.loads(statistic_data_json_str)['data']
            # Tag every daily record with the country it belongs to
            for one_day in statistic_data:
                one_day['provinceName'] = country['provinceName']
                one_day['countryShortCode'] = country['countryShortCode']
            corona_virus.extend(statistic_data)

        self.save(corona_virus, 'corona_virus.json')

    def run(self):
        # self.crawl_last_day_corona_virus()
        self.crawl_corona_virus()


if __name__ == '__main__':
    spider = CoronaVirusSpider()
    spider.run()
```
The full epidemic spider project
```python
import requests
from bs4 import BeautifulSoup
import re
import json
from tqdm import tqdm


class CoronaVirusSpider(object):
    def __init__(self):
        self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

    def get_content_from_url(self, url):
        """
        Fetch a URL and return the decoded response body.
        :param url: the URL to request
        :return: response body as a string
        """
        response = requests.get(url)
        return response.content.decode()

    def parse_home_page(self, home_page, tag_id):
        """
        Parse the home page and return the embedded data as Python objects.
        :param home_page: home page HTML
        :param tag_id: id of the <script> tag holding the data
        :return: parsed Python data
        """
        soup = BeautifulSoup(home_page, 'lxml')
        script = soup.find(id=tag_id)
        text = script.string
        # Pull the JSON array out of the script text
        json_str = re.findall(r'\[.+\]', text)[0]
        data = json.loads(json_str)
        return data

    def save(self, data, path):
        # Assumes the data/ directory already exists
        with open(path, 'w', encoding='utf-8') as fp:
            json.dump(data, fp, ensure_ascii=False)

    def crawl_last_day_corona_virus(self):
        """
        Collect the latest day's epidemic data for each country.
        """
        home_page = self.get_content_from_url(self.home_url)
        last_day_corona_virus = self.parse_home_page(home_page, tag_id="getListByCountryTypeService2true")
        self.save(last_day_corona_virus, 'data/last_day_corona_virus.json')

    def crawl_corona_virus(self):
        """
        Collect each country's epidemic data since January 23.
        """
        with open('data/last_day_corona_virus.json', encoding='utf-8') as fp:
            last_day_corona_virus = json.load(fp)

        corona_virus = []
        for country in tqdm(last_day_corona_virus, 'Collecting per-country data since Jan 23'):
            statistic_data_url = country['statisticsData']
            statistic_data_json_str = self.get_content_from_url(statistic_data_url)
            statistic_data = json.loads(statistic_data_json_str)['data']
            # Tag every daily record with the country it belongs to
            for one_day in statistic_data:
                one_day['provinceName'] = country['provinceName']
                one_day['countryShortCode'] = country['countryShortCode']
            corona_virus.extend(statistic_data)

        self.save(corona_virus, 'data/corona_virus.json')

    def crawl_last_day_corona_virus_of_china(self):
        """
        Collect the latest day's epidemic data for each Chinese province.
        """
        # 1. Send the request
        home_page = self.get_content_from_url(self.home_url)
        # 2. Parse the home page and extract the latest day's provincial data
        data = self.parse_home_page(home_page, tag_id='getAreaStat')
        # 3. Save
        self.save(data, 'data/last_day_corona_virus_of_china.json')

    def run(self):
        self.crawl_last_day_corona_virus()
        # self.crawl_corona_virus()
        self.crawl_last_day_corona_virus_of_china()


if __name__ == '__main__':
    spider = CoronaVirusSpider()
    spider.run()
```
Also took a first look at data visualization.
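As a first experiment, here is a minimal sketch that plots one country's cumulative confirmed cases from the collected data/corona_virus.json. Only provinceName and countryShortCode are set by the spider itself; dateId and confirmedCount are assumed names for the daily fields in the statisticsData feed, so adjust them to whatever the saved records actually contain:

```python
import json
import matplotlib.pyplot as plt

# Records written by crawl_corona_virus: one dict per country per day
with open('data/corona_virus.json', encoding='utf-8') as fp:
    records = json.load(fp)

# 'provinceName' was attached by the spider; 'dateId' and
# 'confirmedCount' are assumed names for the daily fields
china = sorted((r for r in records if r.get('provinceName') == '中国'),
               key=lambda r: r['dateId'])

dates = [str(r['dateId']) for r in china]
confirmed = [r['confirmedCount'] for r in china]

plt.plot(dates, confirmed)
plt.xticks(dates[::7], rotation=45)  # thin out the date labels
plt.title('Cumulative confirmed cases')
plt.tight_layout()
plt.show()
```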
AcWing daily problem: https://www.acwing.com/activity/content/problem/content/3416/1/

A direct BFS (flood fill) from the start tile is enough: count every open tile reachable from '@', plus the starting tile itself.
```cpp
#include <bits/stdc++.h>
using namespace std;

int a[25][25];        // 1 = walkable ('.'), 0 = wall ('#')
int visited[25][25];
int dx[4] = {1, -1, 0, 0};
int dy[4] = {0, 0, 1, -1};
int n, m, sx, sy;     // grid size and start position

struct node
{
    int x, y;
};

void bfs()
{
    memset(visited, 0, sizeof visited);
    int res = 0;
    queue<node> q;
    node st = {sx, sy};
    q.push(st);
    visited[st.x][st.y] = 1;
    while (!q.empty())
    {
        node z = q.front();
        q.pop();
        for (int i = 0; i < 4; i++)
        {
            node w;
            w.x = z.x + dx[i];
            w.y = z.y + dy[i];
            // Check bounds before touching the array
            if (w.x >= 1 && w.x <= n && w.y >= 1 && w.y <= m
                && a[w.x][w.y] == 1 && visited[w.x][w.y] == 0)
            {
                q.push(w);
                visited[w.x][w.y] = 1;
                res++;
            }
        }
    }
    cout << res + 1 << endl;  // +1 for the starting tile
}

int main()
{
    char c;
    while (cin >> m >> n)  // input gives width first, then height
    {
        if (n + m == 0) break;
        memset(a, 0, sizeof a);
        for (int i = 1; i <= n; i++)
        {
            for (int j = 1; j <= m; j++)
            {
                cin >> c;
                if (c == '.') a[i][j] = 1;
                if (c == '@')
                {
                    sx = i;
                    sy = j;
                }
            }
        }
        bfs();
    }
}
```
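For comparison, a minimal sketch of the same flood fill in Python, assuming the same input format (width and height first, terminated by 0 0):

```python
import sys
from collections import deque

def count_reachable(grid, sx, sy):
    """Flood fill from (sx, sy); return the number of reachable tiles."""
    n, m = len(grid), len(grid[0])
    visited = {(sx, sy)}
    q = deque([(sx, sy)])
    while q:
        x, y = q.popleft()
        for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            nx, ny = x + dx, y + dy
            if 0 <= nx < n and 0 <= ny < m and grid[nx][ny] != '#' \
                    and (nx, ny) not in visited:
                visited.add((nx, ny))
                q.append((nx, ny))
    return len(visited)  # the starting tile counts as reachable

def main():
    data = sys.stdin.read().split()
    idx = 0
    while idx < len(data):
        w, h = int(data[idx]), int(data[idx + 1])
        idx += 2
        if w == 0 and h == 0:
            break
        grid = data[idx:idx + h]
        idx += h
        for i, row in enumerate(grid):
            if '@' in row:
                print(count_reachable(grid, i, row.index('@')))
                break

if __name__ == '__main__':
    main()
```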