import requests
# res = requests.get('http://httpbin.org/get')
# res1 = res.json()  # parse the response body as JSON
# Converting manually also works, but is more cumbersome:
# import json
# res1 = json.loads(res.text)
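# A runnable sketch of the json() shortcut (httpbin echoes our query params back):
# res = requests.get('http://httpbin.org/get', params={'q': 'test'})
# print(res.json()['args'])  # -> {'q': 'test'}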
# SSL: the security-certificate layer -- the "s" in https.
# To present a client-side certificate, pass cert=(crt_path, key_path):
# response = requests.get('https://www.12306.cn',
#                         cert=('/path/server.crt',
#                               '/path/key'))
# print(response.status_code)
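# Related knob (a hedged aside): verify=False skips server-certificate checks
# entirely -- handy for self-signed certs, but insecure.
# import urllib3
# urllib3.disable_warnings()  # silence the resulting InsecureRequestWarning
# response = requests.get('https://www.12306.cn', verify=False)
# print(response.status_code)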
# Forward proxy: reach the target address through someone else's server.
# Paid IP proxies exist; to verify one works, access your own service through
# it and check the client IP on the server side.
# proxies = {
#     # 'http': 'http://egon:123@localhost:9743',  # proxy with auth: user:password goes before the @
#     # 'http': 'http://localhost:9743',
#     'https': 'https://localhost:9743',
#     'http': 'http://124.205.155.148:9090',
# }
# response = requests.get('https://www.12306.cn',
#                         proxies=proxies)
# print(response.status_code)
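# To confirm traffic really goes through the proxy, hit an IP-echo endpoint
# (the proxy address above is just a placeholder):
# print(requests.get('http://httpbin.org/ip', proxies=proxies).json())  # shows the egress IP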
# Timeout setting: raise an error if no response arrives within `timeout` seconds
# import requests
# response = requests.get('https://www.baidu.com',
#                         timeout=0.0001)  # absurdly small on purpose, so it raises
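# Catching the failure (a minimal sketch):
# from requests.exceptions import Timeout
# try:
#     requests.get('https://www.baidu.com', timeout=0.0001)
# except Timeout:
#     print('request timed out')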
# Uploading a file: pass open file handles via the files= parameter
files = {'file': open('a.jpg', 'rb')}
response = requests.post('http://httpbin.org/post', files=files)
print(response.status_code)
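# Tidier variant (a sketch): a context manager closes the handle, and a
# (filename, fileobj, content_type) tuple sets the upload metadata explicitly.
# with open('a.jpg', 'rb') as f:
#     files = {'file': ('a.jpg', f, 'image/jpeg')}
#     print(requests.post('http://httpbin.org/post', files=files).status_code)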
from bs4 import BeautifulSoup
# BeautifulSoup turns HTML page data into a searchable object
'''
Two core methods: find and find_all.
find:
    -name="tag name"       match by tag name
    -id=..., class_=...    match by attribute and pull that tag out
    -tag.text              get the tag's text content
    -tag.get(attr_name)    get the value of one of the tag's attributes
find_all: same filters, but returns every match instead of just the first
'''
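# Tiny demo of the two methods (a sketch on throwaway markup):
# demo = BeautifulSoup('<div><p id="a">one</p><p>two</p></div>', 'lxml')
# print(demo.find('p').text)                   # 'one' -- first match only
# print([p.text for p in demo.find_all('p')])  # ['one', 'two'] -- every match
# print(demo.find('p').get('id'))              # 'a'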
url='https://www.autohome.com.cn/news/1/#liststart'
res = requests.get(url)
soup = BeautifulSoup(res.text, "lxml")
div = soup.find(id="auto-channel-lazyload-article")
ul = div.find(name='ul')  # find returns only the first ul tag
# ul_list = div.find_all(class_="article")  # find_all: every tag below with class "article"
# print(len(ul_list))
li_list = ul.find_all(name='li')
# print(len(li_list))
for li in li_list:
    h3 = li.find(name='h3')
    if h3:
        title = h3.text  # text content of the h3 tag
        print(title)
    a = li.find(name='a')
    if a:
        article_url = a.get('href')  # value of the a tag's href attribute
        print(article_url)
    img = li.find(name='img')
    if img:
        img_url = img.get('src')
        print(img_url)
    p = li.find(name='p')
    if p:
        content = p.text
        print(content)
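# The href/src values often come back protocol-relative ('//...'); a hedged
# sketch for normalizing them against the page URL:
# from urllib.parse import urljoin
# print(urljoin(url, img_url))      # '//x.com/a.jpg' -> 'https://x.com/a.jpg'
# print(urljoin(url, article_url))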
# Any tag you get back can itself be searched again with find/find_all
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" id="bbaa"><b name="xx" age="18">The Dormouse's story</b><b>xxxx</b></p>
<p class="xxx" a="xxx">asdfasdf</p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""
# soup = BeautifulSoup(html_doc, 'lxml')
# ress = soup.prettify()  # pretty-print / tidy up the markup
# soup = BeautifulSoup(ress, 'lxml')
# print(ress)
# Traversing the document tree
# print(soup.p.name)                     # tag name: 'p'
# print(soup.p.attrs)                    # attributes as a dict
# print(soup.p.string)                   # None here -- the tag has more than one child
# print(list(soup.p.strings))            # every text fragment inside the tag
# print(soup.p.text)                     # all text, concatenated
# print(soup.body.p.text)
# print(soup.body.p.contents)            # direct children, as a list
# print(list(soup.body.p.children))      # direct children, as an iterator
# print(list(soup.body.p.descendants))   # children, grandchildren, ... recursively
# print(soup.body.p.parent)              # the enclosing tag
# print(list(soup.body.p.parents))       # every ancestor, up to the document itself
# print(len(list(soup.body.p.parents)))
# print(soup.body.p.previous_sibling)
# print(soup.find(class_="xxx").previous_sibling)
# print(soup.a.next_sibling)
# print(soup.a.previous_sibling)
# print(type(soup.p))                    # bs4.element.Tag
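# Pitfall worth knowing: next_sibling/previous_sibling often return the
# whitespace text node between tags rather than the neighbouring tag. A sketch:
# soup = BeautifulSoup(html_doc, 'lxml')
# print(repr(soup.a.next_sibling))        # ',\n' -- a NavigableString, not a tag
# print(soup.a.find_next_sibling('a'))    # the <a id="link2"> tag itself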
# Searching the document tree
# Five kinds of filters: string, regex, boolean (True), method, list
import re
# print(soup.find_all(name='b'))               # string filter: exact tag name
# print(soup.find_all(name=re.compile('^b')))  # regex filter: tag names starting with 'b'
# print(soup.find_all(id=re.compile('^b')))    # regex filter on an attribute
# print(soup.find_all(name=['a','b']))         # list filter: any name in the list
# print(soup.find_all(name=True))              # boolean filter: every tag
# def has_class_but_no_id(tag):                # method filter: custom predicate
#     return tag.has_attr('class') and not tag.has_attr('id')
# print(soup.find_all(name=has_class_but_no_id))
# CSS selectors via select(); note bs4 itself has no XPath support -- use lxml directly for that
# print(soup.select(".title"))
# print(soup.select("#bbaa"))
# print(soup.select('#bbaa b')[0].attrs.get('name'))
# recursive=False searches only direct children (one level down)
# limit=N stops after the first N matches
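# Quick illustration of both knobs (a sketch, reusing html_doc from above):
# soup = BeautifulSoup(html_doc, 'lxml')
# print(soup.html.find_all('p', recursive=False))  # [] -- the p tags sit under body, not html
# print(len(soup.find_all('a', limit=2)))          # 2 -- stops after two matches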
sibling_soup = BeautifulSoup("<a><b>text1</b><c>text2</c></a>", 'lxml')
print(sibling_soup.b.next_sibling)      # <c>text2</c> -- b and c share a parent, so they are siblings
print(sibling_soup.c.previous_sibling)  # <b>text1</b>
from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # keyboard key constants
import time
# from selenium.webdriver.chrome.options import Options
# chrome_options = Options()
# chrome_options.add_argument('window-size=1920x3000')               # set the browser resolution
# chrome_options.add_argument('--disable-gpu')                       # Google's docs suggest this to dodge a bug
# chrome_options.add_argument('--hide-scrollbars')                   # hide scrollbars, for some odd pages
# chrome_options.add_argument('blink-settings=imagesEnabled=false')  # skip loading images, for speed
# chrome_options.add_argument('--headless')                          # no visible window; needed on Linux without a display
# chrome_options.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"  # point at a specific Chrome binary
# bro = webdriver.PhantomJS()
# bro = webdriver.Chrome(chrome_options=chrome_options)
bro = webdriver.Chrome()
bro.get('https://www.baidu.com')
# print(bro.page_source)
time.sleep(1)
# grab the search input box (Baidu gives it id 'kw')
inp = bro.find_element_by_id('kw')
# type a query into the box
inp.send_keys("美女")
inp.send_keys(Keys.ENTER)  # press Enter to submit
# alternative: grab the search button (id 'su') and click it, as sketched below
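# Hedged sketch of that alternative (assumes Baidu's button keeps the id 'su'):
# btn = bro.find_element_by_id('su')
# btn.click()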
time.sleep(3)
bro.close()