• (六)python语法之网络编程


    1.TCP

    #1 Server: server.py
    import socket

    host = '127.0.0.1'       # address to listen on
    port = 9000              # port to listen on

    # Create a TCP (stream) socket.
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Allow immediate restart of the server without
    # "Address already in use" while the old socket is in TIME_WAIT.
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind((host, port))     # bind address and port
    s.listen(5)              # backlog of up to 5 pending connections
    print("开始监听...")

    while True:
        c, addr = s.accept() # block until a client connects
        print('客户端地址:', addr)

        data = c.recv(2048)
        print("消息:", data.decode('utf-8'))

        # sendall() keeps writing until the whole payload is sent;
        # plain send() may transmit only part of it.
        c.sendall(b'Welcome to connect!')
        c.close()            # close this client connection
    
        
    #2 Client: client.py
    import socket

    host = '127.0.0.1'
    port = 9000

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((host, port))

    # sendall() guarantees the whole message is written to the stream.
    s.sendall(b'Hello')

    # TCP is connection-oriented: use recv(), not recvfrom().
    # recvfrom() is meant for datagram sockets; on a connected stream
    # socket the address it returns is meaningless (None).
    data = s.recv(1024)
    print(data.decode('utf-8'))

    s.close()
    

    2.UDP

    #1 server.py
    import socket

    host = '127.0.0.1' # address to listen on
    port = 9000        # port to listen on

    # Create a UDP (datagram) socket.
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s.bind((host, port))

    # In the original, s.close() sat after the infinite loop and was
    # unreachable; a try/finally makes the cleanup actually run
    # (e.g. when the loop is interrupted with Ctrl-C).
    try:
        while True:
            # Receive one datagram and echo it back upper-cased.
            data, addr = s.recvfrom(1024)
            print('server收到来自 {} 的消息:'.format(addr), data)
            s.sendto(data.upper(), addr)
    finally:
        s.close()
    
    
    #2 client.py
    import socket

    host = '127.0.0.1' # server address
    port = 9000        # server port

    # UDP needs no connection: just fire a datagram at the server
    # and wait for its reply.
    client = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

    client.sendto(b'hello', (host, port))
    reply, server_addr = client.recvfrom(1024)
    print('客户端收到来自 {} 的消息:'.format(server_addr), reply)

    client.close()
    

    3.requests

    import requests
    import json  # needed for json.dumps() in the POST example below

    # Placeholder target for the request examples further down;
    # the original left `url` undefined.
    url = 'https://httpbin.org/anything'

    #1 Response attributes
    r = requests.get('https://www.baidu.com')
    r.status_code        # HTTP status code
    r.text               # body decoded to str (HTML source)
    r.content            # raw body bytes
    # json() is a method, not an attribute (the original wrote `r.json`,
    # which is a no-op). It is commented out here because this response
    # is HTML, so calling it would raise a decode error.
    # r.json()

    r.headers            # response headers
    r.request.headers    # headers that were sent with the request
    r.cookies            # cookies set by the server

    r.encoding           # encoding guessed from the headers
    r.apparent_encoding  # encoding guessed from the content
    r.raise_for_status() # raise an HTTPError if status is not 2xx

    #2 GET request
    headers = {'User-Agent': 'Mozilla/5.0'}
    payload = {'key': 'value'}
    cookies = dict(mycookies='mycookies')
    requests.get(url, headers=headers, params=payload, cookies=cookies, timeout=5)

    #3 POST request
    payload = {'key': 'value'}
    requests.post(url, data=payload)              # form-encoded body
    requests.post(url, data=json.dumps(payload))  # raw JSON string body
    # (equivalently: requests.post(url, json=payload))

    # Upload a file; the with-block closes the handle afterwards
    # (the original leaked the open file object).
    with open('test.txt', 'rb') as fp:
        requests.post(url, files={'file': fp})

    #4 Proxies
    proxies = {'https': 'http://x.x.x.x:8000'}
    requests.post(url, proxies=proxies)
    

    4.selenium

    # pip install selenium
    # Download ChromeDriver and put it in the same directory as this script.

    #1 Basic usage
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By

    browser = webdriver.Chrome()
    browser.get("https://www.baidu.com")

    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)  # page HTML source

    # Locate an element by id.
    # find_element_by_id() was removed in Selenium 4.3; use the
    # find_element(By.*, ...) form — consistent with the XPath lookup below.
    element = browser.find_element(By.ID, "kw")
    print(element.id)       # Selenium's internal element id
    print(element.text)     # visible text
    print(element.location) # position on the page
    print(element.tag_name) # tag name
    print(element.size)     # rendered size
    print(element.get_attribute('class')) # attribute value

    # Type into the element
    element.send_keys("Python")
    element.send_keys(Keys.ENTER) # simulate pressing Enter
    element.clear()               # clear the input

    # Locate an element by XPath
    element = browser.find_element(By.XPATH, '//*[@id="su"]')
    element.click()  # simulate a click
    
    #2 Explicit waits
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import TimeoutException

    # PhantomJS support was removed from Selenium; headless Chrome gives
    # the same "no visible window" behavior.
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    browser = webdriver.Chrome(options=options)
    browser.set_window_size(1400, 900)

    # Waits up to 10 seconds for a condition to become true.
    wait = WebDriverWait(browser, 10)

    def search(url, retries=3):
        """Load *url* and wait for the search box (#kw) to appear.

        The original recursed unconditionally on every timeout, which
        could recurse forever; this retries a bounded number of times
        and returns the element, or None if all attempts time out.
        """
        for _ in range(retries):
            try:
                browser.get(url)
                # Block until the element is present in the DOM.
                return wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "#kw")))
            except TimeoutException:
                continue
        return None

    search('https://www.baidu.com')
    

    5.BeautifulSoup

    from bs4 import BeautifulSoup
    import re

    html_doc = '''
    <!DOCTYPE html>
    <html><head><title>百度一下,你就知道 </title></head>
    <body>
      <div id="u1">
        <a class="mnav" href="http://news.baidu.com" id="news">新闻</a>
        <a class="mnav" href="https://www.hao123.com">hao123</a>
        <a class="mnav" href="http://map.baidu.com" id="map">地图</a>
        <a class="mnav" href="http://v.baidu.com" id="video">视频</a>
        <a class="mnav" href="http://tieba.baidu.com" id="tieba">贴吧</a>
      </div>
    </body>
    </html>
    '''

    #1 Create the BeautifulSoup object
    soup = BeautifulSoup(html_doc, 'html.parser')
    # Pretty-print the parsed HTML
    print(soup.prettify())

    #2 First tag matching <a>
    print(soup.a)
    # <a class="mnav" href="http://news.baidu.com" id="news">新闻</a>

    # Value of the tag's "class" attribute (multi-valued, so a list)
    print(soup.a['class'])
    # ['mnav']

    # All attributes of the tag as a dict
    print(soup.a.attrs)
    # {'class': ['mnav'], 'href': 'http://news.baidu.com', 'id': 'news'}

    # Text content of the tag
    print(soup.a.string)
    # 新闻

    #3 CSS selectors; select() returns a list
    print(soup.select('#map'))
    # [<a class="mnav" href="http://map.baidu.com" id="map">地图</a>]
    print(soup.select('div a[id="map"]'))
    # [<a class="mnav" href="http://map.baidu.com" id="map">地图</a>]

    #4 find_all() search
    print(soup.find_all('a', id='video'))
    # [<a class="mnav" href="http://v.baidu.com" id="video">视频</a>]
    print(soup.find_all(re.compile("^a")))
    # [<a class="mnav" href="http://news.baidu.com" id="news">新闻</a>, <a class="mnav" href="https://www.hao123.com">hao123</a>, <a class="mnav" href="http://map.baidu.com" id="map">地图</a>, <a class="mnav" href="http://v.baidu.com" id="video">视频</a>, <a class="mnav" href="http://tieba.baidu.com" id="tieba">贴吧</a>]

    # find_all() over the document's strings.
    # The `text=` keyword is deprecated since bs4 4.4; the modern
    # spelling is `string=`.
    print(soup.find_all(string=re.compile("^贴")))
    # ['贴吧']

    # find_all() with a custom predicate function
    def func(tag):
        return tag.has_attr('class') and not tag.has_attr('id')
    print(soup.find_all(func))
    # [<a class="mnav" href="https://www.hao123.com">hao123</a>]
    

    6.PyQuery

    from pyquery import PyQuery as pq
    from lxml import etree  # kept from the original; no longer required below

    html_doc = '''
    <!DOCTYPE html>
    <html><head><title>百度一下,你就知道 </title></head>
    <body>
      <div id="u1">
        <a class="mnav" href="http://news.baidu.com" id="news">新闻</a>
        <a class="mnav" href="https://www.hao123.com">hao123</a>
        <a class="mnav" href="http://map.baidu.com" id="map">地图</a>
        <a class="mnav" href="http://v.baidu.com" id="video">视频</a>
        <a class="mnav" href="http://tieba.baidu.com" id="tieba">贴吧</a>
      </div>
    </body>
    </html>
    '''

    #1 Initialize directly from the HTML string.
    # The original wrapped it in etree.fromstring(), i.e. the strict XML
    # parser, which rejects real-world (non-well-formed) HTML; passing
    # the string lets PyQuery use lxml's tolerant HTML parser.
    doc = pq(html_doc)

    #2 CSS selector
    item = doc('#news')

    print(item.attr('href'))
    # http://news.baidu.com

    print(item.text())
    # 新闻

    #3 Chained calls
    items = doc('#u1').find('a').items()
    for item in items:
        print(item.text())
        # 新闻 hao123 地图 视频 贴吧
    
  • 相关阅读:
    eclipse导入项目后出现红色叉号
    java.lang.ArithmeticException: Non-terminating decimal expansion
    IDEA 2020 注册码
    最新调查:游戏女性玩家群体达3亿!这意味着什么,嘻嘻......
    Linux安装配置Nginx服务器
    mysql/mariadb 远程登陆
    redis 集群 新增 master 节点 与 slave 挂靠命令
    RedisCluster 集群 之 Ruby 安装
    Linux CentOS 7 关闭防火墙
    springcloud 与 springboot 对应关系
  • 原文地址:https://www.cnblogs.com/qxcheng/p/13536108.html
Copyright © 2020-2023  润新知