• pyquery的中文编码问题


    # coding=UTF-8
    import urllib.request
    import pyquery
    import requests
    import time
    import json
    import pymysql
    import sys
    import math
    from datetime import datetime
    import time
    import csv
    from time import sleep
    import random
    from bs4 import BeautifulSoup
    import asyncio
    from pyppeteer import launch
    import pyppeteer
    from pyquery import PyQuery as pq
    import chardet
    import codecs
    
    # Parse a locally saved HTML page and print the text of its title element.
    # The encoding is detected from the raw bytes so that decoding does not
    # depend on the platform default codec (the 'gbk' failure noted further down).
    path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # Read the bytes once inside a context manager so the handle is always closed.
    with open(path, 'rb') as f:
        txt = f.read()
    # chardet reports None when it cannot guess the encoding; fall back to UTF-8
    # explicitly instead of letting the locale codec be picked implicitly.
    encodings = chardet.detect(txt)['encoding'] or 'utf-8'
    # Decode the bytes already in memory rather than opening the file a second time.
    content = txt.decode(encodings)
    doc = pq(content)
    # CSS selector: <strong> directly inside an <h1> under an element that has
    # both the "tit" and "clearfix" classes.
    name=doc('.tit.clearfix h1>strong')
    title=name.text()
    print(title)  # expected output: 扬州天下花园
    
    # path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # with open(path, "r")as f:
    #     content = f.read()
    #     doc = pq(content)
    #     name=doc('.tit.clearfix h1>strong')
    #     title=name.text()
    #     print(title) #UnicodeDecodeError: 'gbk' codec can't decode byte 
    
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'   #ansi编码都不行
    # with open(path, "r")as f:
    #     content = f.read()
    #     doc = pq(content)
    #     name=doc('.tit.clearfix h1>strong')
    #     title=name.text()
    #     print(title) # 扬州天下花园
    
    # path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # doc = pq(filename=path, encoding='utf-8')  # 直接读也不行
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title) # UnicodeDecodeError: 'gbk' codec can't decode byte
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'
    # doc = pq(filename=path, encoding='utf-8')
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title)  # æ¬å·¤©ä¸è±å
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'
    # doc = pq(filename=path, encoding='gbk')
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title)  # æ¬å·¤©ä¸è±å
  • 相关阅读:
    如何配置MySQL
    软件工程第三次作业
    软件工程第二次作业
    软件工程第一次作业
    Python 【面试总结】
    Vue【你知道吗?】
    Python 【面试强化宝典】
    Python 【基础面试题】
    Vue 【前端面试题】
    Redis 【常识与进阶】
  • 原文地址:https://www.cnblogs.com/yansc/p/15512731.html
Copyright © 2020-2023  润新知