• python编程库


    编程库
    1.time
    import time
    # Current time as a float number of seconds since the epoch.
    print(time.time())
    # Store the broken-down local time under its own name. The original
    # rebound `time` itself, which shadowed the module and made every later
    # `time.*` call fail with AttributeError.
    local_time = time.localtime(time.time())
    print(local_time)
    print(local_time.tm_year)
    """
    1563803665.310865
    time.struct_time(tm_year=2019, tm_mon=7, tm_mday=22, tm_hour=21, tm_min=54, tm_sec=25, tm_wday=0, tm_yday=203, tm_isdst=0)
    2019
    """
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    2.Matplotlib
    绘图工具包

    3.Scikit-learn
    封装了大量经典以及最新的机器学习模型

    4.Pandas
    针对于数据处理和分析的python工具包,实现了大量便于数据读写,清洗,填充及分析功能

    4.1读取文件
    import pandas as pd
    # pandas has two data types: Series and DataFrame

    # Location of the CSV file that is passed to read_file below.
    data_path = "C:/Users/admin/Desktop/111.csv"
    # Read the file
    def read_file(data_path, encoding="GBK"):
        """Load a CSV file and discard rows that contain missing values.

        Args:
            data_path: Path of the CSV file to read.
            encoding: Text encoding used to decode the file. Defaults to
                "GBK", the value the original snippet hard-coded, so existing
                one-argument calls behave as before.

        Returns:
            A pandas DataFrame holding only the rows in which every column
            has a value. The surviving rows keep their original index labels.
        """
        datas = pd.read_csv(data_path, encoding=encoding)
        return datas.dropna()

    # Load the CSV at data_path through read_file and print the result.
    data = read_file(data_path)
    print(data)

    # Take the text of one column ("asr_agent_raw") as contents_agent, then
    # convert its values into a plain Python list.
    contents_agent = data["asr_agent_raw"]
    contents = contents_agent.values.tolist()
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    5.jieba
    import jieba
    # Dictionary file read by seg_word, which passes it to
    # jieba.load_userdict and also reads it as the stop-word list.
    stopwords_file = "D:/gitProject/smartlink-sqc/smartlink-sqc-wordle/python/dict/user_dict.txt"
    def _is_word(seg):
        """Return True for a multi-character token that starts with a CJK ideograph or an ASCII letter."""
        if len(seg) <= 1:
            return False
        # Test the first character only. Comparing the whole token against a
        # one-character upper bound (as the original did) rejects every word
        # that starts with the bound itself, e.g. "Zoo" > "Z".
        first = seg[0]
        return (
            u'\u4e00' <= first <= u'\u9fa5'
            or u'A' <= first <= u'Z'
            or u'a' <= first <= u'z'
        )


    def seg_word(contents):
        """Segment text with jieba and drop stop words.

        Args:
            contents: Object exposing ``.values.tolist()`` (the caller passes
                a pandas Series); each element is one piece of text.

        Returns:
            A DataFrame with a single ``words`` column containing the kept
            tokens, minus any token listed in the stop-word file.

        Notes:
            Reads the module-level ``stopwords_file`` twice: once as a jieba
            user dictionary and once as the stop-word list.
            NOTE(review): confirm that using one file for both is intended.
        """
        lines = contents.values.tolist()
        jieba.load_userdict(stopwords_file)

        segment = []
        for line in lines:
            try:
                segs = jieba.lcut(line)
            except Exception:
                # Best effort, as in the original: report the entry that
                # could not be segmented and move on to the next one.
                print(line)
                continue
            # The length check now applies to every token; the original
            # and/or precedence let single ASCII letters slip through.
            segment.extend(seg for seg in segs if _is_word(seg))

        words_df = pd.DataFrame({'words': segment})
        stopwords = pd.read_csv(stopwords_file,
                                index_col=False,
                                quoting=3,  # 3 == csv.QUOTE_NONE: no quote handling
                                sep=" ",
                                names=['stopwords'],
                                encoding='utf-8')
        words_df = words_df[~words_df.words.isin(stopwords.stopwords)]
        return words_df

    # Run word segmentation
    words_agent = seg_word(contents_agent)
    print(words_agent)
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    6.NumPy & SciPy(http://www.amjmh.com)
    NumPy是最基础的编程库,提供一些高级的数学运算机制和高效的向量和矩阵运算功能
    SciPy是在NumPy的基础上构建的,更为强大的科学计算包

    import numpy as np
    def word_freq(words_df):
        """Count how often each word occurs, most frequent first.

        Args:
            words_df: DataFrame with a ``words`` column, one token per row.

        Returns:
            A DataFrame with the columns ``words`` and ``count``, sorted by
            ``count`` in descending order.
        """
        # groupby(...).size() replaces agg({"count": np.size}); that
        # dict-renaming form raises SpecificationError on pandas >= 1.0.
        words_stat = words_df.groupby("words").size().reset_index(name="count")
        return words_stat.sort_values(by=["count"], ascending=False)

    words_stat_agent = word_freq(words_agent)
    # Print the 10 most frequent words
    print(words_stat_agent.head(10))
    1
    2
    3
    4
    5
    6
    7
    8
    9
    7.Anaconda平台
    一次性获得300多种用于科学和工程计算相关任务的python编程库的支持
    ---------------------

  • 相关阅读:
    数据结构一
    MVC5.0(一)
    异步多线程(六)lock锁
    异步多线程(五)多线程异常处理
    异步多线程(四)Task
    paypal payflow设置视频教程
    Java栈Stack知识点
    Java知识树梳理
    js定时器
    jdk环境变量配置改变不生效的问题
  • 原文地址:https://www.cnblogs.com/hyhy904/p/11322387.html
Copyright © 2020-2023  润新知