• keras字符编码


    https://www.jianshu.com/p/258a21ae0390
    https://blog.csdn.net/apengpengpeng/article/details/80866034
    #-*-coding:utf-8-*-
    # import numpy as np
    #
    # samples = ['The cat sat on the mat.', 'The dog ate my homework.']
    #
    # # 10
    # # 定义一个字典,得到{'The': 1, 'cat': 2, 'sat': 3, 'on': 4, 'the': 5, 'mat.': 6, 'dog': 7, 'ate': 8, 'my': 9, 'homework.': 10},也就是找出这些句子中出现了哪些词,并为每个词赋予一个索引值,其实就是个词库
    # token_index = {}
    # for sample in samples:
    # for word in sample.split():
    # if word not in token_index:
    # token_index[word] = len(token_index) + 1
    #
    # # 限制了读取的句子的长度,一句话最长10个词
    # print(token_index)
    # max_length = 10
    # results = np.zeros(shape=(len(samples),
    # max_length,
    # max(token_index.values()) + 1))
    #
    # # print(results) 2, 10, 11
    # for i, sample in enumerate(samples):
    # for j, word in list(enumerate(sample.split()))[:max_length]:
    # index = token_index.get(word)
    # results[i, j, index] = 1.
    # print(results)

    import numpy as np
    import string
    samples = ['The cat sat on the mat.', 'The dog ate my homework.']
    # Character-level one-hot encoding over a fixed alphabet: every printable
    # ASCII character (digits, letters, punctuation and whitespace).
    characters = string.printable
    # token_index maps index -> character. Indices start at 1, so slot 0 of
    # the last axis of `results` is never set.
    token_index = dict(zip(range(1, len(characters) + 1), characters))
    # Reverse lookup (character -> index), built once so that encoding a
    # character is a single dict access instead of a scan over token_index.
    char_index = {character: key for key, character in token_index.items()}

    # Each sample is encoded using at most max_length characters.
    max_length = 50
    # Shape: (number of samples, max_length, alphabet size + 1).
    results = np.zeros((len(samples), max_length, max(token_index.keys()) + 1))
    for i, sample in enumerate(samples):
        # Truncate so that a sample longer than max_length cannot index past
        # the second axis of `results`.
        for j, character in enumerate(sample[:max_length]):
            index = char_index.get(character)
            # A character outside the alphabet keeps an all-zero row rather
            # than reusing the index of the previously encoded character.
            if index is not None:
                results[i, j, index] = 1.


    print(results)
  • 相关阅读:
    nginx详解
    keeplived高可用集群
    mysql主从同步
    elasticsearch基础
    redis集群管理--sentinel
    socket阻塞与非阻塞,同步与异步,select,poll,epoll
    django+channels+daphne实现websocket部署
    Django+Nginx+uWSGI生产环境部署
    进制转换
    对golang指针的理解
  • 原文地址:https://www.cnblogs.com/shuimuqingyang/p/10422725.html
Copyright © 2020-2023  润新知