keras字符编码

https://www.jianshu.com/p/258a21ae0390
https://blog.csdn.net/apengpengpeng/article/details/80866034
#-*-coding:utf-8-*-
# import numpy as np
#
# samples = ['The cat sat on the mat.', 'The dog ate my homework.']
#
# # 10
# # 构建词表（字典）：遍历所有样本，为每个不重复的词分配一个从 1 开始的索引，得到 {'The': 1, 'cat': 2, 'sat': 3, 'on': 4, 'the': 5, 'mat.': 6, 'dog': 7, 'ate': 8, 'my': 9, 'homework.': 10}
# token_index = {}
# for sample in samples:
#     for word in sample.split():
#         if word not in token_index:
#             token_index[word] = len(token_index) + 1
#
# # 限制读取的句子长度：每句话最多只取前 10 个词（见下方的 max_length）
# print(token_index)
# max_length = 10
# results = np.zeros(shape=(len(samples),
#                           max_length,
#                           max(token_index.values()) + 1))
#
# # print(results) 2, 10, 11
# for i, sample in enumerate(samples):
#     for j, word in list(enumerate(sample.split()))[:max_length]:
#         index = token_index.get(word)
#         results[i, j, index] = 1.
# print(results)

import numpy as np
import string
def one_hot_encode_characters(texts, char_to_index, max_length):
    """Return a character-level one-hot encoding of *texts*.

    Args:
        texts: Sequence of strings to encode.
        char_to_index: Mapping from a single character to its integer slot
            on the last axis of the result.
        max_length: Number of character positions kept per text; characters
            beyond this position are dropped.

    Returns:
        A float array of shape
        ``(len(texts), max_length, max(char_to_index.values()) + 1)`` where
        ``out[i, j, k] == 1.0`` iff character ``j`` of text ``i`` maps to
        index ``k``. Characters missing from *char_to_index* leave their
        position all zeros.
    """
    depth = max(char_to_index.values(), default=0) + 1
    encoded = np.zeros((len(texts), max_length, depth))
    for i, text in enumerate(texts):
        # Truncate so that texts longer than max_length cannot index past
        # the second axis of the array.
        for j, character in enumerate(text[:max_length]):
            index = char_to_index.get(character)
            if index is not None:
                encoded[i, j, index] = 1.
    return encoded


samples = ['The cat sat on the mat.', 'The dog ate my homework.']
# Predefined character set: every character in string.printable (digits,
# ASCII letters, punctuation and whitespace).
characters = string.printable
# Index -> character table; indices start at 1, so slot 0 is never set.
token_index = dict(zip(range(1, len(characters) + 1), characters))
# Reverse table built once, replacing a linear scan of token_index per character.
char_to_index = {character: index for index, character in token_index.items()}

max_length = 50
results = one_hot_encode_characters(samples, char_to_index, max_length)


print(results)

相关阅读:
nginx详解
keepalived高可用集群
mysql主从同步
elasticsearch基础
redis集群管理--sentinel
socket阻塞与非阻塞，同步与异步，select，poll，epoll
django+channels+daphne实现websocket部署
Django+Nginx+uWSGI生产环境部署
进制转换
对golang指针的理解

原文地址：https://www.cnblogs.com/shuimuqingyang/p/10422725.html