• 词向量可视化--[tensorflow , python]


    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    """
    ----------------------------------
    Version    : ??
    File Name :     visual_vec.py
    Description :   
    Author  :       xijun1
    Email   :
    Date    :       2018/12/25
    -----------------------------------
    Change Activity :   2018/12/25
    -----------------------------------
    
    """
    __author__ = 'xijun1'
    from tqdm import tqdm
    import numpy as np
    import tensorflow as tf
    from tensorflow.contrib.tensorboard.plugins import projector
    import os
    import codecs
    
    # Export word vectors stored in a word2vec-style text file so that they can
    # be browsed in TensorBoard's embedding projector.
    #
    # Module-level results:
    #   words       -- vocabulary entries, in file order
    #   embeddings  -- one float32 vector per entry of ``words``
    #   log_path    -- directory receiving metadata.tsv, the projector config,
    #                  the graph summary and the model checkpoint
    words, embeddings = [], []
    log_path = 'model'
    vec_path = '/Users/xxx/github/python_demo/vec.txt'

    # The first line is "<vocab_size> <vector_size>"; every following line is
    # "<word> <v1> ... <vN>", optionally terminated by a trailing space.
    with codecs.open(vec_path, 'r', encoding='utf-8') as f:
        header = f.readline()
        vocab_size, vector_size = map(int, header.split())
        for line_no in tqdm(range(vocab_size)):
            # Strip the line terminator (and an optional trailing space) first,
            # so the last component is kept whether or not the writer emitted
            # a trailing space. Splitting on a single space keeps an empty
            # leading token visible as word == "".
            fields = f.readline().rstrip(' \r\n').split(' ')
            word = fields[0]
            if word == "":
                # Empty token (or premature end of file): nothing to export.
                continue
            vector = fields[1:]
            if len(vector) != vector_size:
                # Fail early with context instead of a shape error at feed time.
                raise ValueError(
                    'line %d: expected %d vector components for %r, got %d'
                    % (line_no + 2, vector_size, word, len(vector)))
            words.append(word)
            embeddings.append(np.array(vector, dtype=np.float32))
    assert len(words) == len(embeddings)
    print(len(words))

    # The metadata file and the checkpoint both live in log_path; make sure it
    # exists before anything is written into it.
    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # One label per line, in the same order as the rows of the embedding matrix.
    with codecs.open(os.path.join(log_path, 'metadata.tsv'), 'w',
                     encoding='utf-8') as f:
        for word in tqdm(words):
            f.write(word + '\n')

    with tf.Session() as sess:
        # The variable starts as a dummy 1-element tensor and is then
        # overwritten through a placeholder (validate_shape=False allows the
        # shape change). NOTE(review): presumably done to keep the full matrix
        # out of the graph definition -- confirm before simplifying.
        X = tf.Variable([0.0], name='embedding')
        place = tf.placeholder(tf.float32, shape=[len(words), vector_size])
        set_x = tf.assign(X, place, validate_shape=False)
        sess.run(tf.global_variables_initializer())
        # The explicit reshape keeps the fed array 2-D even when no words
        # were loaded.
        matrix = np.asarray(embeddings, dtype=np.float32).reshape(
            len(words), vector_size)
        sess.run(set_x, feed_dict={place: matrix})

        # with summary
        summary_writer = tf.summary.FileWriter(log_path, sess.graph)
        config = projector.ProjectorConfig()
        embedding_conf = config.embeddings.add()
        embedding_conf.tensor_name = 'embedding:0'
        # Same file name as the metadata.tsv written into log_path above.
        embedding_conf.metadata_path = 'metadata.tsv'
        projector.visualize_embeddings(summary_writer, config)
        # Flush and release the event file.
        summary_writer.close()

        # save
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(log_path, "model.ckpt"))
    
    

    结果:

  • 相关阅读:
    git merge merge错误 —— 纠正
    copy —— docker cp & kubectl cp
    docker —— 获取 仓库中的 tag 列表
    课程——《深度学习的优化方法》
    基础知识篇(干货,次次都有新体悟)——十大经典排序算法2
    基础知识篇(干货,次次都有新体悟)——数据结构
    criteo 接口升级——MAPI deprecated
    CAP
    Redis 数据类型
    十大经典排序算法(转发)
  • 原文地址:https://www.cnblogs.com/gongxijun/p/10175937.html
Copyright © 2020-2023  润新知