• TensorFlow v2.0实现Word2Vec算法


    使用TensorFlow v2.0实现Word2Vec算法计算单词的向量表示,这个例子是使用一小部分维基百科文章来训练的。

    更多信息请查看论文: Mikolov, Tomas et al. “Efficient Estimation of Word Representations in Vector Space.”, 20131

    from __future__ import division, print_function, absolute_import
    
    import collections
    import os
    import random
    import urllib
    import zipfile
    
    import numpy as np
    import tensorflow as tf
    
    
    learning_rate = 0.1
    batch_size = 128
    num_steps = 3000000
    display_step = 10000
    eval_step = 200000
    
    # 训练参数
    learning_rate = 0.1
    batch_size = 128
    num_steps = 3000000
    display_step = 10000
    eval_step = 200000
    
    # 评估参数
    eval_words = ['five', 'of', 'going', 'hardware', 'american', 'britain']
    
    # Word2Vec 参数
    embedding_size = 200 # 嵌入向量的维度 vector.
    max_vocabulary_size = 50000 # 词汇表中不同单词的总数words in the vocabulary.
    min_occurrence = 10  # 删除出现小于n次的所有单词
    skip_window = 3 # 左右各要考虑多少个单词
    num_skips = 2 # 重复使用输入生成标签的次数
    num_sampled = 64 # 负采样数量
    
    # 下载一小部分维基百科文章集
    url = 'http://mattmahoney.net/dc/text8.zip'
    data_path = 'text8.zip'
    if not os.path.exists(data_path):
        print("Downloading the dataset... (It may take some time)")
        filename, _ = urllib.urlretrieve(url, data_path)
        print("Done!")
    
    # 解压数据集文件,文本已处理完毕
    with zipfile.ZipFile(data_path) as f:
        text_words = f.read(f.namelist()[0]).lower().split()
    
    # 构建词典并用 UNK 标记替换频数较低的词
    count = [('UNK', -1)]
    # 检索最常见的单词
    count.extend(collections.Counter(text_words).most_common(max_vocabulary_size - 1))
    # 删除少于'min_occurrence'次数的样本
    for i in range(len(count) - 1, -1, -1):
        if count[i][1] < min_occurrence:
            count.pop(i)
        else:
            #该集合是有序的,因此在当出现小于'min_occurrence'时停止
            break
    # 计算单词表单词个数
    vocabulary_size = len(count)
    # 为每一个词分配id
    word2id = dict()
    for i, (word, _)in enumerate(count):
        word2id[word] = i
    
    data = list()
    unk_count = 0
    for word in text_words:
         # 检索单词id,或者如果不在字典中则为其指定索引0('UNK')
        index = word2id.get(word, 0)
        if index == 0:
            unk_count  = 1
        data.append(index)
    count[0] = ('UNK', unk_count)
    id2word = dict(zip(word2id.values(), word2id.keys()))
    
    print("Words count:", len(text_words))
    print("Unique words:", len(set(text_words)))
    print("Vocabulary size:", vocabulary_size)
    print("Most common words:", count[:10])
    

    output:

    Words count: 17005207
    Unique words: 253854
    Vocabulary size: 47135
    Most common words: [('UNK', 444176), ('the', 1061396), ('of', 593677), ('and', 416629), ('one', 411764), ('in', 372201), ('a', 325873), ('to', 316376), ('zero', 264975), ('nine', 250430)]
    
    data_index = 0
    # 为skip-gram模型生成训练批次
    def next_batch(batch_size, num_skips, skip_window):
        global data_index
        assert batch_size % num_skips == 0
        assert num_skips <= 2 * skip_window
        batch = np.ndarray(shape=(batch_size), dtype=np.int32)
        labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
        # 得到窗口长度( 当前单词左边和右边   当前单词)
        span = 2 * skip_window   1
        buffer = collections.deque(maxlen=span)
        if data_index   span > len(data):
            data_index = 0
        buffer.extend(data[data_index:data_index   span])
        data_index  = span
        for i in range(batch_size // num_skips):
            context_words = [w for w in range(span) if w != skip_window]
            words_to_use = random.sample(context_words, num_skips)
            for j, context_word in enumerate(words_to_use):
                batch[i * num_skips   j] = buffer[skip_window]
                labels[i * num_skips   j, 0] = buffer[context_word]
            if data_index == len(data):
                buffer.extend(data[0:span])
                data_index = span
            else:
                buffer.append(data[data_index])
                data_index  = 1
        #回溯一点,以避免在批处理结束时跳过单词
        data_index = (data_index   len(data) - span) % len(data)
        return batch, labels
    
    # 确保在CPU上分配以下操作和变量
    # (某些操作在GPU上不兼容)
    with tf.device('/cpu:0'):
        # 创建嵌入变量(每一行代表一个词嵌入向量) embedding vector).
        embedding = tf.Variable(tf.random.normal([vocabulary_size, embedding_size]))
        # 构造NCE损失的变量
        nce_weights = tf.Variable(tf.random.normal([vocabulary_size, embedding_size]))
        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
    
    def get_embedding(x):
        with tf.device('/cpu:0'):
           # 对于X中的每一个样本查找对应的嵌入向量
            x_embed = tf.nn.embedding_lookup(embedding, x)
            return x_embed
    
    def nce_loss(x_embed, y):
        with tf.device('/cpu:0'):
            # 计算批处理的平均NCE损失
            y = tf.cast(y, tf.int64)
            loss = tf.reduce_mean(
                tf.nn.nce_loss(weights=nce_weights,
                               biases=nce_biases,
                               labels=y,
                               inputs=x_embed,
                               num_sampled=num_sampled,
                               num_classes=vocabulary_size))
            return loss
    
    # 评估
    def evaluate(x_embed):
        with tf.device('/cpu:0'):
             # 计算输入数据嵌入与每个嵌入向量之间的余弦相似度
            x_embed = tf.cast(x_embed, tf.float32)
            x_embed_norm = x_embed / tf.sqrt(tf.reduce_sum(tf.square(x_embed)))
            embedding_norm = embedding / tf.sqrt(tf.reduce_sum(tf.square(embedding), 1, keepdims=True), tf.float32)
            cosine_sim_op = tf.matmul(x_embed_norm, embedding_norm, transpose_b=True)
            return cosine_sim_op
    
    # 定义优化器
    optimizer = tf.optimizers.SGD(learning_rate)
    
    # 优化过程
    def run_optimization(x, y):
        with tf.device('/cpu:0'):
           # 将计算封装在GradientTape中以实现自动微分
            with tf.GradientTape() as g:
                emb = get_embedding(x)
                loss = nce_loss(emb, y)
    
            # 计算梯度
            gradients = g.gradient(loss, [embedding, nce_weights, nce_biases])
    
             # 按gradients更新 W 和 b
            optimizer.apply_gradients(zip(gradients, [embedding, nce_weights, nce_biases]))
    
    # 用于测试的单词
    x_test = np.array([word2id[w] for w in eval_words])
    
    # 针对给定步骤数进行训练
    for step in xrange(1, num_steps   1):
        batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
        run_optimization(batch_x, batch_y)
        
        if step % display_step == 0 or step == 1:
            loss = nce_loss(get_embedding(batch_x), batch_y)
            print("step: %i, loss: %f" % (step, loss))
            
        # 评估
        if step % eval_step == 0 or step == 1:
            print("Evaluation...")
            sim = evaluate(get_embedding(x_test)).numpy()
            for i in xrange(len(eval_words)):
                top_k = 8  # 最相似的单词数量
                nearest = (-sim[i, :]).argsort()[1:top_k   1]
                log_str = '"%s" nearest neighbors:' % eval_words[i]
                for k in xrange(top_k):
                    log_str = '%s %s,' % (log_str, id2word[nearest[k]])
                print(log_str)
    
    step: 1, loss: 504.444214
    Evaluation...
    "five" nearest neighbors: censure, stricken, anglicanism, stick, streetcars, shrines, horrified, sparkle,
    "of" nearest neighbors: jolly, weary, clinicians, kerouac, economist, owls, safe, playoff,
    "going" nearest neighbors: filament, platforms, moderately, micheal, despotic, krag, disclosed, your,
    "hardware" nearest neighbors: occupants, paraffin, vera, reorganized, rename, declares, prima, condoned,
    "american" nearest neighbors: portfolio, rhein, aalto, angle, lifeson, tucker, sexton, dench,
    "britain" nearest neighbors: indivisible, disbelief, scripture, pepsi, scriptores, sighting, napalm, strike,
    step: 10000, loss: 117.166962
    step: 20000, loss: 65.478333
    step: 30000, loss: 46.580460
    step: 40000, loss: 25.563128
    step: 50000, loss: 50.924446
    step: 60000, loss: 51.696526
    step: 70000, loss: 17.272142
    step: 80000, loss: 32.579414
    step: 90000, loss: 68.372032
    step: 100000, loss: 36.026573
    step: 110000, loss: 22.502020
    step: 120000, loss: 15.788742
    step: 130000, loss: 31.832420
    step: 140000, loss: 25.096617
    step: 150000, loss: 12.013027
    step: 160000, loss: 20.574780
    step: 170000, loss: 12.201975
    step: 180000, loss: 20.983793
    step: 190000, loss: 11.366720
    step: 200000, loss: 19.431549
    Evaluation...
    "five" nearest neighbors: three, four, eight, six, two, seven, nine, zero,
    "of" nearest neighbors: the, a, and, first, with, on, but, from,
    "going" nearest neighbors: have, more, used, out, be, with, on, however,
    "hardware" nearest neighbors: be, known, system, apollo, and, a, such, used,
    "american" nearest neighbors: UNK, and, from, s, at, in, after, about,
    "britain" nearest neighbors: of, and, many, the, as, used, but, such,
    step: 210000, loss: 16.361233
    step: 220000, loss: 17.529526
    step: 230000, loss: 16.805817
    step: 240000, loss: 6.365625
    step: 250000, loss: 8.083097
    step: 260000, loss: 11.262514
    step: 270000, loss: 9.842708
    step: 280000, loss: 6.363440
    step: 290000, loss: 8.732617
    step: 300000, loss: 10.484728
    step: 310000, loss: 12.099487
    step: 320000, loss: 11.496288
    step: 330000, loss: 9.283813
    step: 340000, loss: 10.777218
    step: 350000, loss: 16.310440
    step: 360000, loss: 7.495782
    step: 370000, loss: 9.287696
    step: 380000, loss: 6.982735
    step: 390000, loss: 8.549622
    step: 400000, loss: 8.388112
    Evaluation...
    "five" nearest neighbors: four, three, six, two, seven, eight, one, zero,
    "of" nearest neighbors: the, a, with, also, for, and, which, by,
    "going" nearest neighbors: have, are, both, called, being, a, of, had,
    "hardware" nearest neighbors: may, de, some, have, so, which, other, also,
    "american" nearest neighbors: s, british, UNK, from, in, including, first, see,
    "britain" nearest neighbors: against, include, including, both, british, other, an, most,
    step: 410000, loss: 8.757725
    step: 420000, loss: 12.303110
    step: 430000, loss: 12.325478
    step: 440000, loss: 7.659882
    step: 450000, loss: 6.028089
    step: 460000, loss: 12.700299
    step: 470000, loss: 7.063077
    step: 480000, loss: 18.004183
    step: 490000, loss: 7.510474
    step: 500000, loss: 10.089376
    step: 510000, loss: 11.404436
    step: 520000, loss: 9.494527
    step: 530000, loss: 7.797963
    step: 540000, loss: 7.390718
    step: 550000, loss: 13.911215
    step: 560000, loss: 6.975731
    step: 570000, loss: 6.179163
    step: 580000, loss: 7.066525
    step: 590000, loss: 6.487288
    step: 600000, loss: 5.361528
    Evaluation...
    "five" nearest neighbors: four, six, three, seven, two, one, eight, zero,
    "of" nearest neighbors: the, and, from, with, a, including, in, include,
    "going" nearest neighbors: have, even, they, term, who, many, which, were,
    "hardware" nearest neighbors: include, computer, an, which, other, each, than, may,
    "american" nearest neighbors: english, french, s, german, from, in, film, see,
    "britain" nearest neighbors: several, first, modern, part, government, german, was, were,
    step: 610000, loss: 4.144980
    step: 620000, loss: 5.865635
    step: 630000, loss: 6.826498
    step: 640000, loss: 8.376097
    step: 650000, loss: 7.117930
    step: 660000, loss: 7.639544
    step: 670000, loss: 5.973255
    step: 680000, loss: 4.908459
    step: 690000, loss: 6.164993
    step: 700000, loss: 7.360281
    step: 710000, loss: 12.693079
    step: 720000, loss: 6.410182
    step: 730000, loss: 7.499201
    step: 740000, loss: 6.509094
    step: 750000, loss: 10.625893
    step: 760000, loss: 7.177696
    step: 770000, loss: 12.639092
    step: 780000, loss: 8.441635
    step: 790000, loss: 7.529139
    step: 800000, loss: 6.579177
    Evaluation...
    "five" nearest neighbors: four, three, six, seven, eight, two, one, zero,
    "of" nearest neighbors: and, with, in, the, its, from, by, including,
    "going" nearest neighbors: have, they, how, include, people, however, also, their,
    "hardware" nearest neighbors: computer, large, include, may, or, which, other, there,
    "american" nearest neighbors: born, french, british, english, german, b, john, d,
    "britain" nearest neighbors: country, including, include, general, part, various, several, by,
    step: 810000, loss: 6.934138
    step: 820000, loss: 5.686094
    step: 830000, loss: 7.310243
    step: 840000, loss: 5.028157
    step: 850000, loss: 7.079705
    step: 860000, loss: 6.768996
    step: 870000, loss: 5.604030
    step: 880000, loss: 8.208309
    step: 890000, loss: 6.301597
    step: 900000, loss: 5.733234
    step: 910000, loss: 6.577081
    step: 920000, loss: 6.774826
    step: 930000, loss: 7.068932
    step: 940000, loss: 6.694956
    step: 950000, loss: 7.944673
    step: 960000, loss: 5.988618
    step: 970000, loss: 6.651366
    step: 980000, loss: 4.595577
    step: 990000, loss: 6.564834
    step: 1000000, loss: 4.327858
    Evaluation...
    "five" nearest neighbors: four, three, seven, six, eight, two, nine, zero,
    "of" nearest neighbors: the, first, and, became, from, under, at, with,
    "going" nearest neighbors: others, has, then, have, how, become, had, also,
    "hardware" nearest neighbors: computer, large, systems, these, different, either, include, using,
    "american" nearest neighbors: b, born, d, UNK, nine, english, german, french,
    "britain" nearest neighbors: government, island, local, country, by, including, control, within,
    step: 1010000, loss: 5.841236
    step: 1020000, loss: 5.805200
    step: 1030000, loss: 9.962063
    step: 1040000, loss: 6.281199
    step: 1050000, loss: 7.147995
    step: 1060000, loss: 5.721184
    step: 1070000, loss: 7.080662
    step: 1080000, loss: 6.638658
    step: 1090000, loss: 5.814178
    step: 1100000, loss: 5.195928
    step: 1110000, loss: 6.724787
    step: 1120000, loss: 6.503905
    step: 1130000, loss: 5.762966
    step: 1140000, loss: 5.790243
    step: 1150000, loss: 5.958191
    step: 1160000, loss: 5.997983
    step: 1170000, loss: 7.065348
    step: 1180000, loss: 6.073387
    step: 1190000, loss: 6.644097
    step: 1200000, loss: 5.934450
    Evaluation...
    "five" nearest neighbors: three, four, six, eight, seven, two, nine, zero,
    "of" nearest neighbors: the, and, including, in, its, with, from, on,
    "going" nearest neighbors: others, then, through, has, had, another, people, when,
    "hardware" nearest neighbors: computer, control, systems, either, these, large, small, other,
    "american" nearest neighbors: born, german, john, d, british, b, UNK, french,
    "britain" nearest neighbors: local, against, british, island, country, general, including, within,
    step: 1210000, loss: 5.832344
    step: 1220000, loss: 6.453851
    step: 1230000, loss: 6.583966
    step: 1240000, loss: 5.571673
    step: 1250000, loss: 5.720917
    step: 1260000, loss: 7.663424
    step: 1270000, loss: 6.583741
    step: 1280000, loss: 8.503859
    step: 1290000, loss: 5.540640
    step: 1300000, loss: 6.703249
    step: 1310000, loss: 5.274101
    step: 1320000, loss: 5.846446
    step: 1330000, loss: 5.438172
    step: 1340000, loss: 6.367691
    step: 1350000, loss: 6.558622
    step: 1360000, loss: 9.822924
    step: 1370000, loss: 4.982378
    step: 1380000, loss: 6.159739
    step: 1390000, loss: 5.819083
    step: 1400000, loss: 7.775135
    Evaluation...
    "five" nearest neighbors: four, three, six, seven, two, eight, one, zero,
    "of" nearest neighbors: and, the, in, with, its, within, for, including,
    "going" nearest neighbors: others, through, while, has, to, how, particularly, their,
    "hardware" nearest neighbors: computer, systems, large, control, research, using, information, either,
    "american" nearest neighbors: english, french, german, born, film, british, s, former,
    "britain" nearest neighbors: british, country, europe, local, military, island, against, western,
    step: 1410000, loss: 8.214248
    step: 1420000, loss: 4.696859
    step: 1430000, loss: 5.873761
    step: 1440000, loss: 5.971557
    step: 1450000, loss: 4.992722
    step: 1460000, loss: 5.197714
    step: 1470000, loss: 6.916918
    step: 1480000, loss: 6.441984
    step: 1490000, loss: 5.443647
    step: 1500000, loss: 5.178482
    step: 1510000, loss: 6.060414
    step: 1520000, loss: 6.373306
    step: 1530000, loss: 5.098322
    step: 1540000, loss: 6.674916
    step: 1550000, loss: 6.712685
    step: 1560000, loss: 5.280202
    step: 1570000, loss: 6.454964
    step: 1580000, loss: 4.896697
    step: 1590000, loss: 6.239226
    step: 1600000, loss: 5.709726
    Evaluation...
    "five" nearest neighbors: three, four, two, six, seven, eight, one, zero,
    "of" nearest neighbors: the, and, including, in, with, within, its, following,
    "going" nearest neighbors: others, people, who, they, that, far, were, have,
    "hardware" nearest neighbors: computer, systems, include, high, research, some, information, large,
    "american" nearest neighbors: born, english, french, british, german, d, john, b,
    "britain" nearest neighbors: country, military, china, europe, against, local, central, british,
    step: 1610000, loss: 6.334940
    step: 1620000, loss: 5.093616
    step: 1630000, loss: 6.119366
    step: 1640000, loss: 4.975187
    step: 1650000, loss: 6.490408
    step: 1660000, loss: 7.464082
    step: 1670000, loss: 4.977184
    step: 1680000, loss: 5.658133
    step: 1690000, loss: 5.352454
    step: 1700000, loss: 6.810776
    step: 1710000, loss: 5.687447
    step: 1720000, loss: 5.992206
    step: 1730000, loss: 5.513011
    step: 1740000, loss: 5.548522
    step: 1750000, loss: 6.200248
    step: 1760000, loss: 13.070073
    step: 1770000, loss: 4.621058
    step: 1780000, loss: 5.301342
    step: 1790000, loss: 4.777030
    step: 1800000, loss: 6.912136
    Evaluation...
    "five" nearest neighbors: three, four, six, seven, eight, two, nine, zero,
    "of" nearest neighbors: the, in, first, from, became, and, following, under,
    "going" nearest neighbors: others, their, through, which, therefore, open, how, that,
    "hardware" nearest neighbors: computer, systems, include, research, standard, different, system, small,
    "american" nearest neighbors: b, d, born, actor, UNK, english, nine, german,
    "britain" nearest neighbors: china, country, europe, against, canada, military, island, including,
    step: 1810000, loss: 5.584600
    step: 1820000, loss: 5.619820
    step: 1830000, loss: 6.078709
    step: 1840000, loss: 5.052518
    step: 1850000, loss: 5.430106
    step: 1860000, loss: 7.396770
    step: 1870000, loss: 5.344787
    step: 1880000, loss: 5.937998
    step: 1890000, loss: 5.706491
    step: 1900000, loss: 5.140662
    step: 1910000, loss: 5.607048
    step: 1920000, loss: 5.407231
    step: 1930000, loss: 6.238531
    step: 1940000, loss: 5.567973
    step: 1950000, loss: 4.894245
    step: 1960000, loss: 6.104193
    step: 1970000, loss: 5.282631
    step: 1980000, loss: 6.189069
    step: 1990000, loss: 6.169409
    step: 2000000, loss: 6.470152
    Evaluation...
    "five" nearest neighbors: four, three, six, seven, eight, two, nine, zero,
    "of" nearest neighbors: the, its, in, with, and, including, within, against,
    "going" nearest neighbors: others, only, therefore, will, how, a, far, though,
    "hardware" nearest neighbors: computer, systems, for, network, software, program, research, system,
    "american" nearest neighbors: born, actor, d, italian, german, john, robert, b,
    "britain" nearest neighbors: china, country, europe, canada, british, former, island, france,
    step: 2010000, loss: 5.298714
    step: 2020000, loss: 5.494207
    step: 2030000, loss: 5.410875
    step: 2040000, loss: 6.228232
    step: 2050000, loss: 5.044596
    step: 2060000, loss: 4.624638
    step: 2070000, loss: 4.919327
    step: 2080000, loss: 4.639625
    step: 2090000, loss: 4.865627
    step: 2100000, loss: 4.951073
    step: 2110000, loss: 5.973768
    step: 2120000, loss: 7.366824
    step: 2130000, loss: 5.149571
    step: 2140000, loss: 7.846234
    step: 2150000, loss: 5.449315
    step: 2160000, loss: 5.359211
    step: 2170000, loss: 5.171029
    step: 2180000, loss: 6.106437
    step: 2190000, loss: 6.043995
    step: 2200000, loss: 5.642351
    Evaluation...
    "five" nearest neighbors: four, three, six, two, eight, seven, zero, one,
    "of" nearest neighbors: the, and, its, see, for, in, with, including,
    "going" nearest neighbors: others, therefore, how, even, them, your, have, although,
    "hardware" nearest neighbors: computer, systems, system, network, program, research, software, include,
    "american" nearest neighbors: english, french, german, canadian, british, film, author, italian,
    "britain" nearest neighbors: europe, china, country, germany, british, england, france, throughout,
    step: 2210000, loss: 4.427110
    step: 2220000, loss: 6.240989
    step: 2230000, loss: 5.184978
    step: 2240000, loss: 8.035570
    step: 2250000, loss: 5.793781
    step: 2260000, loss: 4.908427
    step: 2270000, loss: 8.807668
    step: 2280000, loss: 6.083229
    step: 2290000, loss: 5.773360
    step: 2300000, loss: 5.613671
    step: 2310000, loss: 6.080076
    step: 2320000, loss: 5.288568
    step: 2330000, loss: 5.949232
    step: 2340000, loss: 5.479994
    step: 2350000, loss: 7.717686
    step: 2360000, loss: 5.163609
    step: 2370000, loss: 5.989407
    step: 2380000, loss: 5.785729
    step: 2390000, loss: 5.345478
    step: 2400000, loss: 6.627133
    Evaluation...
    "five" nearest neighbors: three, four, six, two, seven, eight, zero, nine,
    "of" nearest neighbors: the, in, and, including, from, within, its, with,
    "going" nearest neighbors: therefore, people, they, out, only, according, your, now,
    "hardware" nearest neighbors: computer, systems, network, program, system, software, run, design,
    "american" nearest neighbors: author, born, actor, english, canadian, british, italian, d,
    "britain" nearest neighbors: china, europe, country, throughout, france, canada, england, western,
    step: 2410000, loss: 5.666146
    step: 2420000, loss: 5.316198
    step: 2430000, loss: 5.129625
    step: 2440000, loss: 5.247949
    step: 2450000, loss: 5.741394
    step: 2460000, loss: 5.833083
    step: 2470000, loss: 7.704844
    step: 2480000, loss: 5.398345
    step: 2490000, loss: 5.089633
    step: 2500000, loss: 5.620508
    step: 2510000, loss: 4.976034
    step: 2520000, loss: 5.884676
    step: 2530000, loss: 6.649922
    step: 2540000, loss: 5.002588
    step: 2550000, loss: 5.072144
    step: 2560000, loss: 5.165375
    step: 2570000, loss: 5.310089
    step: 2580000, loss: 5.481957
    step: 2590000, loss: 6.104440
    step: 2600000, loss: 5.339644
    Evaluation...
    "five" nearest neighbors: three, four, six, seven, eight, nine, two, zero,
    "of" nearest neighbors: the, first, from, with, became, in, following, and,
    "going" nearest neighbors: how, therefore, back, will, through, always, your, make,
    "hardware" nearest neighbors: computer, systems, system, network, program, technology, design, software,
    "american" nearest neighbors: actor, singer, born, b, author, d, english, writer,
    "britain" nearest neighbors: europe, china, throughout, great, england, france, country, india,
    step: 2610000, loss: 7.754117
    step: 2620000, loss: 5.979313
    step: 2630000, loss: 5.394362
    step: 2640000, loss: 4.866740
    step: 2650000, loss: 5.219806
    step: 2660000, loss: 6.074809
    step: 2670000, loss: 6.216953
    step: 2680000, loss: 5.944881
    step: 2690000, loss: 5.863350
    step: 2700000, loss: 6.128705
    step: 2710000, loss: 5.502523
    step: 2720000, loss: 5.300839
    step: 2730000, loss: 6.358493
    step: 2740000, loss: 6.058306
    step: 2750000, loss: 4.689510
    step: 2760000, loss: 6.032880
    step: 2770000, loss: 5.844904
    step: 2780000, loss: 5.385874
    step: 2790000, loss: 5.370956
    step: 2800000, loss: 4.912577
    Evaluation...
    "five" nearest neighbors: four, six, three, eight, seven, two, nine, one,
    "of" nearest neighbors: in, the, and, from, including, following, with, under,
    "going" nearest neighbors: your, then, through, will, how, so, back, even,
    "hardware" nearest neighbors: computer, systems, program, network, design, standard, physical, software,
    "american" nearest neighbors: actor, singer, born, author, writer, canadian, italian, d,
    "britain" nearest neighbors: europe, china, england, throughout, france, india, great, germany,
    step: 2810000, loss: 5.897756
    step: 2820000, loss: 7.194932
    step: 2830000, loss: 7.430175
    step: 2840000, loss: 7.258231
    step: 2850000, loss: 5.837617
    step: 2860000, loss: 5.496673
    step: 2870000, loss: 6.173716
    step: 2880000, loss: 6.095749
    step: 2890000, loss: 6.064944
    step: 2900000, loss: 5.560488
    step: 2910000, loss: 4.966107
    step: 2920000, loss: 5.789579
    1. step: 2930000, loss: 4.525987
    step: 2940000, loss: 6.704808
    step: 2950000, loss: 4.506433
    step: 2960000, loss: 6.251270
    step: 2970000, loss: 5.588204
    step: 2980000, loss: 5.423235
    step: 2990000, loss: 5.613834
    step: 3000000, loss: 5.137326
    Evaluation...
    "five" nearest neighbors: four, three, six, seven, eight, two, zero, one,
    "of" nearest neighbors: the, including, and, with, in, its, includes, within,
    "going" nearest neighbors: how, they, when, them, make, always, your, though,
    "hardware" nearest neighbors: computer, systems, network, program, physical, design, technology, software,
    "american" nearest neighbors: canadian, english, australian, british, german, film, italian, author,
    "britain" nearest neighbors: europe, england, china, throughout, india, france, great, british,
    

    欢迎关注磐创博客资源汇总站:
    http://docs.panchuang.net/

    欢迎关注PyTorch官方中文教程站:
    http://pytorch.panchuang.net/


    1. https://arxiv.org/pdf/1301.3781.pdf ↩︎

  • 相关阅读:
    LeetCode 230. 二叉搜索树中第K小的元素(Kth Smallest Element in a BST)
    LeetCode 216. 组合总和 III(Combination Sum III)
    LeetCode 179. 最大数(Largest Number)
    LeetCode 199. 二叉树的右视图(Binary Tree Right Side View)
    LeetCode 114. 二叉树展开为链表(Flatten Binary Tree to Linked List)
    LeetCode 106. 从中序与后序遍历序列构造二叉树(Construct Binary Tree from Inorder and Postorder Traversal)
    指针变量、普通变量、内存和地址的全面对比
    MiZ702学习笔记8——让MiZ702变身PC的方法
    你可能不知道的,定义,声明,初始化
    原创zynq文章整理(MiZ702教程+例程)
  • 原文地址:https://www.cnblogs.com/panchuangai/p/12567958.html
Copyright © 2020-2023  润新知