• Stanford cs224n 第三课: GloVe 代码解读


    Makefile

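      Makefile 是 make 工具使用的构建描述文件, 常见于 Linux/Unix 环境, 用来实现自动化编译。GloVe 的源码是用 C 语言编写的, 在 Linux 环境中通过一个 Makefile 来编译。关于 Makefile 的介绍可移步《Linux Makefile 与 make 命令详细介绍》。下面只解读程序当中用到的知识点。一般情况下, make 会先把各个源文件编译成 .o 目标文件, 再把它们链接 (link) 成可执行文件; 不过下面这个 Makefile 中每个程序只有一个 .c 源文件, 规则里的 gcc 命令没有使用 -c 选项, 一条命令就同时完成了编译和链接, 因此不会单独留下 .o 文件。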

      

    # NOTE: in the real Makefile every recipe line must begin with a hard TAB.
    # C compiler used by every rule below.
    CC = gcc
    # For older gcc, use -O3 or -O2 instead of -Ofast
    CFLAGS = -lm -pthread -Ofast -march=native -funroll-loops -Wno-unused-result
    # ":=" is a simply-expanded assignment: the right-hand side is evaluated once, here
    # ("=" would re-expand it on every use). Keep comments off these lines: whitespace
    # between a value and a trailing "#" becomes part of the value and would corrupt
    # paths such as $(BUILDDIR)/glove.
    BUILDDIR := build
    SRCDIR := src

    # None of these names is a file created by its recipe (binaries go to $(BUILDDIR)),
    # so declare them phony; a stray file with the same name cannot mask the rule.
    .PHONY: all dir glove shuffle cooccur vocab_count clean

    # Default goal (first target in the file): brings every listed prerequisite up to date.
    all: dir glove shuffle cooccur vocab_count

    # Create the output directory; $(BUILDDIR) expands to "build".
    dir :
    	mkdir -p $(BUILDDIR)

    # Each tool is compiled and linked by a single $(CC) invocation.
    # "| dir" is an order-only prerequisite, so the output directory exists first even under make -j.
    # $< is the first prerequisite (the .c file); $@ is the target name.
    glove : $(SRCDIR)/glove.c | dir
    	$(CC) $< -o $(BUILDDIR)/$@ $(CFLAGS)
    shuffle : $(SRCDIR)/shuffle.c | dir
    	$(CC) $< -o $(BUILDDIR)/$@ $(CFLAGS)
    cooccur : $(SRCDIR)/cooccur.c | dir
    	$(CC) $< -o $(BUILDDIR)/$@ $(CFLAGS)
    vocab_count : $(SRCDIR)/vocab_count.c | dir
    	$(CC) $< -o $(BUILDDIR)/$@ $(CFLAGS)

    # Remove the build directory and any same-named binaries in the current directory.
    clean:
    	rm -rf glove shuffle cooccur vocab_count build
    #!/bin/bash
    set -e        # Exit immediately if any command fails
    
    # Makes programs, downloads sample data, trains a GloVe model, and then evaluates it.
    # One optional argument can specify the language used for eval script: matlab, octave or [default] python
    
    make          # Run make to build the programs described by the Makefile
    if [ ! -e text8 ]; then        # True when no file named text8 exists yet
      if hash wget 2>/dev/null; then       # hash succeeds only if wget can be found; its error message is discarded via 2>/dev/null
        wget http://mattmahoney.net/dc/text8.zip
      else
        # Fall back to curl when wget is not available.
        curl -O http://mattmahoney.net/dc/text8.zip
      fi
      unzip text8.zip
      rm text8.zip
    fi
    
    # Settings referenced by the commands below.
    CORPUS=text8
    VOCAB_FILE=vocab.txt
    COOCCURRENCE_FILE=cooccurrence.bin
    COOCCURRENCE_SHUF_FILE=cooccurrence.shuf.bin
    BUILDDIR=build
    SAVE_FILE=vectors
    VERBOSE=2
    MEMORY=4.0
    VOCAB_MIN_COUNT=5
    VECTOR_SIZE=50
    MAX_ITER=15
    WINDOW_SIZE=15
    BINARY=2
    NUM_THREADS=8
    X_MAX=10
    
    # Each step is first echoed (prefixed with "$ ") and then executed.
    echo "$ $BUILDDIR/vocab_count -min-count $VOCAB_MIN_COUNT -verbose $VERBOSE < $CORPUS > $VOCAB_FILE"
    $BUILDDIR/vocab_count -min-count $VOCAB_MIN_COUNT -verbose $VERBOSE < $CORPUS > $VOCAB_FILE       # Runs build/vocab_count with stdin read from $CORPUS ("<") and stdout written to $VOCAB_FILE (">")
    echo "$ $BUILDDIR/cooccur -memory $MEMORY -vocab-file $VOCAB_FILE -verbose $VERBOSE -window-size $WINDOW_SIZE < $CORPUS > $COOCCURRENCE_FILE"
    $BUILDDIR/cooccur -memory $MEMORY -vocab-file $VOCAB_FILE -verbose $VERBOSE -window-size $WINDOW_SIZE < $CORPUS > $COOCCURRENCE_FILE
    echo "$ $BUILDDIR/shuffle -memory $MEMORY -verbose $VERBOSE < $COOCCURRENCE_FILE > $COOCCURRENCE_SHUF_FILE"
    $BUILDDIR/shuffle -memory $MEMORY -verbose $VERBOSE < $COOCCURRENCE_FILE > $COOCCURRENCE_SHUF_FILE
    echo "$ $BUILDDIR/glove -save-file $SAVE_FILE -threads $NUM_THREADS -input-file $COOCCURRENCE_SHUF_FILE -x-max $X_MAX -iter $MAX_ITER -vector-size $VECTOR_SIZE -binary $BINARY -vocab-file $VOCAB_FILE -verbose $VERBOSE"
    $BUILDDIR/glove -save-file $SAVE_FILE -threads $NUM_THREADS -input-file $COOCCURRENCE_SHUF_FILE -x-max $X_MAX -iter $MAX_ITER -vector-size $VECTOR_SIZE -binary $BINARY -vocab-file $VOCAB_FILE -verbose $VERBOSE
    # Evaluation runs only when the corpus is text8. The first script argument selects
    # matlab or octave; any other value (including none) runs the python script.
    # "1>&2" sends the evaluator's standard output to standard error.
    if [ "$CORPUS" = 'text8' ]; then
       if [ "$1" = 'matlab' ]; then
           matlab -nodisplay -nodesktop -nojvm -nosplash < ./eval/matlab/read_and_evaluate.m 1>&2 
       elif [ "$1" = 'octave' ]; then
           octave < ./eval/octave/read_and_evaluate_octave.m 1>&2
       else
           echo "$ python eval/python/evaluate.py"
           python eval/python/evaluate.py
       fi
    fi
    
  • 相关阅读:
    python 全栈开发大纲
    2018/6/22 晚
    python——小知识
    变量与常量
    1.2数据结构-抽象数据类型的表示和实现
    1.1数据结构-基本概念和术语
    人机交互实践04-定位作业
    人机交互实践04-图像浮动至右边
    人机交互实践03-课堂作业2
    人机交互实践03-链接到的网页
  • 原文地址:https://www.cnblogs.com/yangkang77/p/8551902.html
Copyright © 2020-2023  润新知