• Linux企业级项目实践之网络爬虫(5)——处理配置文件



    配置文件在Linux下使用得非常普遍,但是Linux下没有统一的配置文件标准。

    我们把配置文件的规则制定如下:

    1、把“#”视作注释开始

    2、所有的配置项都是以键值对的形式出现

    3、严格区分大小写

    4、允许数据类型为整型的配置项

    5、允许数据类型为字符串类型的配置项

    6、允许数据类型为逻辑型的配置项,取值为yes或者no。

    同时我们需要对配置文件做初始化和载入两个操作。

    代码如下:

    /* confparser.h */
     
    #ifndef CONFPARSER_H
    #define CONFPARSER_H
     
    #include <vector>
    using namespace std;
     
    #define MAX_CONF_LEN  1024
    #define CONF_FILE     "spider.conf"
     
    /* see the spiderq.conf to get the meaning of each member variable below */
    /*
     * Spider settings. initconfig() assigns the defaults for the scalar and
     * pointer members; loadconfig() overwrites them from the directives found
     * in CONF_FILE.
     *
     * Declared as a plain struct: the former "typedef struct Config { ... };"
     * had no declarator, so the typedef did nothing except trigger a compiler
     * diagnostic. The name Config is unchanged for all users.
     *
     * The containers are spelled std::vector so this header does not depend
     * on a using-directive being in effect.
     */
    struct Config {
       int                  max_job_num;       /* "max_job_num" directive, parsed with atoi() */
       char                *seeds;             /* "seeds" directive, strdup'ed copy or NULL */
       char                *include_prefixes;  /* "include_prefixes" directive, strdup'ed copy or NULL */
       char                *exclude_prefixes;  /* "exclude_prefixes" directive, strdup'ed copy or NULL */
       char                *logfile;           /* "logfile" directive, strdup'ed copy or NULL */
       int                  log_level;         /* "log_level" directive, parsed with atoi() */
       int                  max_depth;         /* "max_depth" directive, parsed with atoi() */
       int                  make_hostdir;      /* "make_hostdir" directive, converted by yesnotoi() */
       int                  stat_interval;     /* "stat_interval" directive, parsed with atoi() */

       char                *module_path;       /* "module_path" directive, strdup'ed copy or NULL */
       std::vector<char *>  modules;           /* one strdup'ed entry per "load_module" directive */
       std::vector<char *>  accept_types;      /* one strdup'ed entry per "accept_types" directive */
    };
     
    /* Allocates a Config, assigns default values to its scalar and pointer
     * members, and returns a pointer to it. */
    extern Config * initconfig();
     
    /* Reads CONF_FILE line by line and stores each recognised "key=value"
     * directive into *conf; problems are reported through SPIDER_LOG. */
    extern void loadconfig(Config *conf);
     
    #endif

     
    /* confparser.c*/
     
    #include <new>

    #include "spider.h"
    #include "qstring.h"
    #include "confparser.h"
     
    #define INF 0x7FFFFFFF
     
    /*
     * Allocate a Config and fill it with the default settings.
     *
     * The storage still comes from malloc(), exactly as before, but the object
     * is now constructed in it with placement new. Config contains std::vector
     * members; in raw malloc'ed memory they were never constructed, so the
     * push_back() calls made by loadconfig() were undefined behaviour.
     *
     * Returns the new Config, or NULL if the allocation fails.
     */
    Config * initconfig()
    {
       void *mem = malloc(sizeof(Config));
       if (mem == NULL)
           return NULL;

       /* Runs the vector constructors; the remaining members are set below. */
       Config *conf = new (mem) Config;

       conf->max_job_num = 10;
       conf->seeds = NULL;
       conf->include_prefixes = NULL;
       conf->exclude_prefixes = NULL;
       conf->logfile = NULL;
       conf->log_level = 0;
       conf->max_depth = INF;          /* INF: no depth limit configured */
       conf->make_hostdir = 0;
       conf->module_path = NULL;
       conf->stat_interval = 0;
       /* conf->modules and conf->accept_types start out as empty vectors. */

       return conf;
    }
     
    /*
     * Load CONF_FILE and copy every recognised "key=value" directive into *conf.
     *
     * Lines that are empty or start with '#' after strim() are skipped. Keys
     * are matched with strcasecmp(). String values are duplicated with
     * strdup(); "load_module" and "accept_types" append to their vectors.
     * Parsing stops at the first unknown or malformed directive, which is
     * reported through SPIDER_LOG together with its line number.
     *
     * Fixes over the previous version:
     *   - returns after a failed fopen() instead of passing NULL to fgets();
     *   - the empty-line test compares against '\0' (the old '' literal is not
     *     a valid character constant);
     *   - the file is closed on every path.
     *
     * conf: destination object; must point to a valid, constructed Config.
     */
    void loadconfig(Config *conf)
    {
       FILE *fp = NULL;
       char buf[MAX_CONF_LEN+1];
       int argc = 0;
       char **argv = NULL;
       int linenum = 0;
       char *line = NULL;
       const char *err = NULL;

       if ((fp = fopen(CONF_FILE, "r")) == NULL) {
           SPIDER_LOG(SPIDER_LEVEL_ERROR, "Can't load conf_file %s",CONF_FILE);
           /* Nothing to parse without a stream. */
           return;
       }

       while (fgets(buf, sizeof buf, fp) != NULL) {
           linenum++;
           line = strim(buf);

           /* Skip comment lines and blank lines. */
           if (line[0] == '#' || line[0] == '\0') continue;

           /* NOTE(review): who owns the array returned by strsplit() is not
            * visible from here; if the caller owns it, it is leaked on every
            * line - confirm against qstring.h. */
           argv = strsplit(line, '=', &argc, 1);
           if (argc != 2) {
               err = "directive must be 'key=value'";
               break;
           }

           if (strcasecmp(argv[0], "max_job_num") == 0) {
               conf->max_job_num = atoi(argv[1]);
           } else if (strcasecmp(argv[0], "logfile") == 0) {
               conf->logfile = strdup(argv[1]);
           } else if (strcasecmp(argv[0], "include_prefixes") == 0) {
               conf->include_prefixes = strdup(argv[1]);
           } else if (strcasecmp(argv[0], "exclude_prefixes") == 0) {
               conf->exclude_prefixes = strdup(argv[1]);
           } else if (strcasecmp(argv[0], "seeds") == 0) {
               conf->seeds = strdup(argv[1]);
           } else if (strcasecmp(argv[0], "module_path") == 0) {
               conf->module_path = strdup(argv[1]);
           } else if (strcasecmp(argv[0], "load_module") == 0) {
               conf->modules.push_back(strdup(argv[1]));
           } else if (strcasecmp(argv[0], "log_level") == 0) {
               conf->log_level = atoi(argv[1]);
           } else if (strcasecmp(argv[0], "max_depth") == 0) {
               conf->max_depth = atoi(argv[1]);
           } else if (strcasecmp(argv[0], "stat_interval") == 0) {
               conf->stat_interval = atoi(argv[1]);
           } else if (strcasecmp(argv[0], "make_hostdir") == 0) {
               conf->make_hostdir = yesnotoi(argv[1]);
           } else if (strcasecmp(argv[0], "accept_types") == 0) {
               conf->accept_types.push_back(strdup(argv[1]));
           } else {
               err = "Unknowndirective";
               break;
           }
       }

       /* Close before logging so the stream is released on every path. */
       fclose(fp);

       if (err != NULL) {
           SPIDER_LOG(SPIDER_LEVEL_ERROR, "Bad directive in %s[line:%d]%s", CONF_FILE, linenum, err);
       }
    }



  • 相关阅读:
    我所经历的大文件数据导出(后台执行,自动生成)
    snowflake ID生成器
    docker搭建php环境
    全局唯一随机邀请码实现方式
    sitemap xml文件生成
    浏览器输入一个地址的过程分析
    DNS解析全过程分析
    nginx编译安装on mac
    nginx image_filter 配置记录
    ImageMagick简单记录
  • 原文地址:https://www.cnblogs.com/new0801/p/6177006.html
Copyright © 2020-2023  润新知