Linux企業級專案實踐之網路爬蟲(5)——處理配置檔案

尹成發表於2014-08-28


配置檔案在Linux下使用得非常普遍,但是Linux下並沒有統一的配置檔案標準。

我們把配置檔案的規則制定如下:

1、把“#”視作註釋開始

2、所有的配置項都是以鍵值對(key=value)的形式出現

3、嚴格區分大小寫(注意:下面的示例程式碼以strcasecmp比對鍵名,因此鍵名實際上不區分大小寫)

4、允許資料型別為整型的配置項

5、允許資料型別為字串型別的配置項

6、允許資料型別為邏輯型的配置項,取值為yes或者no。

 

同時我們需要對配置檔案做初始化和載入兩個操作。

 

程式碼如下:

/* confparser.h */
 
#ifndef CONFPARSER_H
#define CONFPARSER_H
 
#include <vector>
using namespace std;
 
/* Maximum length of one configuration line; loadconfig() reads lines into
 * a buffer of MAX_CONF_LEN+1 bytes. */
#define MAX_CONF_LEN  1024
/* Path of the configuration file that loadconfig() opens. */
#define CONF_FILE     "spider.conf"
 
/*
 * Runtime configuration of the spider.
 *
 * initconfig() fills in the defaults and loadconfig() overrides them with
 * the directives found in CONF_FILE. Unless noted otherwise each member is
 * set from the directive of the same name.
 *
 * NOTE(review): the previous comment referred to "spiderq.conf" for the
 * meaning of each member while CONF_FILE is "spider.conf" - confirm which
 * file documents the directives.
 */
typedef struct Config {
    int                  max_job_num;       /* integer directive; default 10 */
    char                *seeds;             /* string directive; default NULL */
    char                *include_prefixes;  /* string directive; default NULL */
    char                *exclude_prefixes;  /* string directive; default NULL */
    char                *logfile;           /* string directive; default NULL */
    int                  log_level;         /* integer directive; default 0 */
    int                  max_depth;         /* integer directive; default INF */
    int                  make_hostdir;      /* yes/no directive converted by yesnotoi(); default 0 */
    int                  stat_interval;     /* integer directive; default 0 */

    char                *module_path;       /* string directive; default NULL */
    std::vector<char *>  modules;           /* one entry appended per "load_module" directive */
    std::vector<char *>  accept_types;      /* one entry appended per "accept_types" directive */
} Config;
 
/* Allocates a Config and fills it with the built-in default values. */
extern Config * initconfig();
 
/* Reads CONF_FILE and stores every recognised key=value directive in conf. */
extern void loadconfig(Config *conf);
 
#endif

 
/* confparser.c*/
 
#include <new>

#include "spider.h"
#include "qstring.h"
#include "confparser.h"
 
/* Largest 32-bit signed value; initconfig() uses it as the default max_depth. */
#define INF 0x7FFFFFFF
 
/*
 * Allocate a Config and fill it with the built-in defaults.
 *
 * The storage comes from malloc(), and the object is then constructed in
 * place so that the std::vector members (modules, accept_types) are valid
 * before loadconfig() calls push_back() on them.
 *
 * Returns the new Config, or NULL when the allocation fails.
 * To release it, run conf->~Config() and then free(conf).
 */
Config * initconfig()
{
    void *mem = malloc(sizeof(Config));
    if (mem == NULL) {
        return NULL;
    }

    /* malloc() alone never runs constructors; placement new does. */
    Config *conf = new (mem) Config;

    conf->max_job_num = 10;
    conf->seeds = NULL;
    conf->include_prefixes = NULL;
    conf->exclude_prefixes = NULL;
    conf->logfile = NULL;
    conf->log_level = 0;
    conf->max_depth = INF;
    conf->make_hostdir = 0;
    conf->module_path = NULL;
    conf->stat_interval = 0;
    /* modules and accept_types start out empty via their constructors. */

    return conf;
}
 
void loadconfig(Config *conf)
{
   FILE *fp = NULL;
   char buf[MAX_CONF_LEN+1];
   int argc = 0;
   char **argv = NULL;
   int linenum = 0;
   char *line = NULL;
   const char *err = NULL;
 
   if ((fp = fopen(CONF_FILE, "r")) == NULL) {
       SPIDER_LOG(SPIDER_LEVEL_ERROR, "Can't load conf_file %s",CONF_FILE);      
    }
 
   while (fgets(buf, MAX_CONF_LEN+1, fp) != NULL) {
       linenum++;
       line = strim(buf);
 
       if (line[0] == '#' || line[0] == '\0') continue;
 
       argv = strsplit(line, '=', &argc, 1);
       if (argc == 2) {
           if (strcasecmp(argv[0], "max_job_num") == 0) {
                conf->max_job_num =atoi(argv[1]);
           } else if (strcasecmp(argv[0], "logfile") == 0) {
               conf->logfile =strdup(argv[1]);
           } else if (strcasecmp(argv[0], "include_prefixes") == 0) {
                conf->include_prefixes =strdup(argv[1]);
           } else if (strcasecmp(argv[0], "exclude_prefixes") == 0) {
                conf->exclude_prefixes =strdup(argv[1]);
           } else if (strcasecmp(argv[0], "seeds") == 0) {
                conf->seeds =strdup(argv[1]);
           } else if (strcasecmp(argv[0], "module_path") == 0) {
                conf->module_path =strdup(argv[1]);
           } else if (strcasecmp(argv[0], "load_module") == 0) {
               conf->modules.push_back(strdup(argv[1]));
           } else if (strcasecmp(argv[0], "log_level") == 0) {
                conf->log_level =atoi(argv[1]);
           } else if (strcasecmp(argv[0],"max_depth") == 0) {
                conf->max_depth =atoi(argv[1]);
           } else if (strcasecmp(argv[0], "stat_interval") == 0) {
                conf->stat_interval =atoi(argv[1]);
           } else if (strcasecmp(argv[0], "make_hostdir") == 0) {
                conf->make_hostdir =yesnotoi(argv[1]);
           } else if (strcasecmp(argv[0], "accept_types") == 0) {
               conf->accept_types.push_back(strdup(argv[1]));
            } else {
                err = "Unknowndirective"; goto conferr;
           }
       } else {
           err = "directive must be 'key=value'"; goto conferr;
       }
 
    }
   return;
 
conferr:
   SPIDER_LOG(SPIDER_LEVEL_ERROR, "Bad directive in %s[line:%d]%s", CONF_FILE, linenum, err);  
}


 

 


相關文章