当我们设计好程序框架之后就要开始实现它了。第一步当然是要实现主程序的流程框架。之后我们逐渐填充每个流程的细节和其需要调用的模块。
主程序的流程如下:
1、 解析命令行参数,并根据参数跳转到相应的处理分支
2、 解析配置文件
3、 载入处理模块
4、 加载种子URL
5、 启动抓取任务
主程序的代码如下:
int main(int argc, void *argv[]) { struct epoll_event events[10]; int daemonized = 0; char ch; while ((ch = getopt(argc, (char* const*)argv, "vhd")) != -1) { switch(ch) { case 'v': version(); break; case 'd': daemonized = 1; break; case 'h': case '?': default: usage(); } } g_conf = initconfig(); loadconfig(g_conf); set_nofile(1024); vector<char *>::iterator it = g_conf->modules.begin(); for(; it != g_conf->modules.end(); it++) { dso_load(g_conf->module_path, *it); } if (g_conf->seeds == NULL) { SPIDER_LOG(SPIDER_LEVEL_ERROR, "Wehave no seeds, Buddy!"); } else { int c = 0; char ** splits =strsplit(g_conf->seeds, ',', &c, 0); while (c--) { Surl * surl = (Surl*)malloc(sizeof(Surl)); surl->url =url_normalized(strdup(splits[c])); surl->level = 0; surl->type = TYPE_HTML; if (surl->url != NULL) push_surlqueue(surl); } } if (daemonized) daemonize(); chdir("download"); int err = -1; if ((err = create_thread(urlparser, NULL, NULL, NULL)) < 0) { SPIDER_LOG(SPIDER_LEVEL_ERROR,"Create urlparser thread fail: %s", strerror(err)); } int try_num = 1; while(try_num < 8 && is_ourlqueue_empty()) usleep((10000 << try_num++)); if (try_num >= 8) { SPIDER_LOG(SPIDER_LEVEL_ERROR, "NOourl! DNS parse error?"); } if (g_conf->stat_interval > 0) { signal(SIGALRM, stat); set_ticker(g_conf->stat_interval); } int ourl_num = 0; g_epfd = epoll_create(g_conf->max_job_num); while(ourl_num++ < g_conf->max_job_num) { if (attach_epoll_task() < 0) break; } int n, i; while(1) { n = epoll_wait(g_epfd, events, 10,2000); printf("epoll:%d ",n); if (n == -1) printf("epollerrno:%s ",strerror(errno)); fflush(stdout); if (n <= 0) { if (g_cur_thread_num <= 0&& is_ourlqueue_empty() && is_surlqueue_empty()) { sleep(1); if (g_cur_thread_num <= 0&& is_ourlqueue_empty() && is_surlqueue_empty()) break; } } for (i = 0; i < n; i++) { evso_arg * arg = (evso_arg*)(events[i].data.ptr); if ((events[i].events &EPOLLERR) || (events[i].events &EPOLLHUP) || (!(events[i].events &EPOLLIN))) { SPIDER_LOG(SPIDER_LEVEL_WARN,"epoll fail, close socket %d",arg->fd); close(arg->fd); continue; } epoll_ctl(g_epfd, EPOLL_CTL_DEL,arg->fd, &events[i]); /* del event */ printf("helloepoll:event=%d ",events[i].events); fflush(stdout); create_thread(recv_response, arg,NULL, NULL); } } SPIDER_LOG(SPIDER_LEVEL_DEBUG, "Task done!"); close(g_epfd); return 0; }