Flume简介与使用(二)——Thrift Source采集数据
继上一篇安装Flume后,本篇将介绍如何使用Thrift Source采集数据。
Thrift最初由Facebook开发(现为Apache开源项目),是一个用于跨语言RPC通信的框架。它拥有功能强大的软件堆栈和代码生成引擎,允许通过定义一个简单的IDL文件来生成不同语言的代码,服务器端和客户端共享这个IDL文件来生成各自的代码,从而完成通信。
Flume的Thrift Source是其实现的众多Source中的一个,Flume已经实现了服务器端,因此我们可以用任意自己熟悉的语言编写自己的Thrift Source客户端来采集数据,然后发送给Thrift Source服务器端。
[一]、生成C++代码
下载源码版的Flume,在apache-flume-1.6.0-src/flume-ng-sdk/src/main/thrift目录下有Flume定义好的flume.thrift文件,现在只要用这个文件来生成我们需要的代码就行了。
flume.thrift文件内容如下:
// Thrift IDL shipped with Flume for its Thrift Source.
// Only a Java namespace is declared here; no namespace is declared for other target languages.
namespace java org.apache.flume.thrift

// One event sent to the Thrift Source: string headers plus a binary payload.
struct ThriftFlumeEvent {
  1: required map <string, string> headers,
  2: required binary body,
}

// Result code returned by the service calls below.
enum Status {
  OK,
  FAILED,
  ERROR,
  UNKNOWN
}

// RPC interface implemented by the Thrift Source server and called by clients.
service ThriftSourceProtocol {
  // Send a single event.
  Status append(1: ThriftFlumeEvent event),
  // Send a list of events in one call.
  Status appendBatch(1: list<ThriftFlumeEvent> events),
}
1、定义了一个ThriftFlumeEvent结构体,用来封装发送的数据;
2、定义了一个service类ThriftSourceProtocol,服务器端具体实现ThriftSourceProtocol里面的两个方法,再由客户端调用这些方法把数据传给Thrift Source服务器端。
3、运行下面的命令:thrift --gen cpp flume.thrift,会在当前目录生成gen-cpp目录,里面是Thrift自动生成的C++头文件和源代码。(在这之前要先安装Thrift)
[二]、编写自己的客户端代码。我这里的客户端负责接收远程传过来的数据,然后发送给Flume的Thrift Source服务器。
1 #include <arpa/inet.h> 2 #include <sys/types.h> 3 #include <sys/socket.h> 4 #include <pthread.h> 5 #include <unistd.h> 6 #include <stdlib.h> 7 #include "include/MESA_prof_load.h" 8 #include "include/MESA_handle_logger.h" 9 10 #include <string> 11 #include <iostream> 12 #include "gen-cpp/flume_constants.h" 13 #include "gen-cpp/flume_types.h" 14 #include "gen-cpp/ThriftSourceProtocol.h" 15 #include <thrift/protocol/TBinaryProtocol.h> 16 #include <thrift/protocol/TCompactProtocol.h> 17 #include <thrift/transport/TSocket.h> 18 #include <thrift/transport/TTransportUtils.h> 19 using namespace std; 20 using namespace apache::thrift; 21 using namespace apache::thrift::protocol; 22 using namespace apache::thrift::transport; 23 24 #define LOG_PATH "/home/zjf/DFcode/trafficlog/traffic_source.log" 25 #define DATA_BUFFER 2048 //send buffer data length 26 #define BUFLEN 2048 //received buffer data length 27 #define BATCH_SIZE 1000 //send event num to flume once 28 29 //defined my C++ object 30 class ThriftClient{ 31 public: 32 // Thrift protocol needings... 
33 boost::shared_ptr<TTransport> socket; 34 boost::shared_ptr<TTransport> transport; 35 boost::shared_ptr<TProtocol> protocol; 36 ThriftSourceProtocolClient* pClient; 37 38 public: 39 ThriftClient(); 40 }; 41 //cconstruction function, init the thrift source server ip and port 42 ThriftClient::ThriftClient(): 43 socket(new TSocket("10.208.129.12",5497)), 44 transport(new TFramedTransport(socket)), 45 protocol(new TCompactProtocol(transport)) 46 { 47 pClient = new ThriftSourceProtocolClient(protocol); 48 } 49 50 //log 51 struct log_info_t{ 52 char *path; 53 int log_level; 54 void * handle; 55 }; 56 struct log_info_t log_info; 57 const char *module = "zjf_traffic_data_collector"; 58 59 //类的对象 60 ThriftClient *client = new ThriftClient(); 61 std::map<std::string, std::string> headers; 62 std::vector<ThriftFlumeEvent> eventbatch; 63 unsigned long long pkt_num_tgl = 0; 64 65 int RecvAndSendUDP(){ 66 MESA_handle_runtime_log(log_info.handle, RLOG_LV_INFO, module, "RecvUDP be called"); 67 int listen_socket; //socket id 68 struct sockaddr_in local; //client IP, where to recevied data 69 struct sockaddr_in from; //server IP(local host) 70 char server_addr[16] = "10.208.129.12"; //received traffic IP 71 int server_port = 6789; //received traffic port 72 char send_buf[DATA_BUFFER] = {0}; //data send to flume 73 char Buf[BUFLEN] = {0}; 74 int fromlen; 75 int len; 76 77 //init socket 78 reconnect: 79 memset(&local, 0, sizeof(local)); 80 local.sin_family = AF_INET; 81 local.sin_addr.s_addr = inet_addr(server_addr); 82 local.sin_port = htons(server_port); 83 listen_socket = socket(AF_INET, SOCK_DGRAM, 0); // UDP socket 84 if(listen_socket < 0) { 85 printf("error udp socket "); 86 }else{ 87 printf("listen_socket create OK "); 88 } 89 if(bind(listen_socket, (struct sockaddr *)&local, sizeof(local)) < 0) { 90 printf("error udp bind "); 91 return -1; 92 }else{ 93 printf("socket bind OK "); 94 } 95 96 while(1){ 97 char sip[16] = {0}; 98 char dip[16] = {0}; 99 char srcport[6] = {0}; 100 
char destport[6] = {0}; 101 char url[BUFLEN] = {0}; 102 memset(Buf,0,BUFLEN); 103 fromlen = sizeof(from); 104 len = recvfrom(listen_socket, (void *)Buf, (size_t)BUFLEN, 0, (struct sockaddr *)&from,(socklen_t *)&fromlen); 105 if(len == -1) { 106 printf("error udp recvfrom "); 107 close(listen_socket); 108 goto reconnect; 109 } 110 //parse received buf, transform to key-value 111 int i; 112 int sip_loc = 0; 113 int sport_loc = 0; 114 int dip_loc = 0; 115 int dport_loc = 0; 116 int dotcount = 0; 117 for(i=0;Buf[i] != '