• socket抓取网页


    #include <iostream>
    #include <string>
    #include <netdb.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <arpa/inet.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/types.h>
    #include <sys/stat.h>
    #include <fcntl.h>
    #include <strings.h>
    #include <string.h>
    #include <unistd.h>
    using namespace std;
    
    // Builds the HTTP/1.1 request used to fetch "/" from `host`.
    // Every header line ends in CRLF and a single empty line terminates the
    // header section. "Connection: close" asks the server to close the
    // connection after the response, which is what ends the receive loop
    // in func().
    static std::string build_http_get(const std::string &host)
    {
    	std::string request;
    	request += "GET / HTTP/1.1\r\n";
    	request += "Host: " + host + "\r\n";
    	request += "Accept: */*\r\n";
    	request += "User-Agent: Mozilla/4.0(compatible)\r\n";
    	request += "Connection: close\r\n";
    	request += "\r\n";
    	return request;
    }
    
    // Writes all `len` bytes of `data` to `fd`, retrying after short writes.
    // Returns false as soon as write() fails or makes no progress.
    static bool write_all(int fd, const char *data, size_t len)
    {
    	size_t written = 0;
    	while (written < len)
    	{
    		ssize_t n = write(fd, data + written, len - written);
    		if (n <= 0)
    		{
    			return false;
    		}
    		written += static_cast<size_t>(n);
    	}
    	return true;
    }
    
    // Reads a host name from stdin, resolves it, connects to port 80, sends a
    // GET request for "/" and stores the raw response (status line, headers
    // and body) in "recv.html" in the current directory, echoing progress and
    // the received data to stdout.
    //
    // The input is passed to gethostbyname() and into the Host header as-is,
    // so it must be a bare host name (no "http://" prefix, no path).
    //
    // Any failure prints a diagnostic and terminates the process with
    // exit(-1). The function returns normally once the peer has closed the
    // connection.
    void func()
    {
    	std::string url;
    	std::cout << "输入网址:" << std::endl;
    	if (!(std::cin >> url))
    	{
    		std::cerr << "failed to read host name from stdin" << std::endl;
    		exit(-1);
    	}
    
    	// Resolve the host name to an address.
    	struct hostent *website_host = gethostbyname(url.c_str());
    	if (website_host == NULL)
    	{
    		// gethostbyname() reports failures through h_errno, not errno,
    		// so perror() would print an unrelated message here.
    		herror("gethostbyname error");
    		exit(-1);
    	}
    	std::cout << "主机名称:";
    	std::cout << website_host->h_name << std::endl;
    	std::cout << "地址类型:";
    	std::cout << website_host->h_addrtype << std::endl;
    	std::cout << "地址长度:";
    	std::cout << website_host->h_length << std::endl;
    
    	// Only an IPv4 address of the expected size can be copied into
    	// sockaddr_in below.
    	if (website_host->h_addrtype != AF_INET ||
    	    website_host->h_length != static_cast<int>(sizeof(struct in_addr)) ||
    	    website_host->h_addr_list[0] == NULL)
    	{
    		std::cerr << "gethostbyname error: no usable IPv4 address" << std::endl;
    		exit(-1);
    	}
    
    	// Create the socket descriptor.
    	int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    	if (sockfd == -1)
    	{
    		perror("socket error");
    		exit(-1);
    	}
    	std::cout << "建立socket完毕" << std::endl;
    
    	// Fill in the destination address (port 80, first resolved address).
    	struct sockaddr_in website_addr;
    	memset(&website_addr, 0, sizeof(website_addr));
    	website_addr.sin_family = AF_INET;
    	website_addr.sin_port = htons(80);
    	// memcpy avoids dereferencing the char* entry through a casted pointer.
    	memcpy(&website_addr.sin_addr, website_host->h_addr_list[0], sizeof(website_addr.sin_addr));
    	std::cout << "地址初始化完毕" << std::endl;
    
    	// Connect.
    	if (connect(sockfd, (struct sockaddr *)&website_addr, sizeof(website_addr)) == -1)
    	{
    		perror("connect error");
    		exit(-1);
    	}
    	std::cout << "连接完毕" << std::endl;
    
    	// Send the HTTP request to port 80.
    	const std::string request = build_http_get(url);
    	std::cout << "请求头构造完毕" << std::endl;
    	std::cout << request << std::endl;
    
    	// send() may accept fewer bytes than requested, so loop until the
    	// whole request has been handed to the kernel.
    	size_t sent_total = 0;
    	while (sent_total < request.size())
    	{
    		ssize_t sent = send(sockfd, request.data() + sent_total, request.size() - sent_total, 0);
    		if (sent <= 0)
    		{
    			perror("send error");
    			exit(-1);
    		}
    		sent_total += static_cast<size_t>(sent);
    	}
    	std::cout << "发送完毕" << std::endl;
    	std::cout << "send:\n" << sent_total << std::endl;
    
    	// Open the output file, creating it if needed and discarding any
    	// previous contents so a shorter response leaves no stale tail.
    	int fd = open("recv.html", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    	if (fd == -1)
    	{
    		perror("open error");
    		exit(-1);
    	}
    
    	// Receive until the peer closes the connection.
    	char buf[10 * 1024];
    	while (true)
    	{
    		ssize_t received = recv(sockfd, buf, sizeof(buf), 0);
    		if (received == 0)
    		{
    			// Orderly shutdown by the server: the response is complete.
    			std::cout << "对端关闭" << std::endl;
    			break;
    		}
    		if (received == -1)
    		{
    			perror("read error");
    			exit(-1);
    		}
    		std::cout << "recv:" << received << std::endl;
    		// Use the byte count rather than a NUL terminator: the payload may
    		// contain NUL bytes and may fill the whole buffer.
    		std::cout.write(buf, received);
    		std::cout << std::endl;
    		if (!write_all(fd, buf, static_cast<size_t>(received)))
    		{
    			perror("write error");
    			exit(-1);
    		}
    	}
    
    	close(fd);
    	close(sockfd);
    }
    
    // Program entry point: runs the interactive page fetch once and reports
    // success to the caller. func() terminates the process itself on failure.
    int main()
    {
    	func();
    	return EXIT_SUCCESS;
    }






    版权声明:本文为博主原创文章,未经博主同意,不得转载。

  • 相关阅读:
    单元测试小示例
    分布式版本控制系统Git的安装与使用
    第一次作业:准备
    结对项目四则运算 “软件”之升级版
    个人项目 — 小学四则运算 “软件”之初版
    生命力
    .net 的前景
    力求简洁,应对变化
    现代软件工程 第一章 【概论】练习与讨论 第6题 邓杰
    现代软件工程 第一章 【概论】练习与讨论 第2题 邓杰
  • 原文地址:https://www.cnblogs.com/bhlsheji/p/4660602.html
Copyright © 2020-2023  润新知